diff --git a/modules/core/include/opencv2/core/private.cuda.hpp b/modules/core/include/opencv2/core/private.cuda.hpp
index d97b4511b4..a97388bd05 100644
--- a/modules/core/include/opencv2/core/private.cuda.hpp
+++ b/modules/core/include/opencv2/core/private.cuda.hpp
@@ -92,26 +92,6 @@ static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The
 
 namespace cv { namespace cuda
 {
-    class MemoryStack;
-
-    class CV_EXPORTS StackAllocator : public GpuMat::Allocator
-    {
-    public:
-        explicit StackAllocator(cudaStream_t stream);
-        ~StackAllocator();
-
-        bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
-        void free(GpuMat* mat);
-
-    private:
-        StackAllocator(const StackAllocator&);
-        StackAllocator& operator =(const StackAllocator&);
-
-        cudaStream_t stream_;
-        MemoryStack* memStack_;
-        size_t alignment_;
-    };
-
     class CV_EXPORTS BufferPool
     {
     public:
@@ -120,6 +100,8 @@ namespace cv { namespace cuda
         GpuMat getBuffer(int rows, int cols, int type);
         GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
 
+        GpuMat::Allocator* getAllocator() const { return allocator_; }
+
     private:
         GpuMat::Allocator* allocator_;
     };
diff --git a/modules/core/src/cuda_stream.cpp b/modules/core/src/cuda_stream.cpp
index efcf9cb3ee..87afe72a12 100644
--- a/modules/core/src/cuda_stream.cpp
+++ b/modules/core/src/cuda_stream.cpp
@@ -53,55 +53,55 @@ using namespace cv::cuda;
 namespace
 {
     class MemoryPool;
-}
 
-class cv::cuda::MemoryStack
-{
-public:
-    uchar* requestMemory(size_t size);
-    void returnMemory(uchar* ptr);
+    class MemoryStack
+    {
+    public:
+        uchar* requestMemory(size_t size);
+        void returnMemory(uchar* ptr);
 
-    uchar* datastart;
-    uchar* dataend;
-    uchar* tip;
+        uchar* datastart;
+        uchar* dataend;
+        uchar* tip;
 
-    bool isFree;
-    MemoryPool* pool;
+        bool isFree;
+        MemoryPool* pool;
 
-#if !defined(NDEBUG)
-    std::vector<size_t> allocations;
-#endif
-};
+    #if !defined(NDEBUG)
+        std::vector<size_t> allocations;
+    #endif
+    };
 
-uchar* cv::cuda::MemoryStack::requestMemory(size_t size)
-{
-    const size_t freeMem = dataend - tip;
+    uchar* MemoryStack::requestMemory(size_t size)
+    {
+        const size_t freeMem = dataend - tip;
 
-    if (size > freeMem)
-        return 0;
+        if (size > freeMem)
+            return 0;
 
-    uchar* ptr = tip;
+        uchar* ptr = tip;
 
-    tip += size;
+        tip += size;
 
-#if !defined(NDEBUG)
-    allocations.push_back(size);
-#endif
+    #if !defined(NDEBUG)
+        allocations.push_back(size);
+    #endif
 
-    return ptr;
-}
+        return ptr;
+    }
 
-void cv::cuda::MemoryStack::returnMemory(uchar* ptr)
-{
-    CV_DbgAssert( ptr >= datastart && ptr < dataend );
+    void MemoryStack::returnMemory(uchar* ptr)
+    {
+        CV_DbgAssert( ptr >= datastart && ptr < dataend );
 
-#if !defined(NDEBUG)
-    const size_t allocSize = tip - ptr;
-    CV_Assert( allocSize == allocations.back() );
-    allocations.pop_back();
-#endif
+    #if !defined(NDEBUG)
+        const size_t allocSize = tip - ptr;
+        CV_Assert( allocSize == allocations.back() );
+        allocations.pop_back();
+    #endif
 
-    tip = ptr;
+        tip = ptr;
+    }
 }
 
 #endif
@@ -271,6 +271,11 @@ public:
 
 #else
 
+namespace
+{
+    class StackAllocator;
+}
+
 class cv::cuda::Stream::Impl
 {
 public:
@@ -540,29 +545,44 @@ cudaStream_t cv::cuda::StreamAccessor::getStream(const Stream& stream)
 namespace
 {
     bool enableMemoryPool = true;
-}
 
-cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream), memStack_(0)
-{
-    if (enableMemoryPool)
+    class StackAllocator : public GpuMat::Allocator
     {
-        const int deviceId = getDevice();
-        memStack_ = initializer.getMemoryPool(deviceId)->getFreeMemStack();
-        DeviceInfo devInfo(deviceId);
-        alignment_ = devInfo.textureAlignment();
+    public:
+        explicit StackAllocator(cudaStream_t stream);
+        ~StackAllocator();
+
+        bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
+        void free(GpuMat* mat);
+
+    private:
+        StackAllocator(const StackAllocator&);
+        StackAllocator& operator =(const StackAllocator&);
+
+        cudaStream_t stream_;
+        MemoryStack* memStack_;
+        size_t alignment_;
+    };
+
+    StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream), memStack_(0)
+    {
+        if (enableMemoryPool)
+        {
+            const int deviceId = getDevice();
+            memStack_ = initializer.getMemoryPool(deviceId)->getFreeMemStack();
+            DeviceInfo devInfo(deviceId);
+            alignment_ = devInfo.textureAlignment();
+        }
     }
-}
 
-cv::cuda::StackAllocator::~StackAllocator()
-{
-    cudaStreamSynchronize(stream_);
+    StackAllocator::~StackAllocator()
+    {
+        cudaStreamSynchronize(stream_);
 
-    if (memStack_ != 0)
-        memStack_->pool->returnMemStack(memStack_);
-}
+        if (memStack_ != 0)
+            memStack_->pool->returnMemStack(memStack_);
+    }
 
-namespace
-{
     size_t alignUp(size_t what, size_t alignment)
     {
         size_t alignMask = alignment-1;
@@ -570,55 +590,71 @@ namespace
         size_t res = (what + alignMask) & inverseAlignMask;
         return res;
     }
-}
 
-bool cv::cuda::StackAllocator::allocate(GpuMat* mat, int rows, int cols, size_t elemSize)
-{
-    if (memStack_ == 0)
-        return false;
-
-    size_t pitch, memSize;
-
-    if (rows > 1 && cols > 1)
+    bool StackAllocator::allocate(GpuMat* mat, int rows, int cols, size_t elemSize)
     {
-        pitch = alignUp(cols * elemSize, alignment_);
-        memSize = pitch * rows;
-    }
-    else
-    {
-        // Single row or single column must be continuous
-        pitch = elemSize * cols;
-        memSize = alignUp(elemSize * cols * rows, 64);
+        if (memStack_ == 0)
+            return false;
+
+        size_t pitch, memSize;
+
+        if (rows > 1 && cols > 1)
+        {
+            pitch = alignUp(cols * elemSize, alignment_);
+            memSize = pitch * rows;
+        }
+        else
+        {
+            // Single row or single column must be continuous
+            pitch = elemSize * cols;
+            memSize = alignUp(elemSize * cols * rows, 64);
+        }
+
+        uchar* ptr = memStack_->requestMemory(memSize);
+
+        if (ptr == 0)
+            return false;
+
+        mat->data = ptr;
+        mat->step = pitch;
+        mat->refcount = (int*) fastMalloc(sizeof(int));
+
+        return true;
     }
 
-    uchar* ptr = memStack_->requestMemory(memSize);
+    void StackAllocator::free(GpuMat* mat)
+    {
+        if (memStack_ == 0)
+            return;
 
-    if (ptr == 0)
-        return false;
-
-    mat->data = ptr;
-    mat->step = pitch;
-    mat->refcount = (int*) fastMalloc(sizeof(int));
-
-    return true;
+        memStack_->returnMemory(mat->datastart);
+        fastFree(mat->refcount);
+    }
 }
 
-void cv::cuda::StackAllocator::free(GpuMat* mat)
-{
-    if (memStack_ == 0)
-        return;
+#endif
 
-    memStack_->returnMemory(mat->datastart);
-    fastFree(mat->refcount);
-}
+/////////////////////////////////////////////////////////////
+/// BufferPool
 
 void cv::cuda::setBufferPoolUsage(bool on)
 {
+#ifndef HAVE_CUDA
+    (void)on;
+    throw_no_cuda();
+#else
     enableMemoryPool = on;
+#endif
 }
 
 void cv::cuda::setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount)
 {
+#ifndef HAVE_CUDA
+    (void)deviceId;
+    (void)stackSize;
+    (void)stackCount;
+    throw_no_cuda();
+#else
     const int currentDevice = getDevice();
 
     if (deviceId >= 0)
@@ -638,12 +674,8 @@ void cv::cuda::setBufferPoolConfig(int deviceId, size_t stackSize, int stackCoun
     }
 
     setDevice(currentDevice);
-}
 
 #endif
-
-/////////////////////////////////////////////////////////////
-/// BufferPool
+}
 
 #ifdef HAVE_CUDA
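
Below is a minimal usage sketch of the reworked interface; it is not part of the patch. It assumes an OpenCV build with HAVE_CUDA and at least one CUDA device, and it is written as module-internal code, since BufferPool and the pool configuration functions are still declared in the private core header at this point. The 64 MB stack size, the stack count of 2, and the header placement are illustrative assumptions, not values prescribed by the patch.

    #include <opencv2/core/cuda.hpp>          // GpuMat, Stream, getDevice
    #include <opencv2/core/private.cuda.hpp>  // BufferPool (private header in this patch)

    static void bufferPoolDemo()
    {
        // Pool usage and the per-device stack layout have to be set up before
        // the first Stream is constructed, because each Stream's StackAllocator
        // grabs a free stack from the per-device MemoryPool on creation.
        cv::cuda::setBufferPoolUsage(true);
        cv::cuda::setBufferPoolConfig(cv::cuda::getDevice(), 64 * 1024 * 1024, 2);

        cv::cuda::Stream stream;
        cv::cuda::BufferPool pool(stream);

        // getBuffer() returns a GpuMat carved out of the stream's stack.
        cv::cuda::GpuMat buf = pool.getBuffer(1024, 1024, CV_32FC1);

        // The new getAllocator() accessor exposes the underlying allocator, so a
        // GpuMat can be bound to the same pool through the allocator-taking
        // GpuMat constructor instead of going through getBuffer().
        cv::cuda::GpuMat other(512, 512, CV_8UC1, pool.getAllocator());
    }

Because StackAllocator is now an implementation detail of cuda_stream.cpp, getAllocator() is the only remaining way for callers to reach the stream's stack-based allocator.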