diff --git a/modules/core/include/opencv2/core/cuda/utility.hpp b/modules/core/include/opencv2/core/cuda/utility.hpp
index ed604712a7..96616d67b5 100644
--- a/modules/core/include/opencv2/core/cuda/utility.hpp
+++ b/modules/core/include/opencv2/core/cuda/utility.hpp
@@ -54,6 +54,15 @@
 
 namespace cv { namespace cuda { namespace device
 {
+    struct CV_EXPORTS ThrustAllocator // Abstract byte-allocator interface; the active instance is reached through getAllocator(). Presumably consumed by Thrust-based code (no caller visible in this patch).
+    {
+        typedef uchar value_type; // Element type of the allocator: buffers are handed out as uchar*.
+
+        virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0; // Pure virtual: implementations return a buffer for a request of numBytes bytes.
+        virtual __device__ __host__ void free(uchar* ptr) = 0; // Pure virtual: implementations release ptr (presumably one obtained from allocate() - confirm with callers).
+        static ThrustAllocator& getAllocator(); // Returns the allocator currently installed (the built-in default until setAllocator() replaces it).
+        static void setAllocator(ThrustAllocator* allocator); // Installs allocator as the active one; passing NULL restores the built-in default.
+    }; // NOTE(review): the struct has virtual functions but declares no virtual destructor - confirm nothing deletes an allocator through a ThrustAllocator*.
     #define OPENCV_CUDA_LOG_WARP_SIZE (5)
     #define OPENCV_CUDA_WARP_SIZE (1 << OPENCV_CUDA_LOG_WARP_SIZE)
     #define OPENCV_CUDA_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
diff --git a/modules/core/src/cuda/gpu_mat.cu b/modules/core/src/cuda/gpu_mat.cu
index f21c5f4c19..a4e8ee7302 100644
--- a/modules/core/src/cuda/gpu_mat.cu
+++ b/modules/core/src/cuda/gpu_mat.cu
@@ -50,11 +50,52 @@
 
 #include "opencv2/core/cuda.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/cuda/utility.hpp" // Brings in the cv::cuda::device::ThrustAllocator declaration used below.
 
 using namespace cv;
 using namespace cv::cuda;
 using namespace cv::cudev;
 
+namespace
+{
+    class DefaultThrustAllocator: public cv::cuda::device::ThrustAllocator // Built-in allocator: cudaMalloc/cudaFree in the host compilation pass, stubbed out in the device pass.
+    {
+    public:
+        __device__ __host__ uchar* allocate(size_t numBytes) // Host pass: cudaMalloc of numBytes bytes. Device pass: always returns NULL.
+        {
+#ifndef __CUDA_ARCH__ // __CUDA_ARCH__ is undefined here, i.e. this branch is the host compilation pass.
+            uchar* ptr; // NOTE(review): left uninitialized; correctness relies on CV_CUDEV_SAFE_CALL not returning normally when cudaMalloc fails - confirm.
+            CV_CUDEV_SAFE_CALL(cudaMalloc(&ptr, numBytes)); // Allocation result is checked through the CV_CUDEV_SAFE_CALL wrapper.
+            return ptr;
+#else // Device compilation pass: no allocation is attempted.
+            return NULL;
+#endif
+        }
+        __device__ __host__ void free(uchar* ptr) // Host pass: cudaFree(ptr). Device pass: does nothing.
+        {
+#ifndef __CUDA_ARCH__ // Host compilation pass only.
+            CV_CUDEV_SAFE_CALL(cudaFree(ptr)); // Release result is checked through the CV_CUDEV_SAFE_CALL wrapper.
+#endif
+        }
+    };
+    DefaultThrustAllocator defaultThrustAllocator; // The single built-in instance, created in host code. NOTE(review): virtual calls on a host-constructed object are not valid from device code - confirm no device-side caller dispatches through it.
+    cv::cuda::device::ThrustAllocator* g_thrustAllocator = &defaultThrustAllocator; // Currently active allocator; starts out as the built-in default. NOTE(review): read and written without any synchronization visible here.
+}
+
+
+cv::cuda::device::ThrustAllocator& cv::cuda::device::ThrustAllocator::getAllocator() // Accessor for the active allocator.
+{
+    return *g_thrustAllocator; // Never NULL as written: setAllocator() maps NULL back to the default instance.
+}
+
+void cv::cuda::device::ThrustAllocator::setAllocator(cv::cuda::device::ThrustAllocator* allocator) // Replaces the active allocator; NULL means "reset to default".
+{
+    if(allocator == NULL)
+        g_thrustAllocator = &defaultThrustAllocator; // Reset path: fall back to the built-in cudaMalloc/cudaFree allocator.
+    else
+        g_thrustAllocator = allocator; // Only the raw pointer is stored (no copy is made); presumably the caller must keep the object alive while it is installed - confirm.
+}
+
 namespace
 {
     class DefaultAllocator : public GpuMat::Allocator