diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index 5b3ef205cc..e43fbbc951 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -705,7 +705,7 @@ namespace ogl namespace cuda { class CV_EXPORTS GpuMat; - class CV_EXPORTS CudaMem; + class CV_EXPORTS HostMem; class CV_EXPORTS Stream; class CV_EXPORTS Event; } diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp index b67bf62e34..a9c7a39a8f 100644 --- a/modules/core/include/opencv2/core/cuda.hpp +++ b/modules/core/include/opencv2/core/cuda.hpp @@ -67,7 +67,9 @@ namespace cv { namespace cuda { //! @addtogroup cudacore_struct //! @{ -//////////////////////////////// GpuMat /////////////////////////////// +//=================================================================================== +// GpuMat +//=================================================================================== /** @brief Base storage class for GPU memory with reference counting. @@ -325,13 +327,13 @@ The function does not reallocate memory if the matrix has proper attributes alre */ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr); -CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat); - //! BufferPool management (must be called before Stream creation) CV_EXPORTS void setBufferPoolUsage(bool on); CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount); -//////////////////////////////// CudaMem //////////////////////////////// +//=================================================================================== +// HostMem +//=================================================================================== /** @brief Class with reference counting wrapping special memory type allocation functions from CUDA. @@ -348,43 +350,45 @@ Its interface is also Mat-like but with additional memory type parameters. @note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2 Pinned Memory APIs* document or *CUDA C Programming Guide*. */ -class CV_EXPORTS CudaMem +class CV_EXPORTS HostMem { public: enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 }; - explicit CudaMem(AllocType alloc_type = PAGE_LOCKED); + static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED); - CudaMem(const CudaMem& m); + explicit HostMem(AllocType alloc_type = PAGE_LOCKED); - CudaMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED); - CudaMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED); + HostMem(const HostMem& m); + + HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED); + HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED); //! creates from host memory with coping data - explicit CudaMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED); + explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED); - ~CudaMem(); + ~HostMem(); - CudaMem& operator =(const CudaMem& m); + HostMem& operator =(const HostMem& m); //! swaps with other smart pointer - void swap(CudaMem& b); + void swap(HostMem& b); //! returns deep copy of the matrix, i.e. the data is copied - CudaMem clone() const; + HostMem clone() const; //! allocates new matrix data unless the matrix already has specified size and type. void create(int rows, int cols, int type); void create(Size size, int type); - //! creates alternative CudaMem header for the same data, with different + //! creates alternative HostMem header for the same data, with different //! number of channels and/or different number of rows - CudaMem reshape(int cn, int rows = 0) const; + HostMem reshape(int cn, int rows = 0) const; //! decrements reference counter and released memory if needed. void release(); - //! returns matrix header with disabled reference counting for CudaMem data. + //! returns matrix header with disabled reference counting for HostMem data. Mat createMatHeader() const; /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting @@ -433,7 +437,9 @@ CV_EXPORTS void registerPageLocked(Mat& m); */ CV_EXPORTS void unregisterPageLocked(Mat& m); -///////////////////////////////// Stream ////////////////////////////////// +//=================================================================================== +// Stream +//=================================================================================== /** @brief This class encapsulates a queue of asynchronous calls. @@ -528,7 +534,9 @@ private: //! @} cudacore_struct -//////////////////////////////// Initialization & Info //////////////////////// +//=================================================================================== +// Initialization & Info +//=================================================================================== //! @addtogroup cudacore_init //! @{ @@ -570,7 +578,9 @@ enum FeatureSet FEATURE_SET_COMPUTE_20 = 20, FEATURE_SET_COMPUTE_21 = 21, FEATURE_SET_COMPUTE_30 = 30, + FEATURE_SET_COMPUTE_32 = 32, FEATURE_SET_COMPUTE_35 = 35, + FEATURE_SET_COMPUTE_50 = 50, GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11, SHARED_ATOMICS = FEATURE_SET_COMPUTE_12, diff --git a/modules/core/include/opencv2/core/cuda.inl.hpp b/modules/core/include/opencv2/core/cuda.inl.hpp index 652bcfea29..1285b1a23d 100644 --- a/modules/core/include/opencv2/core/cuda.inl.hpp +++ b/modules/core/include/opencv2/core/cuda.inl.hpp @@ -50,7 +50,9 @@ namespace cv { namespace cuda { -//////////////////////////////// GpuMat /////////////////////////////// +//=================================================================================== +// GpuMat +//=================================================================================== inline GpuMat::GpuMat(Allocator* allocator_) @@ -145,6 +147,7 @@ void GpuMat::swap(GpuMat& b) std::swap(datastart, b.datastart); std::swap(dataend, b.dataend); std::swap(refcount, b.refcount); + std::swap(allocator, b.allocator); } inline @@ -374,16 +377,18 @@ void swap(GpuMat& a, GpuMat& b) a.swap(b); } -//////////////////////////////// CudaMem //////////////////////////////// +//=================================================================================== +// HostMem +//=================================================================================== inline -CudaMem::CudaMem(AllocType alloc_type_) +HostMem::HostMem(AllocType alloc_type_) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_) { } inline -CudaMem::CudaMem(const CudaMem& m) +HostMem::HostMem(const HostMem& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type) { if( refcount ) @@ -391,7 +396,7 @@ CudaMem::CudaMem(const CudaMem& m) } inline -CudaMem::CudaMem(int rows_, int cols_, int type_, AllocType alloc_type_) +HostMem::HostMem(int rows_, int cols_, int type_, AllocType alloc_type_) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_) { if (rows_ > 0 && cols_ > 0) @@ -399,7 +404,7 @@ CudaMem::CudaMem(int rows_, int cols_, int type_, AllocType alloc_type_) } inline -CudaMem::CudaMem(Size size_, int type_, AllocType alloc_type_) +HostMem::HostMem(Size size_, int type_, AllocType alloc_type_) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_) { if (size_.height > 0 && size_.width > 0) @@ -407,24 +412,24 @@ CudaMem::CudaMem(Size size_, int type_, AllocType alloc_type_) } inline -CudaMem::CudaMem(InputArray arr, AllocType alloc_type_) +HostMem::HostMem(InputArray arr, AllocType alloc_type_) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_) { arr.getMat().copyTo(*this); } inline -CudaMem::~CudaMem() +HostMem::~HostMem() { release(); } inline -CudaMem& CudaMem::operator =(const CudaMem& m) +HostMem& HostMem::operator =(const HostMem& m) { if (this != &m) { - CudaMem temp(m); + HostMem temp(m); swap(temp); } @@ -432,7 +437,7 @@ CudaMem& CudaMem::operator =(const CudaMem& m) } inline -void CudaMem::swap(CudaMem& b) +void HostMem::swap(HostMem& b) { std::swap(flags, b.flags); std::swap(rows, b.rows); @@ -446,86 +451,88 @@ void CudaMem::swap(CudaMem& b) } inline -CudaMem CudaMem::clone() const +HostMem HostMem::clone() const { - CudaMem m(size(), type(), alloc_type); + HostMem m(size(), type(), alloc_type); createMatHeader().copyTo(m); return m; } inline -void CudaMem::create(Size size_, int type_) +void HostMem::create(Size size_, int type_) { create(size_.height, size_.width, type_); } inline -Mat CudaMem::createMatHeader() const +Mat HostMem::createMatHeader() const { return Mat(size(), type(), data, step); } inline -bool CudaMem::isContinuous() const +bool HostMem::isContinuous() const { return (flags & Mat::CONTINUOUS_FLAG) != 0; } inline -size_t CudaMem::elemSize() const +size_t HostMem::elemSize() const { return CV_ELEM_SIZE(flags); } inline -size_t CudaMem::elemSize1() const +size_t HostMem::elemSize1() const { return CV_ELEM_SIZE1(flags); } inline -int CudaMem::type() const +int HostMem::type() const { return CV_MAT_TYPE(flags); } inline -int CudaMem::depth() const +int HostMem::depth() const { return CV_MAT_DEPTH(flags); } inline -int CudaMem::channels() const +int HostMem::channels() const { return CV_MAT_CN(flags); } inline -size_t CudaMem::step1() const +size_t HostMem::step1() const { return step / elemSize1(); } inline -Size CudaMem::size() const +Size HostMem::size() const { return Size(cols, rows); } inline -bool CudaMem::empty() const +bool HostMem::empty() const { return data == 0; } static inline -void swap(CudaMem& a, CudaMem& b) +void swap(HostMem& a, HostMem& b) { a.swap(b); } -//////////////////////////////// Stream /////////////////////////////// +//=================================================================================== +// Stream +//=================================================================================== inline Stream::Stream(const Ptr& impl) @@ -533,7 +540,9 @@ Stream::Stream(const Ptr& impl) { } -//////////////////////////////// Initialization & Info //////////////////////// +//=================================================================================== +// Initialization & Info +//=================================================================================== inline bool TargetArchs::has(int major, int minor) @@ -592,7 +601,9 @@ bool DeviceInfo::supports(FeatureSet feature_set) const }} // namespace cv { namespace cuda { -//////////////////////////////// Mat //////////////////////////////// +//=================================================================================== +// Mat +//=================================================================================== namespace cv { diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index 7bddc0b358..c8836274e6 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -160,8 +160,8 @@ public: STD_VECTOR_MAT = 5 << KIND_SHIFT, EXPR = 6 << KIND_SHIFT, OPENGL_BUFFER = 7 << KIND_SHIFT, - CUDA_MEM = 8 << KIND_SHIFT, - GPU_MAT = 9 << KIND_SHIFT, + CUDA_HOST_MEM = 8 << KIND_SHIFT, + CUDA_GPU_MAT = 9 << KIND_SHIFT, UMAT =10 << KIND_SHIFT, STD_VECTOR_UMAT =11 << KIND_SHIFT }; @@ -180,7 +180,7 @@ public: _InputArray(const double& val); _InputArray(const cuda::GpuMat& d_mat); _InputArray(const ogl::Buffer& buf); - _InputArray(const cuda::CudaMem& cuda_mem); + _InputArray(const cuda::HostMem& cuda_mem); template _InputArray(const cudev::GpuMat_<_Tp>& m); _InputArray(const UMat& um); _InputArray(const std::vector& umv); @@ -277,7 +277,7 @@ public: _OutputArray(std::vector& vec); _OutputArray(cuda::GpuMat& d_mat); _OutputArray(ogl::Buffer& buf); - _OutputArray(cuda::CudaMem& cuda_mem); + _OutputArray(cuda::HostMem& cuda_mem); template _OutputArray(cudev::GpuMat_<_Tp>& m); template _OutputArray(std::vector<_Tp>& vec); template _OutputArray(std::vector >& vec); @@ -292,7 +292,7 @@ public: _OutputArray(const std::vector& vec); _OutputArray(const cuda::GpuMat& d_mat); _OutputArray(const ogl::Buffer& buf); - _OutputArray(const cuda::CudaMem& cuda_mem); + _OutputArray(const cuda::HostMem& cuda_mem); template _OutputArray(const cudev::GpuMat_<_Tp>& m); template _OutputArray(const std::vector<_Tp>& vec); template _OutputArray(const std::vector >& vec); @@ -310,7 +310,7 @@ public: virtual UMat& getUMatRef(int i=-1) const; virtual cuda::GpuMat& getGpuMatRef() const; virtual ogl::Buffer& getOGlBufferRef() const; - virtual cuda::CudaMem& getCudaMemRef() const; + virtual cuda::HostMem& getHostMemRef() const; virtual void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const; virtual void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const; virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const; @@ -333,7 +333,7 @@ public: _InputOutputArray(std::vector& vec); _InputOutputArray(cuda::GpuMat& d_mat); _InputOutputArray(ogl::Buffer& buf); - _InputOutputArray(cuda::CudaMem& cuda_mem); + _InputOutputArray(cuda::HostMem& cuda_mem); template _InputOutputArray(cudev::GpuMat_<_Tp>& m); template _InputOutputArray(std::vector<_Tp>& vec); template _InputOutputArray(std::vector >& vec); @@ -348,7 +348,7 @@ public: _InputOutputArray(const std::vector& vec); _InputOutputArray(const cuda::GpuMat& d_mat); _InputOutputArray(const ogl::Buffer& buf); - _InputOutputArray(const cuda::CudaMem& cuda_mem); + _InputOutputArray(const cuda::HostMem& cuda_mem); template _InputOutputArray(const cudev::GpuMat_<_Tp>& m); template _InputOutputArray(const std::vector<_Tp>& vec); template _InputOutputArray(const std::vector >& vec); diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp index 24c6b453c1..9ca85116a0 100644 --- a/modules/core/include/opencv2/core/mat.inl.hpp +++ b/modules/core/include/opencv2/core/mat.inl.hpp @@ -100,13 +100,13 @@ inline _InputArray::_InputArray(const MatExpr& expr) { init(FIXED_TYPE + FIXED_SIZE + EXPR + ACCESS_READ, &expr); } inline _InputArray::_InputArray(const cuda::GpuMat& d_mat) -{ init(GPU_MAT + ACCESS_READ, &d_mat); } +{ init(CUDA_GPU_MAT + ACCESS_READ, &d_mat); } inline _InputArray::_InputArray(const ogl::Buffer& buf) { init(OPENGL_BUFFER + ACCESS_READ, &buf); } -inline _InputArray::_InputArray(const cuda::CudaMem& cuda_mem) -{ init(CUDA_MEM + ACCESS_READ, &cuda_mem); } +inline _InputArray::_InputArray(const cuda::HostMem& cuda_mem) +{ init(CUDA_HOST_MEM + ACCESS_READ, &cuda_mem); } inline _InputArray::~_InputArray() {} @@ -174,13 +174,13 @@ _OutputArray::_OutputArray(const _Tp* vec, int n) { init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, vec, Size(n, 1)); } inline _OutputArray::_OutputArray(cuda::GpuMat& d_mat) -{ init(GPU_MAT + ACCESS_WRITE, &d_mat); } +{ init(CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); } inline _OutputArray::_OutputArray(ogl::Buffer& buf) { init(OPENGL_BUFFER + ACCESS_WRITE, &buf); } -inline _OutputArray::_OutputArray(cuda::CudaMem& cuda_mem) -{ init(CUDA_MEM + ACCESS_WRITE, &cuda_mem); } +inline _OutputArray::_OutputArray(cuda::HostMem& cuda_mem) +{ init(CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); } inline _OutputArray::_OutputArray(const Mat& m) { init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_WRITE, &m); } @@ -195,13 +195,13 @@ inline _OutputArray::_OutputArray(const std::vector& vec) { init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_WRITE, &vec); } inline _OutputArray::_OutputArray(const cuda::GpuMat& d_mat) -{ init(FIXED_TYPE + FIXED_SIZE + GPU_MAT + ACCESS_WRITE, &d_mat); } +{ init(FIXED_TYPE + FIXED_SIZE + CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); } inline _OutputArray::_OutputArray(const ogl::Buffer& buf) { init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_WRITE, &buf); } -inline _OutputArray::_OutputArray(const cuda::CudaMem& cuda_mem) -{ init(FIXED_TYPE + FIXED_SIZE + CUDA_MEM + ACCESS_WRITE, &cuda_mem); } +inline _OutputArray::_OutputArray(const cuda::HostMem& cuda_mem) +{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); } /////////////////////////////////////////////////////////////////////////////////////////// @@ -261,13 +261,13 @@ _InputOutputArray::_InputOutputArray(const _Tp* vec, int n) { init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, vec, Size(n, 1)); } inline _InputOutputArray::_InputOutputArray(cuda::GpuMat& d_mat) -{ init(GPU_MAT + ACCESS_RW, &d_mat); } +{ init(CUDA_GPU_MAT + ACCESS_RW, &d_mat); } inline _InputOutputArray::_InputOutputArray(ogl::Buffer& buf) { init(OPENGL_BUFFER + ACCESS_RW, &buf); } -inline _InputOutputArray::_InputOutputArray(cuda::CudaMem& cuda_mem) -{ init(CUDA_MEM + ACCESS_RW, &cuda_mem); } +inline _InputOutputArray::_InputOutputArray(cuda::HostMem& cuda_mem) +{ init(CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); } inline _InputOutputArray::_InputOutputArray(const Mat& m) { init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_RW, &m); } @@ -282,13 +282,13 @@ inline _InputOutputArray::_InputOutputArray(const std::vector& vec) { init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_RW, &vec); } inline _InputOutputArray::_InputOutputArray(const cuda::GpuMat& d_mat) -{ init(FIXED_TYPE + FIXED_SIZE + GPU_MAT + ACCESS_RW, &d_mat); } +{ init(FIXED_TYPE + FIXED_SIZE + CUDA_GPU_MAT + ACCESS_RW, &d_mat); } inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf) { init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_RW, &buf); } -inline _InputOutputArray::_InputOutputArray(const cuda::CudaMem& cuda_mem) -{ init(FIXED_TYPE + FIXED_SIZE + CUDA_MEM + ACCESS_RW, &cuda_mem); } +inline _InputOutputArray::_InputOutputArray(const cuda::HostMem& cuda_mem) +{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); } //////////////////////////////////////////// Mat ////////////////////////////////////////// diff --git a/modules/cuda/perf/perf_matop.cpp b/modules/core/perf/cuda/perf_gpumat.cpp similarity index 91% rename from modules/cuda/perf/perf_matop.cpp rename to modules/core/perf/cuda/perf_gpumat.cpp index 751e6e7148..4ef79c7ad8 100644 --- a/modules/cuda/perf/perf_matop.cpp +++ b/modules/core/perf/cuda/perf_gpumat.cpp @@ -40,7 +40,12 @@ // //M*/ -#include "perf_precomp.hpp" +#include "../perf_precomp.hpp" + +#ifdef HAVE_CUDA + +#include "opencv2/core/cuda.hpp" +#include "opencv2/ts/cuda_perf.hpp" using namespace std; using namespace testing; @@ -49,7 +54,7 @@ using namespace perf; ////////////////////////////////////////////////////////////////////// // SetTo -PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo, +PERF_TEST_P(Sz_Depth_Cn, CUDA_GpuMat_SetTo, Combine(CUDA_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), CUDA_CHANNELS_1_3_4)) @@ -67,23 +72,21 @@ PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo, cv::cuda::GpuMat dst(size, type); TEST_CYCLE() dst.setTo(val); - - CUDA_SANITY_CHECK(dst); } else { cv::Mat dst(size, type); TEST_CYCLE() dst.setTo(val); - - CPU_SANITY_CHECK(dst); } + + SANITY_CHECK_NOTHING(); } ////////////////////////////////////////////////////////////////////// // SetToMasked -PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked, +PERF_TEST_P(Sz_Depth_Cn, CUDA_GpuMat_SetToMasked, Combine(CUDA_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), CUDA_CHANNELS_1_3_4)) @@ -106,23 +109,21 @@ PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked, const cv::cuda::GpuMat d_mask(mask); TEST_CYCLE() dst.setTo(val, d_mask); - - CUDA_SANITY_CHECK(dst, 1e-10); } else { cv::Mat dst = src; TEST_CYCLE() dst.setTo(val, mask); - - CPU_SANITY_CHECK(dst); } + + SANITY_CHECK_NOTHING(); } ////////////////////////////////////////////////////////////////////// // CopyToMasked -PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, +PERF_TEST_P(Sz_Depth_Cn, CUDA_GpuMat_CopyToMasked, Combine(CUDA_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), CUDA_CHANNELS_1_3_4)) @@ -144,17 +145,15 @@ PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, cv::cuda::GpuMat dst(d_src.size(), d_src.type(), cv::Scalar::all(0)); TEST_CYCLE() d_src.copyTo(dst, d_mask); - - CUDA_SANITY_CHECK(dst, 1e-10); } else { cv::Mat dst(src.size(), src.type(), cv::Scalar::all(0)); TEST_CYCLE() src.copyTo(dst, mask); - - CPU_SANITY_CHECK(dst); } + + SANITY_CHECK_NOTHING(); } ////////////////////////////////////////////////////////////////////// @@ -162,7 +161,7 @@ PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, DEF_PARAM_TEST(Sz_2Depth, cv::Size, MatDepth, MatDepth); -PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo, +PERF_TEST_P(Sz_2Depth, CUDA_GpuMat_ConvertTo, Combine(CUDA_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(CV_8U, CV_16U, CV_32F, CV_64F))) @@ -183,15 +182,15 @@ PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo, cv::cuda::GpuMat dst; TEST_CYCLE() d_src.convertTo(dst, depth2, a, b); - - CUDA_SANITY_CHECK(dst, 1e-10); } else { cv::Mat dst; TEST_CYCLE() src.convertTo(dst, depth2, a, b); - - CPU_SANITY_CHECK(dst); } + + SANITY_CHECK_NOTHING(); } + +#endif diff --git a/modules/core/src/cuda_gpu_mat.cpp b/modules/core/src/cuda_gpu_mat.cpp index 803b21069d..4440d58536 100644 --- a/modules/core/src/cuda_gpu_mat.cpp +++ b/modules/core/src/cuda_gpu_mat.cpp @@ -275,12 +275,12 @@ void cv::cuda::createContinuous(int rows, int cols, int type, OutputArray arr) ::createContinuousImpl(rows, cols, type, arr.getMatRef()); break; - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: ::createContinuousImpl(rows, cols, type, arr.getGpuMatRef()); break; - case _InputArray::CUDA_MEM: - ::createContinuousImpl(rows, cols, type, arr.getCudaMemRef()); + case _InputArray::CUDA_HOST_MEM: + ::createContinuousImpl(rows, cols, type, arr.getHostMemRef()); break; default: @@ -329,12 +329,12 @@ void cv::cuda::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr) ::ensureSizeIsEnoughImpl(rows, cols, type, arr.getMatRef()); break; - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: ::ensureSizeIsEnoughImpl(rows, cols, type, arr.getGpuMatRef()); break; - case _InputArray::CUDA_MEM: - ::ensureSizeIsEnoughImpl(rows, cols, type, arr.getCudaMemRef()); + case _InputArray::CUDA_HOST_MEM: + ::ensureSizeIsEnoughImpl(rows, cols, type, arr.getHostMemRef()); break; default: @@ -342,14 +342,6 @@ void cv::cuda::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr) } } -GpuMat cv::cuda::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat) -{ - if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols) - return mat(Rect(0, 0, cols, rows)); - - return mat = GpuMat(rows, cols, type); -} - #ifndef HAVE_CUDA GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator() diff --git a/modules/core/src/cuda_host_mem.cpp b/modules/core/src/cuda_host_mem.cpp index b27d52e329..2ad733b675 100644 --- a/modules/core/src/cuda_host_mem.cpp +++ b/modules/core/src/cuda_host_mem.cpp @@ -42,10 +42,124 @@ //M*/ #include "precomp.hpp" +#include using namespace cv; using namespace cv::cuda; +#ifdef HAVE_CUDA + +namespace { + +class HostMemAllocator : public MatAllocator +{ +public: + explicit HostMemAllocator(unsigned int flags) : flags_(flags) + { + } + + UMatData* allocate(int dims, const int* sizes, int type, + void* data0, size_t* step, + int /*flags*/, UMatUsageFlags /*usageFlags*/) const + { + size_t total = CV_ELEM_SIZE(type); + for (int i = dims-1; i >= 0; i--) + { + if (step) + { + if (data0 && step[i] != CV_AUTOSTEP) + { + CV_Assert(total <= step[i]); + total = step[i]; + } + else + { + step[i] = total; + } + } + + total *= sizes[i]; + } + + UMatData* u = new UMatData(this); + u->size = total; + + if (data0) + { + u->data = u->origdata = static_cast(data0); + u->flags |= UMatData::USER_ALLOCATED; + } + else + { + void* ptr = 0; + cudaSafeCall( cudaHostAlloc(&ptr, total, flags_) ); + + u->data = u->origdata = static_cast(ptr); + } + + return u; + } + + bool allocate(UMatData* u, int /*accessFlags*/, UMatUsageFlags /*usageFlags*/) const + { + return (u != NULL); + } + + void deallocate(UMatData* u) const + { + CV_Assert(u->urefcount >= 0); + CV_Assert(u->refcount >= 0); + + if (u && u->refcount == 0) + { + if ( !(u->flags & UMatData::USER_ALLOCATED) ) + { + cudaFreeHost(u->origdata); + u->origdata = 0; + } + + delete u; + } + } + +private: + unsigned int flags_; +}; + +} // namespace + +#endif + +MatAllocator* cv::cuda::HostMem::getAllocator(AllocType alloc_type) +{ +#ifndef HAVE_CUDA + (void) alloc_type; + throw_no_cuda(); + return NULL; +#else + static std::map > allocators; + + unsigned int flag = cudaHostAllocDefault; + + switch (alloc_type) + { + case PAGE_LOCKED: flag = cudaHostAllocDefault; break; + case SHARED: flag = cudaHostAllocMapped; break; + case WRITE_COMBINED: flag = cudaHostAllocWriteCombined; break; + default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type"); + } + + Ptr& a = allocators[flag]; + + if (a.empty()) + { + a = makePtr(flag); + } + + return a.get(); +#endif +} + #ifdef HAVE_CUDA namespace { @@ -59,7 +173,7 @@ namespace } #endif -void cv::cuda::CudaMem::create(int rows_, int cols_, int type_) +void cv::cuda::HostMem::create(int rows_, int cols_, int type_) { #ifndef HAVE_CUDA (void) rows_; @@ -123,9 +237,9 @@ void cv::cuda::CudaMem::create(int rows_, int cols_, int type_) #endif } -CudaMem cv::cuda::CudaMem::reshape(int new_cn, int new_rows) const +HostMem cv::cuda::HostMem::reshape(int new_cn, int new_rows) const { - CudaMem hdr = *this; + HostMem hdr = *this; int cn = channels(); if (new_cn == 0) @@ -166,7 +280,7 @@ CudaMem cv::cuda::CudaMem::reshape(int new_cn, int new_rows) const return hdr; } -void cv::cuda::CudaMem::release() +void cv::cuda::HostMem::release() { #ifdef HAVE_CUDA if (refcount && CV_XADD(refcount, -1) == 1) @@ -181,7 +295,7 @@ void cv::cuda::CudaMem::release() #endif } -GpuMat cv::cuda::CudaMem::createGpuMatHeader() const +GpuMat cv::cuda::HostMem::createGpuMatHeader() const { #ifndef HAVE_CUDA throw_no_cuda(); diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 980ade1845..38ff7ed53a 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -1187,18 +1187,18 @@ Mat _InputArray::getMat(int i) const return Mat(); } - if( k == GPU_MAT ) + if( k == CUDA_GPU_MAT ) { CV_Assert( i < 0 ); CV_Error(cv::Error::StsNotImplemented, "You should explicitly call download method for cuda::GpuMat object"); return Mat(); } - if( k == CUDA_MEM ) + if( k == CUDA_HOST_MEM ) { CV_Assert( i < 0 ); - const cuda::CudaMem* cuda_mem = (const cuda::CudaMem*)obj; + const cuda::HostMem* cuda_mem = (const cuda::HostMem*)obj; return cuda_mem->createMatHeader(); } @@ -1391,15 +1391,15 @@ cuda::GpuMat _InputArray::getGpuMat() const { int k = kind(); - if (k == GPU_MAT) + if (k == CUDA_GPU_MAT) { const cuda::GpuMat* d_mat = (const cuda::GpuMat*)obj; return *d_mat; } - if (k == CUDA_MEM) + if (k == CUDA_HOST_MEM) { - const cuda::CudaMem* cuda_mem = (const cuda::CudaMem*)obj; + const cuda::HostMem* cuda_mem = (const cuda::HostMem*)obj; return cuda_mem->createGpuMatHeader(); } @@ -1412,7 +1412,7 @@ cuda::GpuMat _InputArray::getGpuMat() const if (k == NONE) return cuda::GpuMat(); - CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for cuda::GpuMat and cuda::CudaMem"); + CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for cuda::GpuMat and cuda::HostMem"); return cuda::GpuMat(); } @@ -1520,20 +1520,22 @@ Size _InputArray::size(int i) const return buf->size(); } - if( k == GPU_MAT ) + if( k == CUDA_GPU_MAT ) { CV_Assert( i < 0 ); const cuda::GpuMat* d_mat = (const cuda::GpuMat*)obj; return d_mat->size(); } - CV_Assert( k == CUDA_MEM ); - //if( k == CUDA_MEM ) + if( k == CUDA_HOST_MEM ) { CV_Assert( i < 0 ); - const cuda::CudaMem* cuda_mem = (const cuda::CudaMem*)obj; + const cuda::HostMem* cuda_mem = (const cuda::HostMem*)obj; return cuda_mem->size(); } + + CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type"); + return Size(); } int _InputArray::sizend(int* arrsz, int i) const @@ -1700,18 +1702,20 @@ int _InputArray::dims(int i) const return 2; } - if( k == GPU_MAT ) + if( k == CUDA_GPU_MAT ) { CV_Assert( i < 0 ); return 2; } - CV_Assert( k == CUDA_MEM ); - //if( k == CUDA_MEM ) + if( k == CUDA_HOST_MEM ) { CV_Assert( i < 0 ); return 2; } + + CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type"); + return 0; } size_t _InputArray::total(int i) const @@ -1799,12 +1803,14 @@ int _InputArray::type(int i) const if( k == OPENGL_BUFFER ) return ((const ogl::Buffer*)obj)->type(); - if( k == GPU_MAT ) + if( k == CUDA_GPU_MAT ) return ((const cuda::GpuMat*)obj)->type(); - CV_Assert( k == CUDA_MEM ); - //if( k == CUDA_MEM ) - return ((const cuda::CudaMem*)obj)->type(); + if( k == CUDA_HOST_MEM ) + return ((const cuda::HostMem*)obj)->type(); + + CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type"); + return 0; } int _InputArray::depth(int i) const @@ -1863,12 +1869,14 @@ bool _InputArray::empty() const if( k == OPENGL_BUFFER ) return ((const ogl::Buffer*)obj)->empty(); - if( k == GPU_MAT ) + if( k == CUDA_GPU_MAT ) return ((const cuda::GpuMat*)obj)->empty(); - CV_Assert( k == CUDA_MEM ); - //if( k == CUDA_MEM ) - return ((const cuda::CudaMem*)obj)->empty(); + if( k == CUDA_HOST_MEM ) + return ((const cuda::HostMem*)obj)->empty(); + + CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type"); + return true; } bool _InputArray::isContinuous(int i) const @@ -1970,7 +1978,7 @@ size_t _InputArray::offset(int i) const return vv[i].offset; } - if( k == GPU_MAT ) + if( k == CUDA_GPU_MAT ) { CV_Assert( i < 0 ); const cuda::GpuMat * const m = ((const cuda::GpuMat*)obj); @@ -2016,7 +2024,7 @@ size_t _InputArray::step(int i) const return vv[i].step; } - if( k == GPU_MAT ) + if( k == CUDA_GPU_MAT ) { CV_Assert( i < 0 ); return ((const cuda::GpuMat*)obj)->step; @@ -2095,7 +2103,7 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int ((UMat*)obj)->create(_sz, mtype); return; } - if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 ) + if( k == CUDA_GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == _sz); CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype); @@ -2109,11 +2117,11 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int ((ogl::Buffer*)obj)->create(_sz, mtype); return; } - if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) + if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { - CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == _sz); - CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype); - ((cuda::CudaMem*)obj)->create(_sz, mtype); + CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == _sz); + CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype); + ((cuda::HostMem*)obj)->create(_sz, mtype); return; } int sizes[] = {_sz.height, _sz.width}; @@ -2137,7 +2145,7 @@ void _OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTran ((UMat*)obj)->create(_rows, _cols, mtype); return; } - if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 ) + if( k == CUDA_GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(_cols, _rows)); CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype); @@ -2151,11 +2159,11 @@ void _OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTran ((ogl::Buffer*)obj)->create(_rows, _cols, mtype); return; } - if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) + if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { - CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == Size(_cols, _rows)); - CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype); - ((cuda::CudaMem*)obj)->create(_rows, _cols, mtype); + CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == Size(_cols, _rows)); + CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype); + ((cuda::HostMem*)obj)->create(_rows, _cols, mtype); return; } int sizes[] = {_rows, _cols}; @@ -2479,15 +2487,15 @@ void _OutputArray::release() const return; } - if( k == GPU_MAT ) + if( k == CUDA_GPU_MAT ) { ((cuda::GpuMat*)obj)->release(); return; } - if( k == CUDA_MEM ) + if( k == CUDA_HOST_MEM ) { - ((cuda::CudaMem*)obj)->release(); + ((cuda::HostMem*)obj)->release(); return; } @@ -2583,7 +2591,7 @@ UMat& _OutputArray::getUMatRef(int i) const cuda::GpuMat& _OutputArray::getGpuMatRef() const { int k = kind(); - CV_Assert( k == GPU_MAT ); + CV_Assert( k == CUDA_GPU_MAT ); return *(cuda::GpuMat*)obj; } @@ -2594,11 +2602,11 @@ ogl::Buffer& _OutputArray::getOGlBufferRef() const return *(ogl::Buffer*)obj; } -cuda::CudaMem& _OutputArray::getCudaMemRef() const +cuda::HostMem& _OutputArray::getHostMemRef() const { int k = kind(); - CV_Assert( k == CUDA_MEM ); - return *(cuda::CudaMem*)obj; + CV_Assert( k == CUDA_HOST_MEM ); + return *(cuda::HostMem*)obj; } void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const @@ -2614,10 +2622,10 @@ void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const } else if( k == UMAT ) ((UMat*)obj)->setTo(arr, mask); - else if( k == GPU_MAT ) + else if( k == CUDA_GPU_MAT ) { Mat value = arr.getMat(); - CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::GPU_MAT) ); + CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::CUDA_GPU_MAT) ); ((cuda::GpuMat*)obj)->setTo(Scalar(Vec(value.ptr())), mask); } else diff --git a/modules/core/src/opengl.cpp b/modules/core/src/opengl.cpp index e7b2a7627a..00a7f66662 100644 --- a/modules/core/src/opengl.cpp +++ b/modules/core/src/opengl.cpp @@ -509,7 +509,7 @@ cv::ogl::Buffer::Buffer(InputArray arr, Target target, bool autoRelease) : rows_ switch (kind) { case _InputArray::OPENGL_BUFFER: - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: copyFrom(arr, target, autoRelease); break; @@ -594,7 +594,7 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease) break; } - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: { #ifndef HAVE_CUDA throw_no_cuda(); @@ -657,7 +657,7 @@ void cv::ogl::Buffer::copyTo(OutputArray arr) const break; } - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: { #ifndef HAVE_CUDA throw_no_cuda(); @@ -1018,7 +1018,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols break; } - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: { #ifndef HAVE_CUDA throw_no_cuda(); @@ -1132,7 +1132,7 @@ void cv::ogl::Texture2D::copyFrom(InputArray arr, bool autoRelease) break; } - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: { #ifndef HAVE_CUDA throw_no_cuda(); @@ -1184,7 +1184,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c break; } - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: { #ifndef HAVE_CUDA throw_no_cuda(); diff --git a/modules/cuda/test/test_buffer_pool.cpp b/modules/core/test/cuda/test_buffer_pool.cpp similarity index 62% rename from modules/cuda/test/test_buffer_pool.cpp rename to modules/core/test/cuda/test_buffer_pool.cpp index 2526358d95..eec6ed3f64 100644 --- a/modules/cuda/test/test_buffer_pool.cpp +++ b/modules/core/test/cuda/test_buffer_pool.cpp @@ -40,13 +40,13 @@ // //M*/ -#include "test_precomp.hpp" +#include "../test_precomp.hpp" #ifdef HAVE_CUDA -#include "opencv2/cudaarithm.hpp" -#include "opencv2/cudawarping.hpp" +#include "opencv2/core/cuda.hpp" #include "opencv2/core/private.cuda.hpp" +#include "opencv2/ts/cuda_test.hpp" using namespace testing; using namespace cv; @@ -54,65 +54,64 @@ using namespace cv::cuda; struct BufferPoolTest : TestWithParam { + void RunSimpleTest(Stream& stream, HostMem& dst_1, HostMem& dst_2) + { + BufferPool pool(stream); + + { + GpuMat buf0 = pool.getBuffer(Size(640, 480), CV_8UC1); + EXPECT_FALSE( buf0.empty() ); + + buf0.setTo(Scalar::all(0), stream); + + GpuMat buf1 = pool.getBuffer(Size(640, 480), CV_8UC1); + EXPECT_FALSE( buf1.empty() ); + + buf0.convertTo(buf1, buf1.type(), 1.0, 1.0, stream); + + buf1.download(dst_1, stream); + } + + { + GpuMat buf2 = pool.getBuffer(Size(1280, 1024), CV_32SC1); + EXPECT_FALSE( buf2.empty() ); + + buf2.setTo(Scalar::all(2), stream); + + buf2.download(dst_2, stream); + } + } + + void CheckSimpleTest(HostMem& dst_1, HostMem& dst_2) + { + EXPECT_MAT_NEAR(Mat(Size(640, 480), CV_8UC1, Scalar::all(1)), dst_1, 0.0); + EXPECT_MAT_NEAR(Mat(Size(1280, 1024), CV_32SC1, Scalar::all(2)), dst_2, 0.0); + } }; -namespace +CUDA_TEST_P(BufferPoolTest, FromNullStream) { - void func1(const GpuMat& src, GpuMat& dst, Stream& stream) - { - BufferPool pool(stream); + HostMem dst_1, dst_2; - GpuMat buf = pool.getBuffer(src.size(), CV_32FC(src.channels())); + RunSimpleTest(Stream::Null(), dst_1, dst_2); - src.convertTo(buf, CV_32F, 1.0 / 255.0, stream); - - cuda::exp(buf, dst, stream); - } - - void func2(const GpuMat& src, GpuMat& dst, Stream& stream) - { - BufferPool pool(stream); - - GpuMat buf1 = pool.getBuffer(saturate_cast(src.rows * 0.5), saturate_cast(src.cols * 0.5), src.type()); - - cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST, stream); - - GpuMat buf2 = pool.getBuffer(buf1.size(), CV_32FC(buf1.channels())); - - func1(buf1, buf2, stream); - - GpuMat buf3 = pool.getBuffer(src.size(), buf2.type()); - - cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST, stream); - - buf3.convertTo(dst, CV_8U, stream); - } + CheckSimpleTest(dst_1, dst_2); } -CUDA_TEST_P(BufferPoolTest, SimpleUsage) +CUDA_TEST_P(BufferPoolTest, From2Streams) { - DeviceInfo devInfo = GetParam(); - setDevice(devInfo.deviceID()); + HostMem dst1_1, dst1_2; + HostMem dst2_1, dst2_2; - GpuMat src(200, 200, CV_8UC1); - GpuMat dst; + Stream stream1, stream2; + RunSimpleTest(stream1, dst1_1, dst1_2); + RunSimpleTest(stream2, dst2_1, dst2_2); - Stream stream; + stream1.waitForCompletion(); + stream2.waitForCompletion(); - func2(src, dst, stream); - - stream.waitForCompletion(); - - GpuMat buf, buf1, buf2, buf3; - GpuMat dst_gold; - - cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST); - buf1.convertTo(buf, CV_32F, 1.0 / 255.0); - cuda::exp(buf, buf2); - cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST); - buf3.convertTo(dst_gold, CV_8U); - - ASSERT_MAT_NEAR(dst_gold, dst, 0); + CheckSimpleTest(dst1_1, dst1_2); + CheckSimpleTest(dst2_1, dst2_2); } INSTANTIATE_TEST_CASE_P(CUDA_Stream, BufferPoolTest, ALL_DEVICES); diff --git a/modules/cuda/test/test_gpumat.cpp b/modules/core/test/cuda/test_gpumat.cpp similarity index 91% rename from modules/cuda/test/test_gpumat.cpp rename to modules/core/test/cuda/test_gpumat.cpp index dcd368c085..b549f03a05 100644 --- a/modules/cuda/test/test_gpumat.cpp +++ b/modules/core/test/cuda/test_gpumat.cpp @@ -40,16 +40,19 @@ // //M*/ -#include "test_precomp.hpp" +#include "../test_precomp.hpp" #ifdef HAVE_CUDA +#include "opencv2/core/cuda.hpp" +#include "opencv2/ts/cuda_test.hpp" + using namespace cvtest; //////////////////////////////////////////////////////////////////////////////// // SetTo -PARAM_TEST_CASE(SetTo, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi) +PARAM_TEST_CASE(GpuMat_SetTo, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi) { cv::cuda::DeviceInfo devInfo; cv::Size size; @@ -67,7 +70,7 @@ PARAM_TEST_CASE(SetTo, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi) } }; -CUDA_TEST_P(SetTo, Zero) +CUDA_TEST_P(GpuMat_SetTo, Zero) { cv::Scalar zero = cv::Scalar::all(0); @@ -77,7 +80,7 @@ CUDA_TEST_P(SetTo, Zero) EXPECT_MAT_NEAR(cv::Mat::zeros(size, type), mat, 0.0); } -CUDA_TEST_P(SetTo, SameVal) +CUDA_TEST_P(GpuMat_SetTo, SameVal) { cv::Scalar val = cv::Scalar::all(randomDouble(0.0, 255.0)); @@ -102,7 +105,7 @@ CUDA_TEST_P(SetTo, SameVal) } } -CUDA_TEST_P(SetTo, DifferentVal) +CUDA_TEST_P(GpuMat_SetTo, DifferentVal) { cv::Scalar val = randomScalar(0.0, 255.0); @@ -127,7 +130,7 @@ CUDA_TEST_P(SetTo, DifferentVal) } } -CUDA_TEST_P(SetTo, Masked) +CUDA_TEST_P(GpuMat_SetTo, Masked) { cv::Scalar val = randomScalar(0.0, 255.0); cv::Mat mat_gold = randomMat(size, type); @@ -156,7 +159,7 @@ CUDA_TEST_P(SetTo, Masked) } } -INSTANTIATE_TEST_CASE_P(CUDA_GpuMat, SetTo, testing::Combine( +INSTANTIATE_TEST_CASE_P(CUDA, GpuMat_SetTo, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, ALL_TYPES, @@ -165,7 +168,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_GpuMat, SetTo, testing::Combine( //////////////////////////////////////////////////////////////////////////////// // CopyTo -PARAM_TEST_CASE(CopyTo, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi) +PARAM_TEST_CASE(GpuMat_CopyTo, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi) { cv::cuda::DeviceInfo devInfo; cv::Size size; @@ -184,7 +187,7 @@ PARAM_TEST_CASE(CopyTo, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi) } }; -CUDA_TEST_P(CopyTo, WithOutMask) +CUDA_TEST_P(GpuMat_CopyTo, WithOutMask) { cv::Mat src = randomMat(size, type); @@ -195,7 +198,7 @@ CUDA_TEST_P(CopyTo, WithOutMask) EXPECT_MAT_NEAR(src, dst, 0.0); } -CUDA_TEST_P(CopyTo, Masked) +CUDA_TEST_P(GpuMat_CopyTo, Masked) { cv::Mat src = randomMat(size, type); cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); @@ -226,7 +229,7 @@ CUDA_TEST_P(CopyTo, Masked) } } -INSTANTIATE_TEST_CASE_P(CUDA_GpuMat, CopyTo, testing::Combine( +INSTANTIATE_TEST_CASE_P(CUDA, GpuMat_CopyTo, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, ALL_TYPES, @@ -235,7 +238,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_GpuMat, CopyTo, testing::Combine( //////////////////////////////////////////////////////////////////////////////// // ConvertTo -PARAM_TEST_CASE(ConvertTo, cv::cuda::DeviceInfo, cv::Size, MatDepth, MatDepth, UseRoi) +PARAM_TEST_CASE(GpuMat_ConvertTo, cv::cuda::DeviceInfo, cv::Size, MatDepth, MatDepth, UseRoi) { cv::cuda::DeviceInfo devInfo; cv::Size size; @@ -255,7 +258,7 @@ PARAM_TEST_CASE(ConvertTo, cv::cuda::DeviceInfo, cv::Size, MatDepth, MatDepth, U } }; -CUDA_TEST_P(ConvertTo, WithOutScaling) +CUDA_TEST_P(GpuMat_ConvertTo, WithOutScaling) { cv::Mat src = randomMat(size, depth1); @@ -285,7 +288,7 @@ CUDA_TEST_P(ConvertTo, WithOutScaling) } } -CUDA_TEST_P(ConvertTo, WithScaling) +CUDA_TEST_P(GpuMat_ConvertTo, WithScaling) { cv::Mat src = randomMat(size, depth1); double a = randomDouble(0.0, 1.0); @@ -317,7 +320,7 @@ CUDA_TEST_P(ConvertTo, WithScaling) } } -INSTANTIATE_TEST_CASE_P(CUDA_GpuMat, ConvertTo, testing::Combine( +INSTANTIATE_TEST_CASE_P(CUDA, GpuMat_ConvertTo, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, ALL_DEPTH, @@ -356,6 +359,6 @@ CUDA_TEST_P(EnsureSizeIsEnough, BufferReuse) EXPECT_EQ(reinterpret_cast(old.data), reinterpret_cast(buffer.data)); } -INSTANTIATE_TEST_CASE_P(CUDA_GpuMat, EnsureSizeIsEnough, ALL_DEVICES); +INSTANTIATE_TEST_CASE_P(CUDA, EnsureSizeIsEnough, ALL_DEVICES); #endif // HAVE_CUDA diff --git a/modules/cuda/test/test_opengl.cpp b/modules/core/test/cuda/test_opengl.cpp similarity index 98% rename from modules/cuda/test/test_opengl.cpp rename to modules/core/test/cuda/test_opengl.cpp index 0b4812c209..f4c733d064 100644 --- a/modules/cuda/test/test_opengl.cpp +++ b/modules/core/test/cuda/test_opengl.cpp @@ -40,10 +40,14 @@ // //M*/ -#include "test_precomp.hpp" +#include "../test_precomp.hpp" #if defined(HAVE_CUDA) && defined(HAVE_OPENGL) +#include "opencv2/core/cuda.hpp" +#include "opencv2/core/opengl.hpp" +#include "opencv2/ts/cuda_test.hpp" + using namespace cvtest; ///////////////////////////////////////////// diff --git a/modules/cuda/test/test_stream.cpp b/modules/core/test/cuda/test_stream.cpp similarity index 82% rename from modules/cuda/test/test_stream.cpp rename to modules/core/test/cuda/test_stream.cpp index cdeca71aba..a0e451a62a 100644 --- a/modules/cuda/test/test_stream.cpp +++ b/modules/core/test/cuda/test_stream.cpp @@ -40,22 +40,23 @@ // //M*/ -#include "test_precomp.hpp" +#include "../test_precomp.hpp" #ifdef HAVE_CUDA #include -#if CUDART_VERSION >= 5000 +#include "opencv2/core/cuda.hpp" +#include "opencv2/ts/cuda_test.hpp" using namespace cvtest; struct Async : testing::TestWithParam { - cv::cuda::CudaMem src; + cv::cuda::HostMem src; cv::cuda::GpuMat d_src; - cv::cuda::CudaMem dst; + cv::cuda::HostMem dst; cv::cuda::GpuMat d_dst; virtual void SetUp() @@ -63,7 +64,7 @@ struct Async : testing::TestWithParam cv::cuda::DeviceInfo devInfo = GetParam(); cv::cuda::setDevice(devInfo.deviceID()); - src = cv::cuda::CudaMem(cv::cuda::CudaMem::PAGE_LOCKED); + src = cv::cuda::HostMem(cv::cuda::HostMem::PAGE_LOCKED); cv::Mat m = randomMat(cv::Size(128, 128), CV_8UC1); m.copyTo(src); @@ -76,8 +77,8 @@ void checkMemSet(int status, void* userData) Async* test = reinterpret_cast(userData); - cv::cuda::CudaMem src = test->src; - cv::cuda::CudaMem dst = test->dst; + cv::cuda::HostMem src = test->src; + cv::cuda::HostMem dst = test->dst; cv::Mat dst_gold = cv::Mat::zeros(src.size(), src.type()); @@ -105,8 +106,8 @@ void checkConvert(int status, void* userData) Async* test = reinterpret_cast(userData); - cv::cuda::CudaMem src = test->src; - cv::cuda::CudaMem dst = test->dst; + cv::cuda::HostMem src = test->src; + cv::cuda::HostMem dst = test->dst; cv::Mat dst_gold; src.createMatHeader().convertTo(dst_gold, CV_32S); @@ -128,8 +129,25 @@ CUDA_TEST_P(Async, Convert) stream.waitForCompletion(); } +CUDA_TEST_P(Async, HostMemAllocator) +{ + cv::cuda::Stream stream; + + cv::Mat h_dst; + h_dst.allocator = cv::cuda::HostMem::getAllocator(); + + d_src.upload(src, stream); + d_src.convertTo(d_dst, CV_32S, stream); + d_dst.download(h_dst, stream); + + stream.waitForCompletion(); + + cv::Mat dst_gold; + src.createMatHeader().convertTo(dst_gold, CV_32S); + + ASSERT_MAT_NEAR(dst_gold, h_dst, 0); +} + INSTANTIATE_TEST_CASE_P(CUDA_Stream, Async, ALL_DEVICES); -#endif // CUDART_VERSION >= 5000 - #endif // HAVE_CUDA diff --git a/modules/core/test/test_main.cpp b/modules/core/test/test_main.cpp index d5400e20fd..5ddfb72348 100644 --- a/modules/core/test/test_main.cpp +++ b/modules/core/test/test_main.cpp @@ -7,4 +7,14 @@ #include "test_precomp.hpp" +#ifndef HAVE_CUDA + CV_TEST_MAIN("cv") + +#else + +#include "opencv2/ts/cuda_test.hpp" + +CV_CUDA_TEST_MAIN("cv") + +#endif diff --git a/modules/cuda/perf/perf_buffer_pool.cpp b/modules/cuda/perf/perf_buffer_pool.cpp deleted file mode 100644 index 72bd47a070..0000000000 --- a/modules/cuda/perf/perf_buffer_pool.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "perf_precomp.hpp" - -#ifdef HAVE_CUDA - -#include "opencv2/cudaarithm.hpp" -#include "opencv2/core/private.cuda.hpp" - -using namespace testing; -using namespace perf; -using namespace cv; -using namespace cv::cuda; - -namespace -{ - void func1(const GpuMat& src, GpuMat& dst, Stream& stream) - { - BufferPool pool(stream); - - GpuMat buf = pool.getBuffer(src.size(), CV_32FC(src.channels())); - - src.convertTo(buf, CV_32F, 1.0 / 255.0, stream); - - cuda::exp(buf, dst, stream); - } - - void func2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) - { - BufferPool pool(stream); - - GpuMat buf1 = pool.getBuffer(src1.size(), CV_32FC(src1.channels())); - - func1(src1, buf1, stream); - - GpuMat buf2 = pool.getBuffer(src2.size(), CV_32FC(src2.channels())); - - func1(src2, buf2, stream); - - cuda::add(buf1, buf2, dst, noArray(), -1, stream); - } -} - -PERF_TEST_P(Sz, BufferPool, CUDA_TYPICAL_MAT_SIZES) -{ - static bool first = true; - - const Size size = GetParam(); - - const bool useBufferPool = PERF_RUN_CUDA(); - - Mat host_src(size, CV_8UC1); - declare.in(host_src, WARMUP_RNG); - - GpuMat src1(host_src), src2(host_src); - GpuMat dst; - - setBufferPoolUsage(useBufferPool); - if (useBufferPool && first) - { - setBufferPoolConfig(-1, 25 * 1024 * 1024, 2); - first = false; - } - - TEST_CYCLE() - { - func2(src1, src2, dst, Stream::Null()); - } - - Mat h_dst(dst); - SANITY_CHECK(h_dst); -} - -#endif diff --git a/modules/cudaarithm/src/cuda/lut.cu b/modules/cudaarithm/src/cuda/lut.cu index a8d5bc5b06..0b1fe8b0d5 100644 --- a/modules/cudaarithm/src/cuda/lut.cu +++ b/modules/cudaarithm/src/cuda/lut.cu @@ -74,7 +74,7 @@ namespace LookUpTableImpl::LookUpTableImpl(InputArray _lut) { - if (_lut.kind() == _InputArray::GPU_MAT) + if (_lut.kind() == _InputArray::CUDA_GPU_MAT) { d_lut = _lut.getGpuMat(); } diff --git a/modules/cudaimgproc/src/histogram.cpp b/modules/cudaimgproc/src/histogram.cpp index 37edd6e0d1..d63e57de31 100644 --- a/modules/cudaimgproc/src/histogram.cpp +++ b/modules/cudaimgproc/src/histogram.cpp @@ -467,14 +467,14 @@ void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int _levels.create(1, nLevels, CV_32SC1); Mat host_levels; - if (kind == _InputArray::GPU_MAT) + if (kind == _InputArray::CUDA_GPU_MAT) host_levels.create(1, nLevels, CV_32SC1); else host_levels = _levels.getMat(); nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr(), nLevels, lowerLevel, upperLevel) ); - if (kind == _InputArray::GPU_MAT) + if (kind == _InputArray::CUDA_GPU_MAT) _levels.getGpuMatRef().upload(host_levels); } diff --git a/modules/cudaoptflow/src/farneback.cpp b/modules/cudaoptflow/src/farneback.cpp index dc52035255..6b74432632 100644 --- a/modules/cudaoptflow/src/farneback.cpp +++ b/modules/cudaoptflow/src/farneback.cpp @@ -95,6 +95,16 @@ namespace cv { namespace cuda { namespace device { namespace optflow_farneback }}}} // namespace cv { namespace cuda { namespace cudev { namespace optflow_farneback +namespace +{ + GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat) + { + if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols) + return mat(Rect(0, 0, cols, rows)); + + return mat = GpuMat(rows, cols, type); + } +} void cv::cuda::FarnebackOpticalFlow::prepareGaussian( int n, double sigma, float *g, float *xg, float *xxg, diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/detail/gpumat.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/detail/gpumat.hpp index e378c52372..665840ec03 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/detail/gpumat.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/detail/gpumat.hpp @@ -51,33 +51,33 @@ namespace cv { namespace cudev { template -__host__ GpuMat_::GpuMat_() - : GpuMat() +__host__ GpuMat_::GpuMat_(Allocator* allocator) + : GpuMat(allocator) { flags = (flags & ~CV_MAT_TYPE_MASK) | DataType::type; } template -__host__ GpuMat_::GpuMat_(int arows, int acols) - : GpuMat(arows, acols, DataType::type) +__host__ GpuMat_::GpuMat_(int arows, int acols, Allocator* allocator) + : GpuMat(arows, acols, DataType::type, allocator) { } template -__host__ GpuMat_::GpuMat_(Size asize) - : GpuMat(asize.height, asize.width, DataType::type) +__host__ GpuMat_::GpuMat_(Size asize, Allocator* allocator) + : GpuMat(asize.height, asize.width, DataType::type, allocator) { } template -__host__ GpuMat_::GpuMat_(int arows, int acols, Scalar val) - : GpuMat(arows, acols, DataType::type, val) +__host__ GpuMat_::GpuMat_(int arows, int acols, Scalar val, Allocator* allocator) + : GpuMat(arows, acols, DataType::type, val, allocator) { } template -__host__ GpuMat_::GpuMat_(Size asize, Scalar val) - : GpuMat(asize.height, asize.width, DataType::type, val) +__host__ GpuMat_::GpuMat_(Size asize, Scalar val, Allocator* allocator) + : GpuMat(asize.height, asize.width, DataType::type, val, allocator) { } @@ -88,8 +88,8 @@ __host__ GpuMat_::GpuMat_(const GpuMat_& m) } template -__host__ GpuMat_::GpuMat_(const GpuMat& m) - : GpuMat() +__host__ GpuMat_::GpuMat_(const GpuMat& m, Allocator* allocator) + : GpuMat(allocator) { flags = (flags & ~CV_MAT_TYPE_MASK) | DataType::type; @@ -134,8 +134,8 @@ __host__ GpuMat_::GpuMat_(const GpuMat_& m, Rect roi) } template -__host__ GpuMat_::GpuMat_(InputArray arr) - : GpuMat() +__host__ GpuMat_::GpuMat_(InputArray arr, Allocator* allocator) + : GpuMat(allocator) { flags = (flags & ~CV_MAT_TYPE_MASK) | DataType::type; upload(arr); @@ -341,7 +341,7 @@ namespace cv { template __host__ _InputArray::_InputArray(const cudev::GpuMat_<_Tp>& m) - : flags(FIXED_TYPE + GPU_MAT + DataType<_Tp>::type), obj((void*)&m) + : flags(FIXED_TYPE + CUDA_GPU_MAT + DataType<_Tp>::type), obj((void*)&m) {} template diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/gpumat.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/gpumat.hpp index 02d8cb7735..983652c53c 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/gpumat.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/gpumat.hpp @@ -63,21 +63,21 @@ public: typedef T value_type; //! default constructor - __host__ GpuMat_(); + __host__ GpuMat_(Allocator* allocator = defaultAllocator()); //! constructs GpuMat of the specified size - __host__ GpuMat_(int arows, int acols); - __host__ explicit GpuMat_(Size asize); + __host__ GpuMat_(int arows, int acols, Allocator* allocator = defaultAllocator()); + __host__ explicit GpuMat_(Size asize, Allocator* allocator = defaultAllocator()); //! constucts GpuMat and fills it with the specified value - __host__ GpuMat_(int arows, int acols, Scalar val); - __host__ GpuMat_(Size asize, Scalar val); + __host__ GpuMat_(int arows, int acols, Scalar val, Allocator* allocator = defaultAllocator()); + __host__ GpuMat_(Size asize, Scalar val, Allocator* allocator = defaultAllocator()); //! copy constructor __host__ GpuMat_(const GpuMat_& m); //! copy/conversion contructor. If m is of different type, it's converted - __host__ explicit GpuMat_(const GpuMat& m); + __host__ explicit GpuMat_(const GpuMat& m, Allocator* allocator = defaultAllocator()); //! constructs a matrix on top of user-allocated data. step is in bytes(!!!), regardless of the type __host__ GpuMat_(int arows, int acols, T* adata, size_t astep = Mat::AUTO_STEP); @@ -88,7 +88,7 @@ public: __host__ GpuMat_(const GpuMat_& m, Rect roi); //! builds GpuMat from host memory (Blocking call) - __host__ explicit GpuMat_(InputArray arr); + __host__ explicit GpuMat_(InputArray arr, Allocator* allocator = defaultAllocator()); //! assignment operators __host__ GpuMat_& operator =(const GpuMat_& m); diff --git a/modules/highgui/src/window.cpp b/modules/highgui/src/window.cpp index f43f86411b..cda019102c 100644 --- a/modules/highgui/src/window.cpp +++ b/modules/highgui/src/window.cpp @@ -297,7 +297,7 @@ void cv::imshow( const String& winname, InputArray _img ) cv::ogl::Texture2D& tex = ownWndTexs[winname]; - if (_img.kind() == _InputArray::GPU_MAT) + if (_img.kind() == _InputArray::CUDA_GPU_MAT) { cv::ogl::Buffer& buf = ownWndBufs[winname]; buf.copyFrom(_img); diff --git a/modules/superres/src/btv_l1_cuda.cpp b/modules/superres/src/btv_l1_cuda.cpp index 1ec71f220c..f72e3846e8 100644 --- a/modules/superres/src/btv_l1_cuda.cpp +++ b/modules/superres/src/btv_l1_cuda.cpp @@ -514,7 +514,7 @@ namespace ++outPos_; const GpuMat& curOutput = at(outPos_, outputs_); - if (_output.kind() == _InputArray::GPU_MAT) + if (_output.kind() == _InputArray::CUDA_GPU_MAT) curOutput.convertTo(_output.getGpuMatRef(), CV_8U); else { diff --git a/modules/superres/src/frame_source.cpp b/modules/superres/src/frame_source.cpp index 0f81efd5e1..216e869c14 100644 --- a/modules/superres/src/frame_source.cpp +++ b/modules/superres/src/frame_source.cpp @@ -116,7 +116,7 @@ namespace { if (_frame.kind() == _InputArray::MAT) vc_ >> _frame.getMatRef(); - else if(_frame.kind() == _InputArray::GPU_MAT) + else if(_frame.kind() == _InputArray::CUDA_GPU_MAT) { vc_ >> frame_; arrCopy(frame_, _frame); @@ -226,7 +226,7 @@ namespace void VideoFrameSource_CUDA::nextFrame(OutputArray _frame) { - if (_frame.kind() == _InputArray::GPU_MAT) + if (_frame.kind() == _InputArray::CUDA_GPU_MAT) { bool res = reader_->nextFrame(_frame.getGpuMatRef()); if (!res) diff --git a/modules/superres/src/input_array_utility.cpp b/modules/superres/src/input_array_utility.cpp index 9f4f229360..ec20673b47 100644 --- a/modules/superres/src/input_array_utility.cpp +++ b/modules/superres/src/input_array_utility.cpp @@ -49,7 +49,7 @@ Mat cv::superres::arrGetMat(InputArray arr, Mat& buf) { switch (arr.kind()) { - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: arr.getGpuMat().download(buf); return buf; @@ -66,7 +66,7 @@ UMat cv::superres::arrGetUMat(InputArray arr, UMat& buf) { switch (arr.kind()) { - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: arr.getGpuMat().download(buf); return buf; @@ -83,7 +83,7 @@ GpuMat cv::superres::arrGetGpuMat(InputArray arr, GpuMat& buf) { switch (arr.kind()) { - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: return arr.getGpuMat(); case _InputArray::OPENGL_BUFFER: @@ -184,7 +184,7 @@ namespace switch (src.kind()) { - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: #ifdef HAVE_OPENCV_CUDAIMGPROC cuda::cvtColor(src.getGpuMat(), dst.getGpuMatRef(), code, cn); #else @@ -218,7 +218,7 @@ namespace switch (src.kind()) { - case _InputArray::GPU_MAT: + case _InputArray::CUDA_GPU_MAT: src.getGpuMat().convertTo(dst.getGpuMatRef(), depth, scale); break; diff --git a/modules/superres/src/optical_flow.cpp b/modules/superres/src/optical_flow.cpp index 7227b080fc..fcc9bef347 100644 --- a/modules/superres/src/optical_flow.cpp +++ b/modules/superres/src/optical_flow.cpp @@ -458,7 +458,7 @@ namespace GpuMat input0 = convertToType(frame0, work_type_, buf_[2], buf_[3]); GpuMat input1 = convertToType(frame1, work_type_, buf_[4], buf_[5]); - if (_flow2.needed() && _flow1.kind() == _InputArray::GPU_MAT && _flow2.kind() == _InputArray::GPU_MAT) + if (_flow2.needed() && _flow1.kind() == _InputArray::CUDA_GPU_MAT && _flow2.kind() == _InputArray::CUDA_GPU_MAT) { impl(input0, input1, _flow1.getGpuMatRef(), _flow2.getGpuMatRef()); return; diff --git a/modules/ts/include/opencv2/ts.hpp b/modules/ts/include/opencv2/ts.hpp index 209cb2915a..c1b68a0c0f 100644 --- a/modules/ts/include/opencv2/ts.hpp +++ b/modules/ts/include/opencv2/ts.hpp @@ -569,10 +569,10 @@ void parseCustomOptions(int argc, char **argv); #define CV_TEST_MAIN(resourcesubdir, ...) \ int main(int argc, char **argv) \ { \ + __CV_TEST_EXEC_ARGS(__VA_ARGS__) \ cvtest::TS::ptr()->init(resourcesubdir); \ ::testing::InitGoogleTest(&argc, argv); \ cvtest::printVersionInfo(); \ - __CV_TEST_EXEC_ARGS(__VA_ARGS__) \ TEST_DUMP_OCL_INFO \ parseCustomOptions(argc, argv); \ return RUN_ALL_TESTS(); \ diff --git a/modules/ts/include/opencv2/ts/cuda_test.hpp b/modules/ts/include/opencv2/ts/cuda_test.hpp index 049021b544..b225ab1796 100644 --- a/modules/ts/include/opencv2/ts/cuda_test.hpp +++ b/modules/ts/include/opencv2/ts/cuda_test.hpp @@ -340,6 +340,7 @@ namespace cvtest CV_EXPORTS void dumpImage(const std::string& fileName, const cv::Mat& image); CV_EXPORTS void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); + CV_EXPORTS void parseCudaDeviceOptions(int argc, char **argv); CV_EXPORTS void printCudaInfo(); } @@ -351,53 +352,7 @@ namespace cv { namespace cuda #ifdef HAVE_CUDA #define CV_CUDA_TEST_MAIN(resourcesubdir) \ - int main(int argc, char* argv[]) \ - { \ - try \ - { \ - cv::CommandLineParser cmd(argc, argv, \ - "{ h help ? | | Print help}" \ - "{ i info | | Print information about system and exit }" \ - "{ device | -1 | Device on which tests will be executed (-1 means all devices) }" \ - ); \ - if (cmd.has("help")) \ - { \ - cmd.printMessage(); \ - return 0; \ - } \ - cvtest::printCudaInfo(); \ - if (cmd.has("info")) \ - { \ - return 0; \ - } \ - int device = cmd.get("device"); \ - if (device < 0) \ - { \ - cvtest::DeviceManager::instance().loadAll(); \ - std::cout << "Run tests on all supported devices \n" << std::endl; \ - } \ - else \ - { \ - cvtest::DeviceManager::instance().load(device); \ - cv::cuda::DeviceInfo info(device); \ - std::cout << "Run tests on device " << device << " [" << info.name() << "] \n" << std::endl; \ - } \ - cvtest::TS::ptr()->init( resourcesubdir ); \ - testing::InitGoogleTest(&argc, argv); \ - return RUN_ALL_TESTS(); \ - } \ - catch (const std::exception& e) \ - { \ - std::cerr << e.what() << std::endl; \ - return -1; \ - } \ - catch (...) \ - { \ - std::cerr << "Unknown error" << std::endl; \ - return -1; \ - } \ - return 0; \ - } + CV_TEST_MAIN(resourcesubdir, cvtest::parseCudaDeviceOptions(argc, argv), cvtest::printCudaInfo()) #else // HAVE_CUDA diff --git a/modules/ts/src/cuda_test.cpp b/modules/ts/src/cuda_test.cpp index 1086fd111d..a48e0a0871 100644 --- a/modules/ts/src/cuda_test.cpp +++ b/modules/ts/src/cuda_test.cpp @@ -190,6 +190,33 @@ namespace cvtest } } + void parseCudaDeviceOptions(int argc, char **argv) + { + cv::CommandLineParser cmd(argc, argv, + "{ cuda_device | -1 | CUDA device on which tests will be executed (-1 means all devices) }" + "{ h help | false | Print help info }" + ); + + if (cmd.has("help")) + { + std::cout << "\nAvailable options besides google test option: \n"; + cmd.printMessage(); + } + + int device = cmd.get("cuda_device"); + if (device < 0) + { + cvtest::DeviceManager::instance().loadAll(); + std::cout << "Run tests on all supported CUDA devices \n" << std::endl; + } + else + { + cvtest::DeviceManager::instance().load(device); + cv::cuda::DeviceInfo info(device); + std::cout << "Run tests on CUDA device " << device << " [" << info.name() << "] \n" << std::endl; + } + } + ////////////////////////////////////////////////////////////////////// // Additional assertion @@ -278,7 +305,7 @@ namespace cvtest Mat getMat(InputArray arr) { - if (arr.kind() == _InputArray::GPU_MAT) + if (arr.kind() == _InputArray::CUDA_GPU_MAT) { Mat m; arr.getGpuMat().download(m); diff --git a/samples/gpu/stereo_multi.cpp b/samples/gpu/stereo_multi.cpp index 0997165f1f..bfb3e8a48b 100644 --- a/samples/gpu/stereo_multi.cpp +++ b/samples/gpu/stereo_multi.cpp @@ -278,7 +278,7 @@ public: StereoMultiGpuStream(); ~StereoMultiGpuStream(); - void compute(const CudaMem& leftFrame, const CudaMem& rightFrame, CudaMem& disparity); + void compute(const HostMem& leftFrame, const HostMem& rightFrame, HostMem& disparity); private: GpuMat d_leftFrames[2]; @@ -316,7 +316,7 @@ StereoMultiGpuStream::~StereoMultiGpuStream() streams[1].release(); } -void StereoMultiGpuStream::compute(const CudaMem& leftFrame, const CudaMem& rightFrame, CudaMem& disparity) +void StereoMultiGpuStream::compute(const HostMem& leftFrame, const HostMem& rightFrame, HostMem& disparity) { disparity.create(leftFrame.size(), CV_8UC1); @@ -403,7 +403,7 @@ int main(int argc, char** argv) cout << endl; Mat leftFrame, rightFrame; - CudaMem leftGrayFrame, rightGrayFrame; + HostMem leftGrayFrame, rightGrayFrame; StereoSingleGpu gpu0Alg(0); StereoSingleGpu gpu1Alg(1); @@ -413,7 +413,7 @@ int main(int argc, char** argv) Mat disparityGpu0; Mat disparityGpu1; Mat disparityMultiThread; - CudaMem disparityMultiStream; + HostMem disparityMultiStream; Mat disparityGpu0Show; Mat disparityGpu1Show;