diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp
index 82558846d6..9800877a77 100644
--- a/modules/core/include/opencv2/core/cuda.hpp
+++ b/modules/core/include/opencv2/core/cuda.hpp
@@ -340,6 +340,201 @@ public:
     Allocator* allocator;
 };
 
+struct CV_EXPORTS_W GpuData
+{
+    explicit GpuData(size_t _size);
+    ~GpuData();
+
+    GpuData(const GpuData&) = delete;
+    GpuData& operator=(const GpuData&) = delete;
+
+    GpuData(GpuData&&) = delete;
+    GpuData& operator=(GpuData&&) = delete;
+
+    uchar* data;
+    size_t size;
+};
+
+class CV_EXPORTS_W GpuMatND
+{
+public:
+    using SizeArray = std::vector<int>;
+    using StepArray = std::vector<size_t>;
+    using IndexArray = std::vector<int>;
+
+    //! destructor
+    ~GpuMatND();
+
+    //! default constructor
+    GpuMatND();
+
+    /** @overload
+    @param size Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    */
+    GpuMatND(SizeArray size, int type);
+
+    /** @overload
+    @param size Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
+    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+    data, which means that no data is copied. This operation is very efficient and can be used to
+    process external data using OpenCV functions. The external data is not automatically deallocated, so
+    you should take care of it.
+    @param step Array of _size.size()-1 steps in case of a multi-dimensional array (the last step is always
+    set to the element size). If not specified, the matrix is assumed to be continuous.
+    */
+    GpuMatND(SizeArray size, int type, void* data, StepArray step = StepArray());
+
+    /** @brief Allocates GPU memory.
+    If GPU memory is already allocated, this method may reuse it, but only if it has the same size
+    and type, was not externally allocated, is continuous (i.e., isContinuous() is true), and is not
+    a sub-matrix of another GpuMatND (i.e., isSubmatrix() is false). In other words, this method
+    guarantees that the GPU memory allocated by this method is always continuous and is never a
+    sub-region of another GpuMatND.
+    */
+    void create(SizeArray size, int type);
+
+    void release();
+
+    void swap(GpuMatND& m) noexcept;
+
+    /** @brief Creates a full copy of the array and the underlying data.
+    The method creates a full copy of the array. It mimics the behavior of Mat::clone(), i.e.
+    the original step is not taken into account. So, the array copy is a continuous array
+    occupying total()\*elemSize() bytes.
+    */
+    GpuMatND clone() const;
+
+    /** @overload
+    This overload is non-blocking, so it may return even if the copy operation is not finished.
+    */
+    GpuMatND clone(Stream& stream) const;
+
+    /** @brief Extracts a sub-matrix.
+    The operator makes a new header for the specified sub-array of \*this.
+    The operator is an O(1) operation, that is, no matrix data is copied.
+    @param ranges Array of selected ranges along each dimension.
+    */
+    GpuMatND operator()(const std::vector<Range>& ranges) const;
+
+    /** @brief Creates a GpuMat header for a 2D plane part of an n-dim matrix.
+    @note The returned GpuMat is constructed with the constructor for user-allocated data.
+    That is, it does not perform reference counting.
+    @note This function does not increment this GpuMatND's reference counter.
+    */
+    GpuMat createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const;
+
+    /** @overload
+    Creates a GpuMat header if this GpuMatND is effectively 2D.
+    @note The returned GpuMat is constructed with the constructor for user-allocated data.
+    That is, it does not perform reference counting.
+    @note This function does not increment this GpuMatND's reference counter.
+    */
+    GpuMat createGpuMatHeader() const;
+
+    /** @brief Extracts a 2D plane part of an n-dim matrix.
+    It differs from createGpuMatHeader(IndexArray, Range, Range) in that it clones a part of this
+    GpuMatND to the returned GpuMat.
+    @note This operator does not increment this GpuMatND's reference counter.
+    */
+    GpuMat operator()(IndexArray idx, Range rowRange, Range colRange) const;
+
+    /** @brief Extracts a 2D plane part of an n-dim matrix if this GpuMatND is effectively 2D.
+    It differs from createGpuMatHeader() in that it clones a part of this GpuMatND.
+    @note This operator does not increment this GpuMatND's reference counter.
+    */
+    operator GpuMat() const;
+
+    GpuMatND(const GpuMatND&) = default;
+    GpuMatND& operator=(const GpuMatND&) = default;
+
+    GpuMatND(GpuMatND&&) noexcept = default;
+    GpuMatND& operator=(GpuMatND&&) noexcept = default;
+
+    void upload(InputArray src);
+    void upload(InputArray src, Stream& stream);
+    void download(OutputArray dst) const;
+    void download(OutputArray dst, Stream& stream) const;
+
+    //! returns true iff the GpuMatND data is continuous
+    //! (i.e. when there are no gaps between successive rows)
+    bool isContinuous() const;
+
+    //! returns true if the matrix is a sub-matrix of another matrix
+    bool isSubmatrix() const;
+
+    //! returns element size in bytes
+    size_t elemSize() const;
+
+    //! returns the size of element channel in bytes
+    size_t elemSize1() const;
+
+    //! returns true if data is null
+    bool empty() const;
+
+    //! returns true if not empty and points to external (user-allocated) GPU memory
+    bool external() const;
+
+    //! returns pointer to the first byte of the GPU memory
+    uchar* getDevicePtr() const;
+
+    //! returns the total number of array elements
+    size_t total() const;
+
+    //! returns the size of underlying memory in bytes
+    size_t totalMemSize() const;
+
+    //! returns element type
+    int type() const;
+
+private:
+    //! internal use
+    void setFields(SizeArray size, int type, StepArray step = StepArray());
+
+public:
+    /*! includes several bit-fields:
+    - the magic signature
+    - continuity flag
+    - depth
+    - number of channels
+    */
+    int flags;
+
+    //! matrix dimensionality
+    int dims;
+
+    //! shape of this array
+    SizeArray size;
+
+    /*! step values
+    Their semantics are identical to the semantics of step for Mat.
+    */
+    StepArray step;
+
+private:
+    /*! internal use
+    If this GpuMatND holds external memory, this is empty.
+    */
+    std::shared_ptr<GpuData> data_;
+
+    /*! internal use
+    If this GpuMatND manages memory with reference counting, this value is
+    always equal to data_->data. If this GpuMatND holds external memory,
+    data_ is empty and data points to the external memory.
+    */
+    uchar* data;
+
+    /*!
+    internal use
+    If this GpuMatND is a sub-matrix of a larger matrix, this value is the
+    offset (in bytes) of the first byte of the sub-matrix relative to the
+    first byte of the whole matrix.
+    */
+    size_t offset;
+};
+
 /** @brief Creates a continuous matrix.
 
 @param rows Row count.
diff --git a/modules/core/include/opencv2/core/cuda.inl.hpp b/modules/core/include/opencv2/core/cuda.inl.hpp
index 30fc0aee22..3f2a0c7240 100644
--- a/modules/core/include/opencv2/core/cuda.inl.hpp
+++ b/modules/core/include/opencv2/core/cuda.inl.hpp
@@ -383,6 +383,92 @@ void swap(GpuMat& a, GpuMat& b)
     a.swap(b);
 }
 
+//===================================================================================
+// GpuMatND
+//===================================================================================
+
+inline
+GpuMatND::GpuMatND() :
+    flags(0), dims(0), data(nullptr), offset(0)
+{
+}
+
+inline
+GpuMatND::GpuMatND(SizeArray _size, int _type) :
+    flags(0), dims(0), data(nullptr), offset(0)
+{
+    create(std::move(_size), _type);
+}
+
+inline
+void GpuMatND::swap(GpuMatND& m) noexcept
+{
+    std::swap(*this, m);
+}
+
+inline
+bool GpuMatND::isContinuous() const
+{
+    return (flags & Mat::CONTINUOUS_FLAG) != 0;
+}
+
+inline
+bool GpuMatND::isSubmatrix() const
+{
+    return (flags & Mat::SUBMATRIX_FLAG) != 0;
+}
+
+inline
+size_t GpuMatND::elemSize() const
+{
+    return CV_ELEM_SIZE(flags);
+}
+
+inline
+size_t GpuMatND::elemSize1() const
+{
+    return CV_ELEM_SIZE1(flags);
+}
+
+inline
+bool GpuMatND::empty() const
+{
+    return data == nullptr;
+}
+
+inline
+bool GpuMatND::external() const
+{
+    return !empty() && data_.use_count() == 0;
+}
+
+inline
+uchar* GpuMatND::getDevicePtr() const
+{
+    return data + offset;
+}
+
+inline
+size_t GpuMatND::total() const
+{
+    size_t p = 1;
+    for (auto s : size)
+        p *= s;
+    return p;
+}
+
+inline
+size_t GpuMatND::totalMemSize() const
+{
+    return size[0] * step[0];
+}
+
+inline
+int GpuMatND::type() const
+{
+    return CV_MAT_TYPE(flags);
+}
+
 //===================================================================================
 // HostMem
 //===================================================================================
diff --git a/modules/core/src/cuda/gpu_mat_nd.cu b/modules/core/src/cuda/gpu_mat_nd.cu
new file mode 100644
index 0000000000..3f51fd8afa
--- /dev/null
+++ b/modules/core/src/cuda/gpu_mat_nd.cu
@@ -0,0 +1,269 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifndef HAVE_OPENCV_CUDEV
+
+#error "opencv_cudev is required"
+
+#else
+
+#include "opencv2/core/cuda.hpp"
+#include "opencv2/cudev.hpp"
+
+using namespace cv;
+using namespace cv::cuda;
+
+GpuData::GpuData(const size_t _size)
+    : data(nullptr), size(_size)
+{
+    CV_CUDEV_SAFE_CALL(cudaMalloc(&data, _size));
+}
+
+GpuData::~GpuData()
+{
+    CV_CUDEV_SAFE_CALL(cudaFree(data));
+}
+
+/////////////////////////////////////////////////////
+/// create
+
+void GpuMatND::create(SizeArray _size, int _type)
+{
+    {
+        auto elements_nonzero = [](SizeArray& v)
+        {
+            return std::all_of(v.begin(), v.end(),
+                [](unsigned u){ return u > 0; });
+        };
+        CV_Assert(!_size.empty());
+        CV_Assert(elements_nonzero(_size));
+    }
+
+    _type &= Mat::TYPE_MASK;
+
+    if (size == _size && type() == _type && !empty() && !external() && isContinuous() && !isSubmatrix())
+        return;
+
+    release();
+
+    setFields(std::move(_size), _type);
+
+    data_ = std::make_shared<GpuData>(totalMemSize());
+    data = data_->data;
+    offset = 0;
+}
+
+/////////////////////////////////////////////////////
+/// release
+
+void GpuMatND::release()
+{
+    data = nullptr;
+    data_.reset();
+
+    flags = dims = offset = 0;
+    size.clear();
+    step.clear();
+}
+
+/////////////////////////////////////////////////////
+/// clone
+
+// Advances the (dims-2)-dimensional plane index like an odometer and moves
+// d/s to the next 2D plane of dst/src; returns false after the last plane.
+static bool next(uchar*& d, const uchar*& s, std::vector<int>& idx, const int dims, const GpuMatND& dst, const GpuMatND& src)
+{
+    int inc = dims-3;
+
+    while (true)
+    {
+        if (idx[inc] == src.size[inc] - 1)
+        {
+            if (inc == 0)
+            {
+                return false;
+            }
+
+            idx[inc] = 0;
+            d -= (dst.size[inc] - 1) * dst.step[inc];
+            s -= (src.size[inc] - 1) * src.step[inc];
+            inc--;
+        }
+        else
+        {
+            idx[inc]++;
+            d += dst.step[inc];
+            s += src.step[inc];
+            break;
+        }
+    }
+
+    return true;
+}
+
+GpuMatND GpuMatND::clone() const
+{
+    CV_DbgAssert(!empty());
+
+    GpuMatND ret(size, type());
+
+    if (isContinuous())
+    {
+        CV_CUDEV_SAFE_CALL(cudaMemcpy(ret.getDevicePtr(), getDevicePtr(), ret.totalMemSize(), cudaMemcpyDeviceToDevice));
+    }
+    else
+    {
+        // 1D arrays are always continuous
+
+        if (dims == 2)
+        {
+            CV_CUDEV_SAFE_CALL(
+                cudaMemcpy2D(ret.getDevicePtr(), ret.step[0], getDevicePtr(), step[0],
+                    size[1]*step[1], size[0], cudaMemcpyDeviceToDevice)
+            );
+        }
+        else
+        {
+            std::vector<int> idx(dims-2, 0);
+
+            uchar* d = ret.getDevicePtr();
+            const uchar* s = getDevicePtr();
+
+            // iterate over each 2D plane
+            do
+            {
+                CV_CUDEV_SAFE_CALL(
+                    cudaMemcpy2DAsync(
+                        d, ret.step[dims-2], s, step[dims-2],
+                        size[dims-1]*step[dims-1], size[dims-2], cudaMemcpyDeviceToDevice)
+                );
+            }
+            while (next(d, s, idx, dims, ret, *this));
+
+            CV_CUDEV_SAFE_CALL(cudaStreamSynchronize(0));
+        }
+    }
+
+    return ret;
+}
+
+GpuMatND GpuMatND::clone(Stream& stream) const
+{
+    CV_DbgAssert(!empty());
+
+    GpuMatND ret(size, type());
+
+    cudaStream_t _stream = StreamAccessor::getStream(stream);
+
+    if (isContinuous())
+    {
+        CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(ret.getDevicePtr(), getDevicePtr(), ret.totalMemSize(), cudaMemcpyDeviceToDevice, _stream));
+    }
+    else
+    {
+        // 1D arrays are always continuous
+
+        if (dims == 2)
+        {
+            CV_CUDEV_SAFE_CALL(
+                cudaMemcpy2DAsync(ret.getDevicePtr(), ret.step[0], getDevicePtr(), step[0],
+                    size[1]*step[1], size[0], cudaMemcpyDeviceToDevice, _stream)
+            );
+        }
+        else
+        {
+            std::vector<int> idx(dims-2, 0);
+
+            uchar* d = ret.getDevicePtr();
+            const uchar* s = getDevicePtr();
+
+            // iterate over each 2D plane
+            do
+            {
+                CV_CUDEV_SAFE_CALL(
+                    cudaMemcpy2DAsync(
+                        d, ret.step[dims-2], s, step[dims-2],
+                        size[dims-1]*step[dims-1], size[dims-2],
+                        cudaMemcpyDeviceToDevice, _stream)
+                );
+            }
+            while (next(d, s, idx, dims, ret, *this));
+        }
+    }
+
+    return ret;
+}
+
+/////////////////////////////////////////////////////
+/// upload
+
+void GpuMatND::upload(InputArray src)
+{
+    Mat mat = src.getMat();
+
+    CV_DbgAssert(!mat.empty());
+
+    if (!mat.isContinuous())
+        mat = mat.clone();
+
+    SizeArray _size(mat.dims);
+    std::copy_n(mat.size.p, mat.dims, _size.data());
+
+    create(std::move(_size), mat.type());
+
+    CV_CUDEV_SAFE_CALL(cudaMemcpy(getDevicePtr(), mat.data, totalMemSize(), cudaMemcpyHostToDevice));
+}
+
+void GpuMatND::upload(InputArray src, Stream& stream)
+{
+    Mat mat = src.getMat();
+
+    CV_DbgAssert(!mat.empty());
+
+    if (!mat.isContinuous())
+        mat = mat.clone();
+
+    SizeArray _size(mat.dims);
+    std::copy_n(mat.size.p, mat.dims, _size.data());
+
+    create(std::move(_size), mat.type());
+
+    cudaStream_t _stream = StreamAccessor::getStream(stream);
+    CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(getDevicePtr(), mat.data, totalMemSize(), cudaMemcpyHostToDevice, _stream));
+}
+
+/////////////////////////////////////////////////////
+/// download
+
+void GpuMatND::download(OutputArray dst) const
+{
+    CV_DbgAssert(!empty());
+
+    dst.create(dims, size.data(), type());
+    Mat mat = dst.getMat();
+
+    GpuMatND gmat = *this;
+
+    if (!gmat.isContinuous())
+        gmat = gmat.clone();
+
+    CV_CUDEV_SAFE_CALL(cudaMemcpy(mat.data, gmat.getDevicePtr(), mat.total() * mat.elemSize(), cudaMemcpyDeviceToHost));
+}
+
+void GpuMatND::download(OutputArray dst, Stream& stream) const
+{
+    CV_DbgAssert(!empty());
+
+    dst.create(dims, size.data(), type());
+    Mat mat = dst.getMat();
+
+    GpuMatND gmat = *this;
+
+    if (!gmat.isContinuous())
+        gmat = gmat.clone(stream);
+
+    cudaStream_t _stream = StreamAccessor::getStream(stream);
+    CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(mat.data, gmat.getDevicePtr(), mat.total() * mat.elemSize(), cudaMemcpyDeviceToHost, _stream));
+}
+
+#endif
diff --git a/modules/core/src/cuda_gpu_mat_nd.cpp b/modules/core/src/cuda_gpu_mat_nd.cpp
new file mode 100644
index 0000000000..8440f179ea
--- /dev/null
+++ b/modules/core/src/cuda_gpu_mat_nd.cpp
@@ -0,0 +1,180 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::cuda;
+
+GpuMatND::~GpuMatND() = default;
+
+GpuMatND::GpuMatND(SizeArray _size, int _type, void* _data, StepArray _step) :
+    flags(0), dims(0), data(static_cast<uchar*>(_data)), offset(0)
+{
+    CV_Assert(_step.empty() || _size.size() == _step.size() + 1);
+
+    setFields(std::move(_size), _type, std::move(_step));
+}
+
+GpuMatND GpuMatND::operator()(const std::vector<Range>& ranges) const
+{
+    CV_Assert(dims == (int)ranges.size());
+
+    for (int i = 0; i < dims; ++i)
+    {
+        Range r = ranges[i];
+        CV_Assert(r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= size[i]));
+    }
+
+    GpuMatND ret = *this;
+
+    for (int i = 0; i < dims; ++i)
+    {
+        Range r = ranges[i];
+        if (r != Range::all() && r != Range(0, ret.size[i]))
+        {
+            ret.offset += r.start * ret.step[i];
+            ret.size[i] = r.size();
+            ret.flags |= Mat::SUBMATRIX_FLAG;
+        }
+    }
+
+    ret.flags = cv::updateContinuityFlag(ret.flags, dims, ret.size.data(), ret.step.data());
+
+    return ret;
+}
+
+GpuMat GpuMatND::createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const
+{
+    CV_Assert((int)idx.size() == dims - 2);
+
+    std::vector<Range> ranges;
+    for (int i : idx)
+        ranges.emplace_back(i, i+1);
+    ranges.push_back(rowRange);
+    ranges.push_back(colRange);
+
+    return (*this)(ranges).createGpuMatHeader();
+}
+
+GpuMat GpuMatND::createGpuMatHeader() const
+{
+    auto Effectively2D = [](GpuMatND m)
+    {
+        for (int i = 0; i < m.dims - 2; ++i)
+            if (m.size[i] > 1)
+                return false;
+        return true;
+    };
+    CV_Assert(Effectively2D(*this));
+
+    return GpuMat(size[dims-2], size[dims-1], type(), getDevicePtr(), step[dims-2]);
+}
+
+GpuMat GpuMatND::operator()(IndexArray idx, Range rowRange, Range colRange) const
+{
+    return createGpuMatHeader(idx, rowRange, colRange).clone();
+}
+
+GpuMatND::operator GpuMat() const
+{
+    return createGpuMatHeader().clone();
+}
+
+void GpuMatND::setFields(SizeArray _size, int _type, StepArray _step)
+{
+    _type &= Mat::TYPE_MASK;
+
+    flags = Mat::MAGIC_VAL + _type;
+    dims = static_cast<int>(_size.size());
+    size = std::move(_size);
+
+    if (_step.empty())
+    {
+        step = StepArray(dims);
+
+        step.back() = elemSize();
+        for (int _i = dims - 2; _i >= 0; --_i)
+        {
+            const size_t i = _i;
+            step[i] = step[i+1] * size[i+1];
+        }
+
+        flags |= Mat::CONTINUOUS_FLAG;
+    }
+    else
+    {
+        step = std::move(_step);
+        step.push_back(elemSize());
+
+        flags = cv::updateContinuityFlag(flags, dims, size.data(), step.data());
+    }
+
+    CV_Assert(size.size() == step.size());
+    CV_Assert(step.back() == elemSize());
+}
+
+#ifndef HAVE_CUDA
+
+GpuData::GpuData(const size_t _size)
+    : data(nullptr), size(0)
+{
+    CV_UNUSED(_size);
+    throw_no_cuda();
+}
+
+GpuData::~GpuData()
+{
+}
+
+void GpuMatND::create(SizeArray _size, int _type)
+{
+    CV_UNUSED(_size);
+    CV_UNUSED(_type);
+    throw_no_cuda();
+}
+
+void GpuMatND::release()
+{
+    throw_no_cuda();
+}
+
+GpuMatND GpuMatND::clone() const
+{
+    throw_no_cuda();
+}
+
+GpuMatND GpuMatND::clone(Stream& stream) const
+{
+    CV_UNUSED(stream);
+    throw_no_cuda();
+}
+
+void GpuMatND::upload(InputArray src)
+{
+    CV_UNUSED(src);
+    throw_no_cuda();
+}
+
+void GpuMatND::upload(InputArray src, Stream& stream)
+{
+    CV_UNUSED(src);
+    CV_UNUSED(stream);
+    throw_no_cuda();
+}
+
+void GpuMatND::download(OutputArray dst) const
+{
+    CV_UNUSED(dst);
+    throw_no_cuda();
+}
+
+void GpuMatND::download(OutputArray dst, Stream& stream) const
+{
+    CV_UNUSED(dst);
+    CV_UNUSED(stream);
+    throw_no_cuda();
+}
+
+#endif
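
Usage note (illustrative only, not part of the patch): the sketch below shows how the API introduced above fits together, assuming a CUDA-enabled build of OpenCV with this diff applied. All identifiers come from the diff itself; the shapes and values are arbitrary.

```cpp
#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>

using namespace cv;
using namespace cv::cuda;

int main()
{
    // A 3-D host array: 4 planes of 8x16, single-channel float, filled with 1.
    int dims[] = {4, 8, 16};
    Mat host(3, dims, CV_32FC1, Scalar(1));

    // upload() calls create() internally with the host shape and copies to the device.
    GpuMatND gmat;
    gmat.upload(host);

    // O(1) sub-matrix: planes 1..2, rows 2..5, all columns (no data copied).
    GpuMatND sub = gmat(std::vector<Range>{Range(1, 3), Range(2, 6), Range::all()});

    // Zero-copy 2D view of the first plane of `sub`. No reference counting is
    // performed, so the view must not outlive gmat's underlying allocation.
    GpuMat plane = sub.createGpuMatHeader({0}, Range::all(), Range::all());
    plane.setTo(Scalar(0)); // writes through the view into gmat's memory

    // clone() packs the non-continuous sub-matrix into continuous GPU memory.
    GpuMatND packed = sub.clone();

    // Download to the host: `out` becomes a 2x4x16 CV_32FC1 Mat.
    Mat out;
    packed.download(out);

    return 0;
}
```

Note the ownership split the sketch relies on: `createGpuMatHeader()` returns a non-owning view built from GpuMat's user-allocated-data constructor, while `clone()` and the `operator GpuMat()` conversion always produce an independent, continuous copy.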