diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 8996372b59..2bf60eff32 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -170,17 +170,17 @@ CV_EXPORTS void merge(const std::vector& src, OutputArray dst, Stream& s CV_EXPORTS void split(InputArray src, GpuMat* dst, Stream& stream = Stream::Null()); CV_EXPORTS void split(InputArray src, std::vector& dst, Stream& stream = Stream::Null()); -//! implements generalized matrix product algorithm GEMM from BLAS -CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, - const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); - //! transposes the matrix //! supports matrix with element size = 1, 4 and 8 bytes (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc) -CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst, Stream& stream = Stream::Null()); +CV_EXPORTS void transpose(InputArray src1, OutputArray dst, Stream& stream = Stream::Null()); //! reverses the order of the rows, columns or both in a matrix //! supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or CV_32F depth -CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode, Stream& stream = Stream::Null()); +CV_EXPORTS void flip(InputArray src, OutputArray dst, int flipCode, Stream& stream = Stream::Null()); + +//! implements generalized matrix product algorithm GEMM from BLAS +CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, + const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i)) //! destination array will have the depth type as lut and the same channels number as source diff --git a/modules/gpuarithm/src/core.cpp b/modules/gpuarithm/src/core.cpp index 92f085ebc3..c8ef966e51 100644 --- a/modules/gpuarithm/src/core.cpp +++ b/modules/gpuarithm/src/core.cpp @@ -53,9 +53,9 @@ void cv::gpu::merge(const std::vector&, OutputArray, Stream&) { throw_no void cv::gpu::split(InputArray, GpuMat*, Stream&) { throw_no_cuda(); } void cv::gpu::split(InputArray, std::vector&, Stream&) { throw_no_cuda(); } -void cv::gpu::transpose(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::transpose(InputArray, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::flip(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); } +void cv::gpu::flip(InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); } @@ -182,13 +182,16 @@ namespace arithm template void transpose(PtrStepSz src, PtrStepSz dst, cudaStream_t stream); } -void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s) +void cv::gpu::transpose(InputArray _src, OutputArray _dst, Stream& _stream) { + GpuMat src = _src.getGpuMat(); + CV_Assert( src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8 ); - dst.create( src.cols, src.rows, src.type() ); + _dst.create( src.cols, src.rows, src.type() ); + GpuMat dst = _dst.getGpuMat(); - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); if (src.elemSize() == 1) { @@ -260,7 +263,7 @@ namespace }; } -void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream) +void cv::gpu::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& stream) { typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream); static const func_t funcs[6][4] = @@ -273,10 +276,13 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream) {NppMirror::call, 0, NppMirror::call, NppMirror::call} }; + GpuMat src = _src.getGpuMat(); + CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F); CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4); - dst.create(src.size(), src.type()); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream)); } diff --git a/samples/gpu/driver_api_multi.cpp b/samples/gpu/driver_api_multi.cpp index 8b4623f41b..a5343df4e9 100644 --- a/samples/gpu/driver_api_multi.cpp +++ b/samples/gpu/driver_api_multi.cpp @@ -130,12 +130,12 @@ void Worker::operator()(int device_id) const rng.fill(src, RNG::UNIFORM, 0, 1); // CPU works - transpose(src, dst); + cv::transpose(src, dst); // GPU works GpuMat d_src(src); GpuMat d_dst; - transpose(d_src, d_dst); + gpu::transpose(d_src, d_dst); // Check results bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3; diff --git a/samples/gpu/multi.cpp b/samples/gpu/multi.cpp index 34b111829c..0e9bef6368 100644 --- a/samples/gpu/multi.cpp +++ b/samples/gpu/multi.cpp @@ -87,12 +87,12 @@ void Worker::operator()(int device_id) const rng.fill(src, RNG::UNIFORM, 0, 1); // CPU works - transpose(src, dst); + cv::transpose(src, dst); // GPU works GpuMat d_src(src); GpuMat d_dst; - transpose(d_src, d_dst); + gpu::transpose(d_src, d_dst); // Check results bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3;