From b2cdb7fa3917c5861d1287e9236a3c0d9376d7f6 Mon Sep 17 00:00:00 2001
From: Alexey Spizhevoy
Date: Mon, 20 Sep 2010 13:20:25 +0000
Subject: [PATCH] implemented cv::gpu::merge and cv::gpu::split functions

---
 modules/gpu/include/opencv2/gpu/gpu.hpp |  24 ++
 modules/gpu/src/cuda/split_merge.cu     | 452 ++++++++++++++++++++++++
 modules/gpu/src/split_merge.cpp         | 151 ++++++++
 tests/gpu/src/split_merge.cpp           | 275 ++++++++++++++
 4 files changed, 902 insertions(+)
 create mode 100644 modules/gpu/src/cuda/split_merge.cu
 create mode 100644 modules/gpu/src/split_merge.cpp
 create mode 100644 tests/gpu/src/split_merge.cpp

diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 51ce18c0ea..0a5d8eeb3c 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -408,6 +408,30 @@ namespace cv
         //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
         CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR);
 
+        //! makes multi-channel array out of several single-channel arrays
+        CV_EXPORTS void merge(const GpuMat* src, size_t n, GpuMat& dst);
+
+        //! makes multi-channel array out of several single-channel arrays
+        CV_EXPORTS void merge(const vector<GpuMat>& src, GpuMat& dst);
+
+        //! makes multi-channel array out of several single-channel arrays (async version)
+        CV_EXPORTS void merge(const GpuMat* src, size_t n, GpuMat& dst, const Stream& stream);
+
+        //! makes multi-channel array out of several single-channel arrays (async version)
+        CV_EXPORTS void merge(const vector<GpuMat>& src, GpuMat& dst, const Stream& stream);
+
+        //! copies each plane of a multi-channel array to a dedicated array
+        CV_EXPORTS void split(const GpuMat& src, GpuMat* dst);
+
+        //! copies each plane of a multi-channel array to a dedicated array
+        CV_EXPORTS void split(const GpuMat& src, vector<GpuMat>& dst);
+
+        //! copies each plane of a multi-channel array to a dedicated array (async version)
+        CV_EXPORTS void split(const GpuMat& src, GpuMat* dst, const Stream& stream);
+
+        //! copies each plane of a multi-channel array to a dedicated array (async version)
+        CV_EXPORTS void split(const GpuMat& src, vector<GpuMat>& dst, const Stream& stream);
+
         ////////////////////////////// Image processing //////////////////////////////
 
         // DST[x,y] = SRC[xmap[x,y],ymap[x,y]] with bilinear interpolation.
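Not part of the patch — a minimal usage sketch of the API declared above, assuming the OpenCV 2.x gpu module headers and a CUDA-capable device; sizes and fill values are arbitrary and error handling is omitted:

    #include "opencv2/gpu/gpu.hpp"
    #include <vector>

    using namespace cv;

    void merge_split_example()
    {
        // three single-channel 8-bit planes already on the GPU
        std::vector<gpu::GpuMat> planes(3);
        for (int i = 0; i < 3; ++i)
            planes[i] = gpu::GpuMat(480, 640, CV_8UC1, Scalar::all(i));

        // interleave them into one 3-channel image (merged becomes CV_8UC3)
        gpu::GpuMat merged;
        gpu::merge(planes, merged);

        // and split the result back into per-channel planes
        std::vector<gpu::GpuMat> planes2;
        gpu::split(merged, planes2);   // planes2.size() == 3 afterwards
    }
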
diff --git a/modules/gpu/src/cuda/split_merge.cu b/modules/gpu/src/cuda/split_merge.cu
new file mode 100644
index 0000000000..b377372c59
--- /dev/null
+++ b/modules/gpu/src/cuda/split_merge.cu
@@ -0,0 +1,452 @@
+#include "opencv2/gpu/devmem2d.hpp"
+#include "cuda_shared.hpp"
+
+namespace cv { namespace gpu { namespace split_merge {
+
+    template <typename T, size_t elem_size = sizeof(T)>
+    struct TypeTraits
+    {
+        typedef T type;
+        typedef T type2;
+        typedef T type3;
+        typedef T type4;
+    };
+
+    template <typename T>
+    struct TypeTraits<T, 1>
+    {
+        typedef char type;
+        typedef char2 type2;
+        typedef char3 type3;
+        typedef char4 type4;
+    };
+
+    template <typename T>
+    struct TypeTraits<T, 2>
+    {
+        typedef short type;
+        typedef short2 type2;
+        typedef short3 type3;
+        typedef short4 type4;
+    };
+
+    template <typename T>
+    struct TypeTraits<T, 4>
+    {
+        typedef int type;
+        typedef int2 type2;
+        typedef int3 type3;
+        typedef int4 type4;
+    };
+
+    template <typename T>
+    struct TypeTraits<T, 8>
+    {
+        typedef double type;
+        typedef double2 type2;
+        //typedef double3 type3;
+        //typedef double4 type3;
+    };
+
+    typedef void (*MergeFunction)(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream);
+    typedef void (*SplitFunction)(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream);
+
+    //------------------------------------------------------------
+    // Merge
+
+    template <typename T>
+    static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
+    {
+        dim3 blockDim(32, 8);
+        dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
+        mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
+                src[0].ptr, src[0].step,
+                src[1].ptr, src[1].step,
+                dst.rows, dst.cols, dst.ptr, dst.step);
+        if (stream == 0)
+            cudaSafeCall(cudaThreadSynchronize());
+    }
+
+
+    template <typename T>
+    static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
+    {
+        dim3 blockDim(32, 8);
+        dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
+        mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
+                src[0].ptr, src[0].step,
+                src[1].ptr, src[1].step,
+                src[2].ptr, src[2].step,
+                dst.rows, dst.cols, dst.ptr, dst.step);
+        if (stream == 0)
+            cudaSafeCall(cudaThreadSynchronize());
+    }
+
+
+    template <typename T>
+    static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)
+    {
+        dim3 blockDim(32, 8);
+        dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
+        mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
+                src[0].ptr, src[0].step,
+                src[1].ptr, src[1].step,
+                src[2].ptr, src[2].step,
+                src[3].ptr, src[3].step,
+                dst.rows, dst.cols, dst.ptr, dst.step);
+        if (stream == 0)
+            cudaSafeCall(cudaThreadSynchronize());
+    }
+
+
+    extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,
+                                 int total_channels, int elem_size,
+                                 const cudaStream_t& stream)
+    {
+        static MergeFunction merge_func_tbl[] =
+        {
+            mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,
+            mergeC3_<char>, mergeC3_<short>, mergeC3_<int>, 0, mergeC3_<double>,
+            mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,
+        };
+
+        int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);
+        MergeFunction merge_func = merge_func_tbl[merge_func_id];
+
+        if (merge_func == 0)
+            cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);
+
+        merge_func(src, dst, stream);
+    }
+
+
+    template <typename T>
+    __global__ void mergeC2_(const uchar* src0, size_t src0_step,
+                             const uchar* src1, size_t src1_step,
+                             int rows, int cols, uchar* dst, size_t dst_step)
+    {
+        typedef typename TypeTraits<T>::type2 dst_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        const T* src0_y = (const T*)(src0 + y * src0_step);
+        const T* src1_y = (const T*)(src1 + y * src1_step);
+        dst_type* dst_y = (dst_type*)(dst + y * dst_step);
+
+        if (x < cols && y < rows)
+        {
+            dst_type dst_elem;
+            dst_elem.x = src0_y[x];
+            dst_elem.y = src1_y[x];
+            dst_y[x] = dst_elem;
+        }
+    }
+
+
+    template <typename T>
+    __global__ void mergeC3_(const uchar* src0, size_t src0_step,
+                             const uchar* src1, size_t src1_step,
+                             const uchar* src2, size_t src2_step,
+                             int rows, int cols, uchar* dst, size_t dst_step)
+    {
+        typedef typename TypeTraits<T>::type3 dst_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        const T* src0_y = (const T*)(src0 + y * src0_step);
+        const T* src1_y = (const T*)(src1 + y * src1_step);
+        const T* src2_y = (const T*)(src2 + y * src2_step);
+        dst_type* dst_y = (dst_type*)(dst + y * dst_step);
+
+        if (x < cols && y < rows)
+        {
+            dst_type dst_elem;
+            dst_elem.x = src0_y[x];
+            dst_elem.y = src1_y[x];
+            dst_elem.z = src2_y[x];
+            dst_y[x] = dst_elem;
+        }
+    }
+
+
+    template <>
+    __global__ void mergeC3_<double>(const uchar* src0, size_t src0_step,
+                                     const uchar* src1, size_t src1_step,
+                                     const uchar* src2, size_t src2_step,
+                                     int rows, int cols, uchar* dst, size_t dst_step)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        const double* src0_y = (const double*)(src0 + y * src0_step);
+        const double* src1_y = (const double*)(src1 + y * src1_step);
+        const double* src2_y = (const double*)(src2 + y * src2_step);
+        double* dst_y = (double*)(dst + y * dst_step);
+
+        if (x < cols && y < rows)
+        {
+            dst_y[3 * x] = src0_y[x];
+            dst_y[3 * x + 1] = src1_y[x];
+            dst_y[3 * x + 2] = src2_y[x];
+        }
+    }
+
+
+    template <typename T>
+    __global__ void mergeC4_(const uchar* src0, size_t src0_step,
+                             const uchar* src1, size_t src1_step,
+                             const uchar* src2, size_t src2_step,
+                             const uchar* src3, size_t src3_step,
+                             int rows, int cols, uchar* dst, size_t dst_step)
+    {
+        typedef typename TypeTraits<T>::type4 dst_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        const T* src0_y = (const T*)(src0 + y * src0_step);
+        const T* src1_y = (const T*)(src1 + y * src1_step);
+        const T* src2_y = (const T*)(src2 + y * src2_step);
+        const T* src3_y = (const T*)(src3 + y * src3_step);
+        dst_type* dst_y = (dst_type*)(dst + y * dst_step);
+
+        if (x < cols && y < rows)
+        {
+            dst_type dst_elem;
+            dst_elem.x = src0_y[x];
+            dst_elem.y = src1_y[x];
+            dst_elem.z = src2_y[x];
+            dst_elem.w = src3_y[x];
+            dst_y[x] = dst_elem;
+        }
+    }
+
+
+    template <>
+    __global__ void mergeC4_<double>(const uchar* src0, size_t src0_step,
+                                     const uchar* src1, size_t src1_step,
+                                     const uchar* src2, size_t src2_step,
+                                     const uchar* src3, size_t src3_step,
+                                     int rows, int cols, uchar* dst, size_t dst_step)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        const double* src0_y = (const double*)(src0 + y * src0_step);
+        const double* src1_y = (const double*)(src1 + y * src1_step);
+        const double* src2_y = (const double*)(src2 + y * src2_step);
+        const double* src3_y = (const double*)(src3 + y * src3_step);
+        double2* dst_y = (double2*)(dst + y * dst_step);
+
+        if (x < cols && y < rows)
+        {
+            dst_y[2 * x] = make_double2(src0_y[x], src1_y[x]);
+            dst_y[2 * x + 1] = make_double2(src2_y[x], src3_y[x]);
+        }
+    }
+
+    //------------------------------------------------------------
+    // Split
+
+
+    template <typename T>
+    static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
+    {
+        dim3 blockDim(32, 8);
+        dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
+        splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
+                src.ptr, src.step, src.rows, src.cols,
+                dst[0].ptr, dst[0].step,
+                dst[1].ptr, dst[1].step);
+        if (stream == 0)
+            cudaSafeCall(cudaThreadSynchronize());
+    }
+
+
+    template <typename T>
+    static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
+    {
+        dim3 blockDim(32, 8);
+        dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
+        splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
+                src.ptr, src.step, src.rows, src.cols,
+                dst[0].ptr, dst[0].step,
+                dst[1].ptr, dst[1].step,
+                dst[2].ptr, dst[2].step);
+        if (stream == 0)
+            cudaSafeCall(cudaThreadSynchronize());
+    }
+
+
+    template <typename T>
+    static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)
+    {
+        dim3 blockDim(32, 8);
+        dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
+        splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
+                src.ptr, src.step, src.rows, src.cols,
+                dst[0].ptr, dst[0].step,
+                dst[1].ptr, dst[1].step,
+                dst[2].ptr, dst[2].step,
+                dst[3].ptr, dst[3].step);
+        if (stream == 0)
+            cudaSafeCall(cudaThreadSynchronize());
+    }
+
+
+    extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst,
+                                 int num_channels, int elem_size1,
+                                 const cudaStream_t& stream)
+    {
+        static SplitFunction split_func_tbl[] =
+        {
+            splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>,
+            splitC3_<char>, splitC3_<short>, splitC3_<int>, 0, splitC3_<double>,
+            splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>,
+        };
+
+        int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);
+        SplitFunction split_func = split_func_tbl[split_func_id];
+
+        if (split_func == 0)
+            cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);
+
+        split_func(src, dst, stream);
+    }
+
+
+    template <typename T>
+    __global__ void splitC2_(const uchar* src, size_t src_step,
+                             int rows, int cols,
+                             uchar* dst0, size_t dst0_step,
+                             uchar* dst1, size_t dst1_step)
+    {
+        typedef typename TypeTraits<T>::type2 src_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        const src_type* src_y = (const src_type*)(src + y * src_step);
+        T* dst0_y = (T*)(dst0 + y * dst0_step);
+        T* dst1_y = (T*)(dst1 + y * dst1_step);
+
+        if (x < cols && y < rows)
+        {
+            src_type src_elem = src_y[x];
+            dst0_y[x] = src_elem.x;
+            dst1_y[x] = src_elem.y;
+        }
+    }
+
+
+    template <typename T>
+    __global__ void splitC3_(const uchar* src, size_t src_step,
+                             int rows, int cols,
+                             uchar* dst0, size_t dst0_step,
+                             uchar* dst1, size_t dst1_step,
+                             uchar* dst2, size_t dst2_step)
+    {
+        typedef typename TypeTraits<T>::type3 src_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        const src_type* src_y = (const src_type*)(src + y * src_step);
+        T* dst0_y = (T*)(dst0 + y * dst0_step);
+        T* dst1_y = (T*)(dst1 + y * dst1_step);
+        T* dst2_y = (T*)(dst2 + y * dst2_step);
+
+        if (x < cols && y < rows)
+        {
+            src_type src_elem = src_y[x];
+            dst0_y[x] = src_elem.x;
+            dst1_y[x] = src_elem.y;
+            dst2_y[x] = src_elem.z;
+        }
+    }
+
+
+    template <>
+    __global__ void splitC3_<double>(
+            const uchar* src, size_t src_step, int rows, int cols,
+            uchar* dst0, size_t dst0_step,
+            uchar* dst1, size_t dst1_step,
+            uchar* dst2, size_t dst2_step)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        const double* src_y = (const double*)(src + y * src_step);
+        double* dst0_y = (double*)(dst0 + y * dst0_step);
+        double* dst1_y = (double*)(dst1 + y * dst1_step);
+        double* dst2_y = (double*)(dst2 + y * dst2_step);
+
+        if (x < cols && y < rows)
+        {
+            dst0_y[x] = src_y[3 * x];
+            dst1_y[x] = src_y[3 * x + 1];
+            dst2_y[x] = src_y[3 * x + 2];
+        }
+    }
+
+
+    template <typename T>
+    __global__ void splitC4_(const uchar* src, size_t src_step, int rows, int cols,
+                             uchar* dst0, size_t dst0_step,
+                             uchar* dst1, size_t dst1_step,
+                             uchar* dst2, size_t dst2_step,
+                             uchar* dst3, size_t dst3_step)
+    {
+        typedef typename TypeTraits<T>::type4 src_type;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        const src_type* src_y = (const src_type*)(src + y * src_step);
+        T* dst0_y = (T*)(dst0 + y * dst0_step);
+        T* dst1_y = (T*)(dst1 + y * dst1_step);
+        T* dst2_y = (T*)(dst2 + y * dst2_step);
+        T* dst3_y = (T*)(dst3 + y * dst3_step);
+
+        if (x < cols && y < rows)
+        {
+            src_type src_elem = src_y[x];
+            dst0_y[x] = src_elem.x;
+            dst1_y[x] = src_elem.y;
+            dst2_y[x] = src_elem.z;
+            dst3_y[x] = src_elem.w;
+        }
+    }
+
+
+    template <>
+    __global__ void splitC4_<double>(
+            const uchar* src, size_t src_step, int rows, int cols,
+            uchar* dst0, size_t dst0_step,
+            uchar* dst1, size_t dst1_step,
+            uchar* dst2, size_t dst2_step,
+            uchar* dst3, size_t dst3_step)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        const double2* src_y = (const double2*)(src + y * src_step);
+        double* dst0_y = (double*)(dst0 + y * dst0_step);
+        double* dst1_y = (double*)(dst1 + y * dst1_step);
+        double* dst2_y = (double*)(dst2 + y * dst2_step);
+        double* dst3_y = (double*)(dst3 + y * dst3_step);
+
+        if (x < cols && y < rows)
+        {
+            double2 src_elem1 = src_y[2 * x];
+            double2 src_elem2 = src_y[2 * x + 1];
+            dst0_y[x] = src_elem1.x;
+            dst1_y[x] = src_elem1.y;
+            dst2_y[x] = src_elem2.x;
+            dst3_y[x] = src_elem2.y;
+        }
+    }
+
+}}} // namespace cv::gpu::split_merge
\ No newline at end of file
diff --git a/modules/gpu/src/split_merge.cpp b/modules/gpu/src/split_merge.cpp
new file mode 100644
index 0000000000..8bdc2fb988
--- /dev/null
+++ b/modules/gpu/src/split_merge.cpp
@@ -0,0 +1,151 @@
+#include "precomp.hpp"
+#include <vector>
+
+using namespace std;
+
+#if !defined (HAVE_CUDA)
+
+void cv::gpu::merge(const GpuMat* /*src*/, size_t /*count*/, GpuMat& /*dst*/) { throw_nogpu(); }
+void cv::gpu::merge(const vector<GpuMat>& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
+void cv::gpu::merge(const GpuMat* /*src*/, size_t /*count*/, GpuMat& /*dst*/, const Stream& /*stream*/) { throw_nogpu(); }
+void cv::gpu::merge(const vector<GpuMat>& /*src*/, GpuMat& /*dst*/, const Stream& /*stream*/) { throw_nogpu(); }
+void cv::gpu::split(const GpuMat& /*src*/, GpuMat* /*dst*/) { throw_nogpu(); }
+void cv::gpu::split(const GpuMat& /*src*/, vector<GpuMat>& /*dst*/) { throw_nogpu(); }
+void cv::gpu::split(const GpuMat& /*src*/, GpuMat* /*dst*/, const Stream& /*stream*/) { throw_nogpu(); }
+void cv::gpu::split(const GpuMat& /*src*/, vector<GpuMat>& /*dst*/, const Stream& /*stream*/) { throw_nogpu(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+namespace cv { namespace gpu { namespace split_merge
+{
+    extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,
+                                 int total_channels, int elem_size,
+                                 const cudaStream_t& stream);
+
+    extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst,
+                                 int num_channels, int elem_size1,
+                                 const cudaStream_t& stream);
+
+    void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream)
+    {
+        CV_Assert(src);
+        CV_Assert(n > 0);
+
+        int depth = src[0].depth();
+        Size size = src[0].size();
+
+        bool single_channel_only = true;
+        int total_channels = 0;
+
+        for (size_t i = 0; i < n; ++i)
+        {
+            CV_Assert(src[i].size() == size);
+            CV_Assert(src[i].depth() == depth);
+            single_channel_only = single_channel_only && src[i].channels() == 1;
+            total_channels += src[i].channels();
+        }
+
+        CV_Assert(single_channel_only);
+        CV_Assert(total_channels <= 4);
+
+        if (total_channels == 1)
+            src[0].copyTo(dst);
+        else
+        {
+            dst.create(size, CV_MAKETYPE(depth, total_channels));
+
+            DevMem2D src_as_devmem[4];
+            for(size_t i = 0; i < n; ++i)
+                src_as_devmem[i] = src[i];
+
+            split_merge::merge_caller(src_as_devmem, (DevMem2D)dst,
+                                      total_channels, CV_ELEM_SIZE(depth),
+                                      stream);
+        }
+    }
+
+
+    void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream)
+    {
+        CV_Assert(dst);
+
+        int depth = src.depth();
+        int num_channels = src.channels();
+        Size size = src.size();
+
+        if (num_channels == 1)
+        {
+            src.copyTo(dst[0]);
+            return;
+        }
+
+        for (int i = 0; i < num_channels; ++i)
+            dst[i].create(src.size(), depth);
+
+        CV_Assert(num_channels <= 4);
+
+        DevMem2D dst_as_devmem[4];
+        for (int i = 0; i < num_channels; ++i)
+            dst_as_devmem[i] = dst[i];
+
+        split_merge::split_caller((DevMem2D)src, dst_as_devmem,
+                                  num_channels, src.elemSize1(),
+                                  stream);
+    }
+
+
+}}}
+
+
+void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst)
+{
+    split_merge::merge(src, n, dst, 0);
+}
+
+
+void cv::gpu::merge(const vector<GpuMat>& src, GpuMat& dst)
+{
+    split_merge::merge(&src[0], src.size(), dst, 0);
+}
+
+
+void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, const Stream& stream)
+{
+    split_merge::merge(src, n, dst, StreamAccessor::getStream(stream));
+}
+
+
+void cv::gpu::merge(const vector<GpuMat>& src, GpuMat& dst, const Stream& stream)
+{
+    split_merge::merge(&src[0], src.size(), dst, StreamAccessor::getStream(stream));
+}
+
+
+void cv::gpu::split(const GpuMat& src, GpuMat* dst)
+{
+    split_merge::split(src, dst, 0);
+}
+
+
+void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst)
+{
+    dst.resize(src.channels());
+    if(src.channels() > 0)
+        split_merge::split(src, &dst[0], 0);
+}
+
+
+void cv::gpu::split(const GpuMat& src, GpuMat* dst, const Stream& stream)
+{
+    split_merge::split(src, dst, StreamAccessor::getStream(stream));
+}
+
+
+void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst, const Stream& stream)
+{
+    dst.resize(src.channels());
+    if(src.channels() > 0)
+        split_merge::split(src, &dst[0], StreamAccessor::getStream(stream));
+}
+
+#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
diff --git a/tests/gpu/src/split_merge.cpp b/tests/gpu/src/split_merge.cpp
new file mode 100644
index 0000000000..fcbec178a0
--- /dev/null
+++ b/tests/gpu/src/split_merge.cpp
@@ -0,0 +1,275 @@
+#include "gputest.hpp"
+#include
+#include
+
+#include
+#include
+#include
+
+using namespace std;
+using namespace cv;
+
+struct CV_MergeTest : public CvTest
+{
+    CV_MergeTest() : CvTest("GPU-Merge", "merge") {}
+    void can_merge(size_t rows, size_t cols);
+    void can_merge_submatrixes(size_t rows, size_t cols);
+    void run(int);
+} merge_test;
+
+
+void CV_MergeTest::can_merge(size_t rows, size_t cols)
+{
+    for (size_t num_channels = 1; num_channels <= 4; ++num_channels)
+        for (size_t depth = CV_8U; depth <= CV_64F; ++depth)
+        {
+            vector<Mat> src;
+            for (size_t i = 0; i < num_channels; ++i)
+                src.push_back(Mat(rows, cols, depth, Scalar::all(static_cast<double>(i))));
+
+            Mat dst(rows, cols, CV_MAKETYPE(depth, num_channels));
+
+            cv::merge(src, dst);
+
+            vector<gpu::GpuMat> dev_src;
+            for (size_t i = 0; i < num_channels; ++i)
+                dev_src.push_back(gpu::GpuMat(src[i]));
+
+            gpu::GpuMat dev_dst(rows, cols, CV_MAKETYPE(depth, num_channels));
+            cv::gpu::merge(dev_src, dev_dst);
+
+            Mat host_dst = dev_dst;
+
+            double err = norm(dst, host_dst, NORM_INF);
+
+            if (err > 1e-3)
+            {
+                //ts->printf(CvTS::CONSOLE, "\nNorm: %f\n", err);
+                //ts->printf(CvTS::CONSOLE, "Depth: %d\n", depth);
+                //ts->printf(CvTS::CONSOLE, "Rows: %d\n", rows);
+                //ts->printf(CvTS::CONSOLE, "Cols: %d\n", cols);
+                //ts->printf(CvTS::CONSOLE, "NumChannels: %d\n", num_channels);
+                ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+                return;
+            }
+        }
+}
+
+
+void CV_MergeTest::can_merge_submatrixes(size_t rows, size_t cols)
+{
+    for (size_t num_channels = 1; num_channels <= 4; ++num_channels)
+        for (size_t depth = CV_8U; depth <= CV_64F; ++depth)
+        {
+            vector<Mat> src;
+            for (size_t i = 0; i < num_channels; ++i)
+            {
+                Mat m(rows * 2, cols * 2, depth, Scalar::all(static_cast<double>(i)));
+                src.push_back(m(Range(rows / 2, rows / 2 + rows), Range(cols / 2, cols / 2 + cols)));
+            }
+
+            Mat dst(rows, cols, CV_MAKETYPE(depth, num_channels));
+
+            cv::merge(src, dst);
+
+            vector<gpu::GpuMat> dev_src;
+            for (size_t i = 0; i < num_channels; ++i)
+                dev_src.push_back(gpu::GpuMat(src[i]));
+
+            gpu::GpuMat dev_dst(rows, cols, CV_MAKETYPE(depth, num_channels));
+            cv::gpu::merge(dev_src, dev_dst);
+
+            Mat host_dst = dev_dst;
+
+            double err = norm(dst, host_dst, NORM_INF);
+
+            if (err > 1e-3)
+            {
+                //ts->printf(CvTS::CONSOLE, "\nNorm: %f\n", err);
+                //ts->printf(CvTS::CONSOLE, "Depth: %d\n", depth);
+                //ts->printf(CvTS::CONSOLE, "Rows: %d\n", rows);
+                //ts->printf(CvTS::CONSOLE, "Cols: %d\n", cols);
+                //ts->printf(CvTS::CONSOLE, "NumChannels: %d\n", num_channels);
+                ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+                return;
+            }
+        }
+}
+
+
+void CV_MergeTest::run(int)
+{
+    try
+    {
+        can_merge(1, 1);
+        can_merge(1, 7);
+        can_merge(53, 7);
+        can_merge_submatrixes(1, 1);
+        can_merge_submatrixes(1, 7);
+        can_merge_submatrixes(53, 7);
+    }
+    catch(const cv::Exception& e)
+    {
+        if (!check_and_treat_gpu_exception(e, ts))
+            throw;
+    }
+}
+
+
+struct CV_SplitTest : public CvTest
+{
+    CV_SplitTest() : CvTest("GPU-Split", "split") {}
+    void can_split(size_t rows, size_t cols);
+    void can_split_submatrix(size_t rows, size_t cols);
+    void run(int);
+} split_test;
+
+
+void CV_SplitTest::can_split(size_t rows, size_t cols)
+{
+    for (size_t num_channels = 1; num_channels <= 4; ++num_channels)
+        for (size_t depth = CV_8U; depth <= CV_64F; ++depth)
+        {
+            Mat src(rows, cols, CV_MAKETYPE(depth, num_channels), Scalar(1.0, 2.0, 3.0, 4.0));
+            vector<Mat> dst;
+            cv::split(src, dst);
+
+            gpu::GpuMat dev_src(src);
+            vector<gpu::GpuMat> dev_dst;
+            cv::gpu::split(dev_src, dev_dst);
+
+            if (dev_dst.size() != dst.size())
+            {
+                ts->printf(CvTS::CONSOLE, "Bad output sizes");
+                ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+            }
+
+            for (size_t i = 0; i < num_channels; ++i)
+            {
+                Mat host_dst = dev_dst[i];
+                double err = norm(dst[i], host_dst, NORM_INF);
+
+                if (err > 1e-3)
+                {
+                    //ts->printf(CvTS::CONSOLE, "\nNorm: %f\n", err);
+                    //ts->printf(CvTS::CONSOLE, "Depth: %d\n", depth);
+                    //ts->printf(CvTS::CONSOLE, "Rows: %d\n", rows);
+                    //ts->printf(CvTS::CONSOLE, "Cols: %d\n", cols);
+                    //ts->printf(CvTS::CONSOLE, "NumChannels: %d\n", num_channels);
+                    ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+                    return;
+                }
+            }
+        }
+}
+
+
+
+void CV_SplitTest::can_split_submatrix(size_t rows, size_t cols)
+{
+    for (size_t num_channels = 1; num_channels <= 4; ++num_channels)
+        for (size_t depth = CV_8U; depth <= CV_64F; ++depth)
+        {
+            Mat src_data(rows * 2, cols * 2, CV_MAKETYPE(depth, num_channels), Scalar(1.0, 2.0, 3.0, 4.0));
+            Mat src(src_data(Range(rows / 2, rows / 2 + rows), Range(cols / 2, cols / 2 + cols)));
+            vector<Mat> dst;
+            cv::split(src, dst);
+
+            gpu::GpuMat dev_src(src);
+            vector<gpu::GpuMat> dev_dst;
+            cv::gpu::split(dev_src, dev_dst);
+
+            if (dev_dst.size() != dst.size())
+            {
+                ts->printf(CvTS::CONSOLE, "Bad output sizes");
+                ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+            }
+
+            for (size_t i = 0; i < num_channels; ++i)
+            {
+                Mat host_dst = dev_dst[i];
+                double err = norm(dst[i], host_dst, NORM_INF);
+
+                if (err > 1e-3)
+                {
+                    //ts->printf(CvTS::CONSOLE, "\nNorm: %f\n", err);
+                    //ts->printf(CvTS::CONSOLE, "Depth: %d\n", depth);
+                    //ts->printf(CvTS::CONSOLE, "Rows: %d\n", rows);
+                    //ts->printf(CvTS::CONSOLE, "Cols: %d\n", cols);
+                    //ts->printf(CvTS::CONSOLE, "NumChannels: %d\n", num_channels);
+                    ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+                    return;
+                }
+            }
+        }
+}
+
+
+void CV_SplitTest::run(int)
+{
+    try
+    {
+        can_split(1, 1);
+        can_split(1, 7);
+        can_split(7, 53);
+        can_split_submatrix(1, 1);
+        can_split_submatrix(1, 7);
+        can_split_submatrix(7, 53);
+    }
+    catch(const cv::Exception& e)
+    {
+        if (!check_and_treat_gpu_exception(e, ts))
+            throw;
+    }
+}
+
+
+struct CV_SplitMergeTest : public CvTest
+{
+    CV_SplitMergeTest() : CvTest("GPU-SplitMerge", "split merge") {}
+    void can_split_merge(size_t rows, size_t cols);
+    void run(int);
+} split_merge_test;
+
+
+void CV_SplitMergeTest::can_split_merge(size_t rows, size_t cols) {
+    for (size_t num_channels = 1; num_channels <= 4; ++num_channels)
+        for (size_t depth = CV_8U; depth <= CV_64F; ++depth)
+        {
+            Mat orig(rows, cols, CV_MAKETYPE(depth, num_channels), Scalar(1.0, 2.0, 3.0, 4.0));
+            gpu::GpuMat dev_orig(orig);
+            vector<gpu::GpuMat> dev_vec;
+            cv::gpu::split(dev_orig, dev_vec);
+
+            gpu::GpuMat dev_final(rows, cols, CV_MAKETYPE(depth, num_channels));
+            cv::gpu::merge(dev_vec, dev_final);
+
+            double err = cv::norm((Mat)dev_orig, (Mat)dev_final, NORM_INF);
+            if (err > 1e-3)
+            {
+                //ts->printf(CvTS::CONSOLE, "\nNorm: %f\n", err);
+                //ts->printf(CvTS::CONSOLE, "Depth: %d\n", depth);
+                //ts->printf(CvTS::CONSOLE, "Rows: %d\n", rows);
+                //ts->printf(CvTS::CONSOLE, "Cols: %d\n", cols);
+                //ts->printf(CvTS::CONSOLE, "NumChannels: %d\n", num_channels);
+                ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+                return;
+            }
+        }
+}
+
+
+void CV_SplitMergeTest::run(int)
+{
+    try
+    {
+        can_split_merge(1, 1);
+        can_split_merge(1, 7);
+        can_split_merge(7, 53);
+    }
+    catch(const cv::Exception& e)
+    {
+        if (!check_and_treat_gpu_exception(e, ts))
+            throw;
+    }
+}
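
Not part of the patch — the tests above exercise only the synchronous overloads. A minimal sketch of how the stream-based overloads declared in gpu.hpp would be driven, assuming the cv::gpu::Stream class of the 2.x gpu module and its waitForCompletion() method; sizes and values are arbitrary:

    // enqueue split and merge on one user stream, then synchronize once at the end
    cv::gpu::Stream stream;
    cv::gpu::GpuMat src(480, 640, CV_16UC4, cv::Scalar(1, 2, 3, 4));

    std::vector<cv::gpu::GpuMat> planes;
    cv::gpu::split(src, planes, stream);        // asynchronous: returns before the kernel finishes

    cv::gpu::GpuMat roundtrip;
    cv::gpu::merge(planes, roundtrip, stream);  // queued after the split on the same stream

    stream.waitForCompletion();                 // block until both operations have completed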