From cd3f7fbf053ad3d4d18b925c7d90f0d5e0db3749 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Sun, 8 Dec 2013 01:32:17 +0400 Subject: [PATCH 1/2] added cv::merge to T-API --- modules/core/src/convert.cpp | 54 ++++++ modules/core/src/matrix.cpp | 14 ++ modules/core/src/ocl.cpp | 23 ++- modules/core/src/opencl/split_merge.cl | 83 +++++++++ modules/core/test/ocl/test_arithm.cpp | 4 - modules/core/test/ocl/test_split_merge.cpp | 201 +++++++++++++++++++++ 6 files changed, 367 insertions(+), 12 deletions(-) create mode 100644 modules/core/src/opencl/split_merge.cl create mode 100644 modules/core/test/ocl/test_split_merge.cpp diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index af7e042e18..c6cc0fd747 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -353,8 +353,62 @@ void cv::merge(const Mat* mv, size_t n, OutputArray _dst) } } +namespace cv { + +static bool ocl_merge( InputArrayOfArrays _mv, OutputArray _dst ) +{ + const std::vector & src = *(const std::vector *)(_mv.getObj()); + CV_Assert(!src.empty()); + + int type = src[0].type(), depth = CV_MAT_DEPTH(type); + Size size = src[0].size(); + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if (doubleSupport && depth == CV_64F) + return false; + + size_t srcsize = src.size(); + for (size_t i = 0; i < srcsize; ++i) + { + int itype = src[i].type(), icn = CV_MAT_CN(itype), idepth = CV_MAT_DEPTH(itype); + if (src[i].dims > 2 || icn != 1) + return false; + CV_Assert(size == src[i].size() && depth == idepth); + } + + String srcargs, srcdecl, processelem; + for (size_t i = 0; i < srcsize; ++i) + { + srcargs += format("DECLARE_SRC_PARAM(%d)", i); + srcdecl += format("DECLARE_DATA(%d)", i); + processelem += format("PROCESS_ELEM(%d)", i); + } + + ocl::Kernel k("merge", ocl::core::split_merge_oclsrc, + format("-D OP_MERGE -D cn=%d -D T=%s -D DECLARE_SRC_PARAMS_N=%s -D DECLARE_DATA_N=%s -D PROCESS_ELEMS_N=%s", + (int)srcsize, ocl::memopTypeToStr(depth), srcargs.c_str(), srcdecl.c_str(), processelem.c_str())); + if (k.empty()) + return false; + + _dst.create(size, CV_MAKE_TYPE(depth, srcsize)); + UMat dst = _dst.getUMat(); + + int argidx = 0; + for (size_t i = 0; i < srcsize; ++i) + argidx = k.set(argidx, ocl::KernelArg::ReadOnlyNoSize(src[i])); + k.set(argidx, ocl::KernelArg::WriteOnly(dst)); + + size_t globalsize[2] = { dst.cols, dst.rows }; + return k.run(2, globalsize, NULL, false); +} + +} + void cv::merge(InputArrayOfArrays _mv, OutputArray _dst) { + if (ocl::useOpenCL() && _mv.isUMatVector() && _dst.isUMat() && ocl_merge(_mv, _dst)) + return; + std::vector mv; _mv.getMatVector(mv); merge(!mv.empty() ? &mv[0] : 0, mv.size(), _dst); diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 72c6c4756d..871fb385d6 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -1822,6 +1822,13 @@ size_t _InputArray::offset(int i) const return (size_t)(vv[i].data - vv[i].datastart); } + if( k == STD_VECTOR_UMAT ) + { + const std::vector& vv = *(const std::vector*)obj; + CV_Assert((size_t)i < vv.size()); + return vv[i].offset; + } + if( k == GPU_MAT ) { CV_Assert( i < 0 ); @@ -1861,6 +1868,13 @@ size_t _InputArray::step(int i) const return vv[i].step; } + if( k == STD_VECTOR_UMAT ) + { + const std::vector& vv = *(const std::vector*)obj; + CV_Assert((size_t)i < vv.size()); + return vv[i].step; + } + if( k == GPU_MAT ) { CV_Assert( i < 0 ); diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index f733dd11fb..5fb0d35760 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -1893,7 +1893,7 @@ Context2& Context2::getDefault() // First, try to retrieve existing context of the same type. // In its turn, Platform::getContext() may call Context2::create() // if there is no such context. - ctx.create(Device::TYPE_ACCELERATOR); + ctx.create(Device::TYPE_CPU); if(!ctx.p) ctx.create(Device::TYPE_DGPU); if(!ctx.p) @@ -2189,8 +2189,13 @@ int Kernel::set(int i, const void* value, size_t sz) CV_Assert(i >= 0); if( i == 0 ) p->cleanupUMats(); - if( !p || !p->handle || clSetKernelArg(p->handle, (cl_uint)i, sz, value) < 0 ) + cl_int retval; + if( !p || !p->handle || (retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value)) < 0 ) + { + printf("%d\n", retval); return -1; + } + printf("%d\n", retval); return i+1; } @@ -2201,6 +2206,7 @@ int Kernel::set(int i, const UMat& m) int Kernel::set(int i, const KernelArg& arg) { + printf("Setting to index %d\n", i); CV_Assert( i >= 0 ); if( !p || !p->handle ) return -1; @@ -2214,20 +2220,21 @@ int Kernel::set(int i, const KernelArg& arg) cl_mem h = (cl_mem)arg.m->handle(accessFlags); if (ptronly) - clSetKernelArg(p->handle, (cl_uint)i++, sizeof(h), &h); + printf("%d\n", clSetKernelArg(p->handle, (cl_uint)i++, sizeof(h), &h)); else if( arg.m->dims <= 2 ) { UMat2D u2d(*arg.m); - clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h); - clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step); - clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset); + printf("setting ... \n"); + printf("%d\n", clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h)); + printf("%d\n", clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step)); + printf("%d\n", clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset)); i += 3; if( !(arg.flags & KernelArg::NO_SIZE) ) { int cols = u2d.cols*arg.wscale; - clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows); - clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols); + printf("%d\n", clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows)); + printf("%d\n", clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols)); i += 2; } } diff --git a/modules/core/src/opencl/split_merge.cl b/modules/core/src/opencl/split_merge.cl new file mode 100644 index 0000000000..2fd7b515cc --- /dev/null +++ b/modules/core/src/opencl/split_merge.cl @@ -0,0 +1,83 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the copyright holders or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifdef OP_MERGE + +#define DECLARE_SRC_PARAM(index) __global const uchar * src##index##ptr, int src##index##_step, int src##index##_offset, +#define DECLARE_DATA(index) __global const T * src##index = \ + (__global T *)(src##index##ptr + mad24(src##index##_step, y, x * (int)sizeof(T) + src##index##_offset)); +#define PROCESS_ELEM(index) dst[index] = src##index[0]; + +__kernel void merge(DECLARE_SRC_PARAMS_N + __global uchar * dstptr, int dst_step, int dst_offset, + int rows, int cols) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + DECLARE_DATA_N + __global T * dst = (__global T *)(dstptr + mad24(dst_step, y, x * (int)sizeof(T) * cn + dst_offset)); + PROCESS_ELEMS_N + } +} + +#elif defined OP_SPLIT + +__kernel void set(__global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols, dstT value ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset); + *(__global dstT*)(dstptr + dst_index) = value; + } +} + +#else +#error "No operation" +#endif diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 844be7bdf2..9ef0d21313 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -42,8 +42,6 @@ #include "test_precomp.hpp" #include "opencv2/ts/ocl_test.hpp" -#ifdef HAVE_OPENCL - namespace cvtest { namespace ocl { @@ -1036,5 +1034,3 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(::testing::Values(CV_32F, OCL_INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool())); } } // namespace cvtest::ocl - -#endif // HAVE_OPENCL diff --git a/modules/core/test/ocl/test_split_merge.cpp b/modules/core/test/ocl/test_split_merge.cpp new file mode 100644 index 0000000000..70ba2d5020 --- /dev/null +++ b/modules/core/test/ocl/test_split_merge.cpp @@ -0,0 +1,201 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" +#include "opencv2/ts/ocl_test.hpp" + +namespace cvtest { +namespace ocl { + +PARAM_TEST_CASE(MergeTestBase, MatDepth, Channels, bool) +{ + int depth, cn; + bool use_roi; + + TEST_DECLARE_INPUT_PARAMETER(src1) + TEST_DECLARE_INPUT_PARAMETER(src2) + TEST_DECLARE_INPUT_PARAMETER(src3) + TEST_DECLARE_INPUT_PARAMETER(src4) + TEST_DECLARE_OUTPUT_PARAMETER(dst) + + std::vector src_roi; + std::vector usrc_roi; + + virtual void SetUp() + { + depth = GET_PARAM(0); + cn = GET_PARAM(1); + use_roi = GET_PARAM(2); + } + + virtual void random_roi() + { + CV_Assert(cn >= 1 && cn <= 4); + Size roiSize = randomSize(1, MAX_VALUE); + + { + Border src1Border = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src1, src1_roi, roiSize, src1Border, depth, 2, 11); + + Border src2Border = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src2, src2_roi, roiSize, src2Border, depth, -1540, 1740); + + Border src3Border = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src3, src3_roi, roiSize, src3Border, depth, -1540, 1740); + + Border src4Border = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src4, src4_roi, roiSize, src4Border, depth, -1540, 1740); + } + + Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(dst, dst_roi, roiSize, dstBorder, CV_MAKE_TYPE(depth, cn), 5, 16); + + UMAT_UPLOAD_INPUT_PARAMETER(src1) + UMAT_UPLOAD_INPUT_PARAMETER(src2) + UMAT_UPLOAD_INPUT_PARAMETER(src3) + UMAT_UPLOAD_INPUT_PARAMETER(src4) + UMAT_UPLOAD_OUTPUT_PARAMETER(dst) + + src_roi.push_back(src1_roi), usrc_roi.push_back(usrc1_roi); + if (cn >= 2) + src_roi.push_back(src2_roi), usrc_roi.push_back(usrc2_roi); + if (cn >= 3) + src_roi.push_back(src3_roi), usrc_roi.push_back(usrc3_roi); + if (cn >= 4) + src_roi.push_back(src4_roi), usrc_roi.push_back(usrc4_roi); + } + + void Near(double threshold = 0.) + { + EXPECT_MAT_NEAR(dst, udst, threshold); + EXPECT_MAT_NEAR(dst_roi, udst_roi, threshold); + } +}; + +typedef MergeTestBase Merge; + +OCL_TEST_P(Merge, Accuracy) +{ + for(int j = 0; j < test_loop_times; j++) + { + random_roi(); + + OCL_OFF(cv::merge(src_roi, dst_roi)); + OCL_ON(cv::merge(usrc_roi, udst_roi)); + + Near(); + } +} + +//PARAM_TEST_CASE(SplitTestBase, MatType, int, bool) +//{ +// int type; +// int channels; +// bool use_roi; + +// cv::Mat src, src_roi; +// cv::Mat dst[MAX_CHANNELS], dst_roi[MAX_CHANNELS]; + +// cv::ocl::oclMat gsrc_whole, gsrc_roi; +// cv::ocl::oclMat gdst_whole[MAX_CHANNELS], gdst_roi[MAX_CHANNELS]; + +// virtual void SetUp() +// { +// type = GET_PARAM(0); +// channels = GET_PARAM(1); +// use_roi = GET_PARAM(2); +// } + +// void random_roi() +// { +// Size roiSize = randomSize(1, MAX_VALUE); +// Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); +// randomSubMat(src, src_roi, roiSize, srcBorder, CV_MAKETYPE(type, channels), 0, 256); +// generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder); + +// for (int i = 0; i < channels; ++i) +// { +// Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); +// randomSubMat(dst[i], dst_roi[i], roiSize, dstBorder, CV_MAKETYPE(type, 1), 5, 16); +// generateOclMat(gdst_whole[i], gdst_roi[i], dst[i], roiSize, dstBorder); +// } +// } +//}; + +//struct Split : SplitTestBase {}; + +//#ifdef ANDROID +//// NOTE: The test fail on Android is the top of the iceberg only +//// The real fail reason is memory access vialation somewhere else +//OCL_TEST_P(Split, DISABLED_Accuracy) +//#else +//OCL_TEST_P(Split, Accuracy) +//#endif +//{ +// for(int j = 0; j < LOOP_TIMES; j++) +// { +// random_roi(); + +// cv::split(src_roi, dst_roi); +// cv::ocl::split(gsrc_roi, gdst_roi); + +// for (int i = 0; i < channels; ++i) +// { +// EXPECT_MAT_NEAR(dst[i], gdst_whole[i], 0.0); +// EXPECT_MAT_NEAR(dst_roi[i], gdst_roi[i], 0.0); +// } +// } +//} + + +OCL_INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); + + +//INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine( +// Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F), Values(1, 2, 3, 4), Bool())); + + +} } // namespace cvtest::ocl From c16c9a2e8e239b2b50144046928549778038c1c3 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Sun, 8 Dec 2013 14:45:25 +0400 Subject: [PATCH 2/2] added cv::split to T-API --- modules/core/src/convert.cpp | 48 +++++++- modules/core/src/ocl.cpp | 23 ++-- modules/core/src/opencl/split_merge.cl | 13 +- modules/core/test/ocl/test_arithm.cpp | 4 + modules/core/test/ocl/test_split_merge.cpp | 136 ++++++++++++--------- 5 files changed, 143 insertions(+), 81 deletions(-) diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index c6cc0fd747..1b1ceac2c4 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -264,8 +264,50 @@ void cv::split(const Mat& src, Mat* mv) } } +namespace cv { + +static bool ocl_split( InputArray _m, OutputArrayOfArrays _mv ) +{ + int type = _m.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + + String dstargs, dstdecl, processelem; + for (int i = 0; i < cn; ++i) + { + dstargs += format("DECLARE_DST_PARAM(%d)", i); + dstdecl += format("DECLARE_DATA(%d)", i); + processelem += format("PROCESS_ELEM(%d)", i); + } + + ocl::Kernel k("split", ocl::core::split_merge_oclsrc, + format("-D T=%s -D OP_SPLIT -D cn=%d -D DECLARE_DST_PARAMS=%s " + "-D DECLARE_DATA_N=%s -D PROCESS_ELEMS_N=%s", + ocl::memopTypeToStr(depth), cn, dstargs.c_str(), + dstdecl.c_str(), processelem.c_str())); + if (k.empty()) + return false; + + Size size = _m.size(); + std::vector & dst = *(std::vector *)_mv.getObj(); + dst.resize(cn); + for (int i = 0; i < cn; ++i) + dst[i].create(size, depth); + + int argidx = k.set(0, ocl::KernelArg::ReadOnly(_m.getUMat())); + for (int i = 0; i < cn; ++i) + argidx = k.set(argidx, ocl::KernelArg::WriteOnlyNoSize(dst[i])); + + size_t globalsize[2] = { size.width, size.height }; + return k.run(2, globalsize, NULL, false); +} + +} + void cv::split(InputArray _m, OutputArrayOfArrays _mv) { + if (ocl::useOpenCL() && _m.dims() <= 2 && _mv.isUMatVector() && + ocl_split(_m, _mv)) + return; + Mat m = _m.getMat(); if( m.empty() ) { @@ -362,10 +404,6 @@ static bool ocl_merge( InputArrayOfArrays _mv, OutputArray _dst ) int type = src[0].type(), depth = CV_MAT_DEPTH(type); Size size = src[0].size(); - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - - if (doubleSupport && depth == CV_64F) - return false; size_t srcsize = src.size(); for (size_t i = 0; i < srcsize; ++i) @@ -390,7 +428,7 @@ static bool ocl_merge( InputArrayOfArrays _mv, OutputArray _dst ) if (k.empty()) return false; - _dst.create(size, CV_MAKE_TYPE(depth, srcsize)); + _dst.create(size, CV_MAKE_TYPE(depth, (int)srcsize)); UMat dst = _dst.getUMat(); int argidx = 0; diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 5fb0d35760..f733dd11fb 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -1893,7 +1893,7 @@ Context2& Context2::getDefault() // First, try to retrieve existing context of the same type. // In its turn, Platform::getContext() may call Context2::create() // if there is no such context. - ctx.create(Device::TYPE_CPU); + ctx.create(Device::TYPE_ACCELERATOR); if(!ctx.p) ctx.create(Device::TYPE_DGPU); if(!ctx.p) @@ -2189,13 +2189,8 @@ int Kernel::set(int i, const void* value, size_t sz) CV_Assert(i >= 0); if( i == 0 ) p->cleanupUMats(); - cl_int retval; - if( !p || !p->handle || (retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value)) < 0 ) - { - printf("%d\n", retval); + if( !p || !p->handle || clSetKernelArg(p->handle, (cl_uint)i, sz, value) < 0 ) return -1; - } - printf("%d\n", retval); return i+1; } @@ -2206,7 +2201,6 @@ int Kernel::set(int i, const UMat& m) int Kernel::set(int i, const KernelArg& arg) { - printf("Setting to index %d\n", i); CV_Assert( i >= 0 ); if( !p || !p->handle ) return -1; @@ -2220,21 +2214,20 @@ int Kernel::set(int i, const KernelArg& arg) cl_mem h = (cl_mem)arg.m->handle(accessFlags); if (ptronly) - printf("%d\n", clSetKernelArg(p->handle, (cl_uint)i++, sizeof(h), &h)); + clSetKernelArg(p->handle, (cl_uint)i++, sizeof(h), &h); else if( arg.m->dims <= 2 ) { UMat2D u2d(*arg.m); - printf("setting ... \n"); - printf("%d\n", clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h)); - printf("%d\n", clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step)); - printf("%d\n", clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset)); + clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h); + clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step); + clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset); i += 3; if( !(arg.flags & KernelArg::NO_SIZE) ) { int cols = u2d.cols*arg.wscale; - printf("%d\n", clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows)); - printf("%d\n", clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols)); + clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows); + clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols); i += 2; } } diff --git a/modules/core/src/opencl/split_merge.cl b/modules/core/src/opencl/split_merge.cl index 2fd7b515cc..d2462750ce 100644 --- a/modules/core/src/opencl/split_merge.cl +++ b/modules/core/src/opencl/split_merge.cl @@ -65,16 +65,21 @@ __kernel void merge(DECLARE_SRC_PARAMS_N #elif defined OP_SPLIT -__kernel void set(__global uchar* dstptr, int dststep, int dstoffset, - int rows, int cols, dstT value ) +#define DECLARE_DST_PARAM(index) , __global uchar * dst##index##ptr, int dst##index##_step, int dst##index##_offset +#define DECLARE_DATA(index) __global T * dst##index = \ + (__global T *)(dst##index##ptr + mad24(y, dst##index##_step, x * (int)sizeof(T) + dst##index##_offset)); +#define PROCESS_ELEM(index) dst##index[0] = src[index]; + +__kernel void split(__global uchar* srcptr, int src_step, int src_offset, int rows, int cols DECLARE_DST_PARAMS) { int x = get_global_id(0); int y = get_global_id(1); if (x < cols && y < rows) { - int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset); - *(__global dstT*)(dstptr + dst_index) = value; + DECLARE_DATA_N + __global const T * src = (__global const T *)(srcptr + mad24(y, src_step, x * cn * (int)sizeof(T) + src_offset)); + PROCESS_ELEMS_N } } diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 9ef0d21313..844be7bdf2 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -42,6 +42,8 @@ #include "test_precomp.hpp" #include "opencv2/ts/ocl_test.hpp" +#ifdef HAVE_OPENCL + namespace cvtest { namespace ocl { @@ -1034,3 +1036,5 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(::testing::Values(CV_32F, OCL_INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool())); } } // namespace cvtest::ocl + +#endif // HAVE_OPENCL diff --git a/modules/core/test/ocl/test_split_merge.cpp b/modules/core/test/ocl/test_split_merge.cpp index 70ba2d5020..224963cd24 100644 --- a/modules/core/test/ocl/test_split_merge.cpp +++ b/modules/core/test/ocl/test_split_merge.cpp @@ -47,6 +47,8 @@ #include "test_precomp.hpp" #include "opencv2/ts/ocl_test.hpp" +#ifdef HAVE_OPENCL + namespace cvtest { namespace ocl { @@ -69,11 +71,12 @@ PARAM_TEST_CASE(MergeTestBase, MatDepth, Channels, bool) depth = GET_PARAM(0); cn = GET_PARAM(1); use_roi = GET_PARAM(2); + + CV_Assert(cn >= 1 && cn <= 4); } - virtual void random_roi() + void random_roi() { - CV_Assert(cn >= 1 && cn <= 4); Size roiSize = randomSize(1, MAX_VALUE); { @@ -130,72 +133,91 @@ OCL_TEST_P(Merge, Accuracy) } } -//PARAM_TEST_CASE(SplitTestBase, MatType, int, bool) -//{ -// int type; -// int channels; -// bool use_roi; +PARAM_TEST_CASE(SplitTestBase, MatType, Channels, bool) +{ + int depth, cn; + bool use_roi; -// cv::Mat src, src_roi; -// cv::Mat dst[MAX_CHANNELS], dst_roi[MAX_CHANNELS]; + TEST_DECLARE_INPUT_PARAMETER(src) + TEST_DECLARE_OUTPUT_PARAMETER(dst1) + TEST_DECLARE_OUTPUT_PARAMETER(dst2) + TEST_DECLARE_OUTPUT_PARAMETER(dst3) + TEST_DECLARE_OUTPUT_PARAMETER(dst4) -// cv::ocl::oclMat gsrc_whole, gsrc_roi; -// cv::ocl::oclMat gdst_whole[MAX_CHANNELS], gdst_roi[MAX_CHANNELS]; + std::vector dst_roi, dst; + std::vector udst_roi, udst; -// virtual void SetUp() -// { -// type = GET_PARAM(0); -// channels = GET_PARAM(1); -// use_roi = GET_PARAM(2); -// } + virtual void SetUp() + { + depth = GET_PARAM(0); + cn = GET_PARAM(1); + use_roi = GET_PARAM(2); -// void random_roi() -// { -// Size roiSize = randomSize(1, MAX_VALUE); -// Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); -// randomSubMat(src, src_roi, roiSize, srcBorder, CV_MAKETYPE(type, channels), 0, 256); -// generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder); + CV_Assert(cn >= 1 && cn <= 4); + } -// for (int i = 0; i < channels; ++i) -// { -// Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); -// randomSubMat(dst[i], dst_roi[i], roiSize, dstBorder, CV_MAKETYPE(type, 1), 5, 16); -// generateOclMat(gdst_whole[i], gdst_roi[i], dst[i], roiSize, dstBorder); -// } -// } -//}; + void random_roi() + { + Size roiSize = randomSize(1, MAX_VALUE); + Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, CV_MAKE_TYPE(depth, cn), 5, 16); -//struct Split : SplitTestBase {}; + { + Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(dst1, dst1_roi, roiSize, dst1Border, depth, 2, 11); -//#ifdef ANDROID -//// NOTE: The test fail on Android is the top of the iceberg only -//// The real fail reason is memory access vialation somewhere else -//OCL_TEST_P(Split, DISABLED_Accuracy) -//#else -//OCL_TEST_P(Split, Accuracy) -//#endif -//{ -// for(int j = 0; j < LOOP_TIMES; j++) -// { -// random_roi(); + Border dst2Border = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(dst2, dst2_roi, roiSize, dst2Border, depth, -1540, 1740); -// cv::split(src_roi, dst_roi); -// cv::ocl::split(gsrc_roi, gdst_roi); + Border dst3Border = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(dst3, dst3_roi, roiSize, dst3Border, depth, -1540, 1740); -// for (int i = 0; i < channels; ++i) -// { -// EXPECT_MAT_NEAR(dst[i], gdst_whole[i], 0.0); -// EXPECT_MAT_NEAR(dst_roi[i], gdst_roi[i], 0.0); -// } -// } -//} + Border dst4Border = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(dst4, dst4_roi, roiSize, dst4Border, depth, -1540, 1740); + } + UMAT_UPLOAD_INPUT_PARAMETER(src) + UMAT_UPLOAD_OUTPUT_PARAMETER(dst1) + UMAT_UPLOAD_OUTPUT_PARAMETER(dst2) + UMAT_UPLOAD_OUTPUT_PARAMETER(dst3) + UMAT_UPLOAD_OUTPUT_PARAMETER(dst4) + + dst_roi.push_back(dst1_roi), udst_roi.push_back(udst1_roi), + dst.push_back(dst1), udst.push_back(udst1); + if (cn >= 2) + dst_roi.push_back(dst2_roi), udst_roi.push_back(udst2_roi), + dst.push_back(dst2), udst.push_back(udst2); + if (cn >= 3) + dst_roi.push_back(dst3_roi), udst_roi.push_back(udst3_roi), + dst.push_back(dst3), udst.push_back(udst3); + if (cn >= 4) + dst_roi.push_back(dst4_roi), udst_roi.push_back(udst4_roi), + dst.push_back(dst4), udst.push_back(udst4); + } +}; + +typedef SplitTestBase Split; + +OCL_TEST_P(Split, Accuracy) +{ + for (int j = 0; j < test_loop_times; j++) + { + random_roi(); + + OCL_OFF(cv::split(src_roi, dst_roi)); + OCL_ON(cv::split(usrc_roi, udst_roi)); + + for (int i = 0; i < cn; ++i) + { + EXPECT_MAT_NEAR(dst[i], udst[i], 0.0); + EXPECT_MAT_NEAR(dst_roi[i], udst_roi[i], 0.0); + } + } +} OCL_INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); - - -//INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine( -// Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F), Values(1, 2, 3, 4), Bool())); - +OCL_INSTANTIATE_TEST_CASE_P(SplitMerge, Split, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); } } // namespace cvtest::ocl + +#endif // HAVE_OPENCL