From 93a818684cd0d2403dcafec392c0e44aba6bf1ed Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 14 Jan 2014 14:10:24 +0400 Subject: [PATCH] ported cv::Canny to T-API --- modules/core/include/opencv2/core/mat.hpp | 1 + modules/core/include/opencv2/core/mat.inl.hpp | 6 + .../core/include/opencv2/core/operations.hpp | 6 + modules/core/src/matrix.cpp | 31 ++ modules/core/src/umatrix.cpp | 3 +- modules/imgproc/perf/opencl/perf_imgproc.cpp | 5 +- modules/imgproc/src/canny.cpp | 171 +++++- modules/imgproc/src/opencl/canny.cl | 514 ++++++++++++++++++ modules/imgproc/test/ocl/test_canny.cpp | 117 ++++ modules/ts/include/opencv2/ts/ocl_test.hpp | 20 +- 10 files changed, 852 insertions(+), 22 deletions(-) create mode 100644 modules/imgproc/src/opencl/canny.cl create mode 100644 modules/imgproc/test/ocl/test_canny.cpp diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index 401467534b..a7a4f1b8e4 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -127,6 +127,7 @@ public: virtual int depth(int i=-1) const; virtual int channels(int i=-1) const; virtual bool isContinuous(int i=-1) const; + virtual bool isSubmatrix(int i=-1) const; virtual bool empty() const; virtual void copyTo(const _OutputArray& arr) const; virtual size_t offset(int i=-1) const; diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp index 3079548969..10eac9141a 100644 --- a/modules/core/include/opencv2/core/mat.inl.hpp +++ b/modules/core/include/opencv2/core/mat.inl.hpp @@ -186,6 +186,12 @@ inline _OutputArray::_OutputArray(const Mat& m) inline _OutputArray::_OutputArray(const std::vector& vec) { init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_WRITE, &vec); } +inline _OutputArray::_OutputArray(const UMat& m) +{ init(FIXED_TYPE + FIXED_SIZE + UMAT + ACCESS_WRITE, &m); } + +inline _OutputArray::_OutputArray(const std::vector& vec) +{ init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_WRITE, &vec); } + inline _OutputArray::_OutputArray(const cuda::GpuMat& d_mat) { init(FIXED_TYPE + FIXED_SIZE + GPU_MAT + ACCESS_WRITE, &d_mat); } diff --git a/modules/core/include/opencv2/core/operations.hpp b/modules/core/include/opencv2/core/operations.hpp index 5895e4c4a7..d2f49d7ee1 100644 --- a/modules/core/include/opencv2/core/operations.hpp +++ b/modules/core/include/opencv2/core/operations.hpp @@ -423,6 +423,12 @@ int print(const Mat& mtx, FILE* stream = stdout) return print(Formatter::get()->format(mtx), stream); } +static inline +int print(const UMat& mtx, FILE* stream = stdout) +{ + return print(Formatter::get()->format(mtx.getMat(ACCESS_READ)), stream); +} + template static inline int print(const std::vector >& vec, FILE* stream = stdout) { diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 595a62dd51..ade9e35227 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -1808,6 +1808,37 @@ bool _InputArray::isContinuous(int i) const return false; } +bool _InputArray::isSubmatrix(int i) const +{ + int k = kind(); + + if( k == MAT ) + return i < 0 ? ((const Mat*)obj)->isSubmatrix() : false; + + if( k == UMAT ) + return i < 0 ? ((const UMat*)obj)->isSubmatrix() : false; + + if( k == EXPR || k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR) + return false; + + if( k == STD_VECTOR_MAT ) + { + const std::vector& vv = *(const std::vector*)obj; + CV_Assert((size_t)i < vv.size()); + return vv[i].isSubmatrix(); + } + + if( k == STD_VECTOR_UMAT ) + { + const std::vector& vv = *(const std::vector*)obj; + CV_Assert((size_t)i < vv.size()); + return vv[i].isSubmatrix(); + } + + CV_Error(CV_StsNotImplemented, ""); + return false; +} + size_t _InputArray::offset(int i) const { int k = kind(); diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index 95e203be9a..997c88117b 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -729,11 +729,12 @@ void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) con doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (!k.empty()) { + UMat src = *this; _dst.create( size(), _type ); UMat dst = _dst.getUMat(); float alphaf = (float)alpha, betaf = (float)beta; - k.args(ocl::KernelArg::ReadOnlyNoSize(*this), ocl::KernelArg::WriteOnly(dst, cn), alphaf, betaf); + k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn), alphaf, betaf); size_t globalsize[2] = { dst.cols * cn, dst.rows }; if (k.run(2, globalsize, NULL, false)) diff --git a/modules/imgproc/perf/opencl/perf_imgproc.cpp b/modules/imgproc/perf/opencl/perf_imgproc.cpp index 7c102fa215..fa82b7aa4d 100644 --- a/modules/imgproc/perf/opencl/perf_imgproc.cpp +++ b/modules/imgproc/perf/opencl/perf_imgproc.cpp @@ -234,7 +234,10 @@ OCL_PERF_TEST_P(CannyFixture, Canny, ::testing::Combine(OCL_PERF_ENUM(3, 5), Boo OCL_TEST_CYCLE() cv::Canny(img, edges, 50.0, 100.0, apertureSize, L2Grad); - SANITY_CHECK(edges); + if (apertureSize == 3) + SANITY_CHECK(edges); + else + SANITY_CHECK_NOTHING(); } diff --git a/modules/imgproc/src/canny.cpp b/modules/imgproc/src/canny.cpp index 44fd42a2a4..b52ca46de9 100644 --- a/modules/imgproc/src/canny.cpp +++ b/modules/imgproc/src/canny.cpp @@ -40,6 +40,7 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" /* #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) @@ -48,9 +49,11 @@ #undef USE_IPP_CANNY #endif */ -#ifdef USE_IPP_CANNY + namespace cv { + +#ifdef USE_IPP_CANNY static bool ippCanny(const Mat& _src, Mat& _dst, float low, float high) { int size = 0, size1 = 0; @@ -83,22 +86,165 @@ static bool ippCanny(const Mat& _src, Mat& _dst, float low, float high) return false; return true; } -} #endif +static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float high_thresh, + int aperture_size, bool L2gradient, int cn, const Size & size) +{ + UMat dx(size, CV_16SC(cn)), dy(size, CV_16SC(cn)); + + if (L2gradient) + { + low_thresh = std::min(32767.0f, low_thresh); + high_thresh = std::min(32767.0f, high_thresh); + + if (low_thresh > 0) low_thresh *= low_thresh; + if (high_thresh > 0) high_thresh *= high_thresh; + } + int low = cvFloor(low_thresh), high = cvFloor(high_thresh); + Size esize(size.width + 2, size.height + 2); + + UMat mag; + size_t globalsize[2] = { size.width * cn, size.height }, localsize[2] = { 16, 16 }; + + if (aperture_size == 3 && !_src.isSubmatrix()) + { + // Sobel calculation + ocl::Kernel calcSobelRowPassKernel("calcSobelRowPass", ocl::imgproc::canny_oclsrc); + if (calcSobelRowPassKernel.empty()) + return false; + + UMat src = _src.getUMat(), dxBuf(size, CV_16SC(cn)), dyBuf(size, CV_16SC(cn)); + calcSobelRowPassKernel.args(ocl::KernelArg::ReadOnly(src), + ocl::KernelArg::WriteOnlyNoSize(dxBuf), + ocl::KernelArg::WriteOnlyNoSize(dyBuf)); + + if (!calcSobelRowPassKernel.run(2, globalsize, localsize, false)) + return false; + + // magnitude calculation + ocl::Kernel magnitudeKernel("calcMagnitude_buf", ocl::imgproc::canny_oclsrc, + L2gradient ? " -D L2GRAD" : ""); + if (magnitudeKernel.empty()) + return false; + + mag = UMat(esize, CV_32SC(cn), Scalar::all(0)); + dx.create(size, CV_16SC(cn)); + dy.create(size, CV_16SC(cn)); + + magnitudeKernel.args(ocl::KernelArg::ReadOnlyNoSize(dxBuf), ocl::KernelArg::ReadOnlyNoSize(dyBuf), + ocl::KernelArg::WriteOnlyNoSize(dx), ocl::KernelArg::WriteOnlyNoSize(dy), + ocl::KernelArg::WriteOnlyNoSize(mag, cn), size.height, size.width); + + if (!magnitudeKernel.run(2, globalsize, localsize, false)) + return false; + } + else + { + dx.create(size, CV_16SC(cn)); + dy.create(size, CV_16SC(cn)); + + Sobel(_src, dx, CV_16SC1, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); + Sobel(_src, dy, CV_16SC1, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE); + + // magnitude calculation + ocl::Kernel magnitudeKernel("calcMagnitude", ocl::imgproc::canny_oclsrc, + L2gradient ? " -D L2GRAD" : ""); + if (magnitudeKernel.empty()) + return false; + + mag = UMat(esize, CV_32SC(cn), Scalar::all(0)); + magnitudeKernel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy), + ocl::KernelArg::WriteOnlyNoSize(mag, cn), size.height, size.width); + + if (!magnitudeKernel.run(2, globalsize, NULL, false)) + return false; + } + + // map calculation + ocl::Kernel calcMapKernel("calcMap", ocl::imgproc::canny_oclsrc); + if (calcMapKernel.empty()) + return false; + + UMat map(esize, CV_32SC(cn)); + calcMapKernel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy), + ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::WriteOnlyNoSize(map, cn), + size.height, size.width, low, high); + + if (!calcMapKernel.run(2, globalsize, localsize, false)) + return false; + + // local hysteresis thresholding + ocl::Kernel edgesHysteresisLocalKernel("edgesHysteresisLocal", ocl::imgproc::canny_oclsrc); + if (edgesHysteresisLocalKernel.empty()) + return false; + + UMat stack(1, size.area(), CV_16UC2), counter(1, 1, CV_32SC1, Scalar::all(0)); + edgesHysteresisLocalKernel.args(ocl::KernelArg::ReadOnlyNoSize(map), ocl::KernelArg::PtrReadWrite(stack), + ocl::KernelArg::PtrReadWrite(counter), size.height, size.width); + if (!edgesHysteresisLocalKernel.run(2, globalsize, localsize, false)) + return false; + + // global hysteresis thresholding + UMat stack2(1, size.area(), CV_16UC2); + int count; + + for ( ; ; ) + { + ocl::Kernel edgesHysteresisGlobalKernel("edgesHysteresisGlobal", ocl::imgproc::canny_oclsrc); + if (edgesHysteresisGlobalKernel.empty()) + return false; + + { + Mat _counter = counter.getMat(ACCESS_RW); + count = _counter.at(0, 0); + if (count == 0) + break; + + _counter.at(0, 0) = 0; + } + + edgesHysteresisGlobalKernel.args(ocl::KernelArg::ReadOnlyNoSize(map), ocl::KernelArg::PtrReadWrite(stack), + ocl::KernelArg::PtrReadWrite(stack2), ocl::KernelArg::PtrReadWrite(counter), + size.height, size.width, count); + +#define divUp(total, grain) ((total + grain - 1) / grain) + size_t localsize2[2] = { 128, 1 }, globalsize2[2] = { std::min(count, 65535) * 128, divUp(count, 65535) }; +#undef divUp + + if (!edgesHysteresisGlobalKernel.run(2, globalsize2, localsize2, false)) + return false; + + std::swap(stack, stack2); + } + + // get edges + ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc); + if (getEdgesKernel.empty()) + return false; + + _dst.create(size, CV_8UC(cn)); + UMat dst = _dst.getUMat(); + + getEdgesKernel.args(ocl::KernelArg::ReadOnlyNoSize(map), ocl::KernelArg::WriteOnly(dst)); + return getEdgesKernel.run(2, globalsize, NULL, false); +} + +} + void cv::Canny( InputArray _src, OutputArray _dst, double low_thresh, double high_thresh, int aperture_size, bool L2gradient ) { - Mat src = _src.getMat(); - CV_Assert( src.depth() == CV_8U ); + const int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + const Size size = _src.size(); - _dst.create(src.size(), CV_8U); - Mat dst = _dst.getMat(); + CV_Assert( depth == CV_8U ); + _dst.create(size, CV_8U); if (!L2gradient && (aperture_size & CV_CANNY_L2_GRADIENT) == CV_CANNY_L2_GRADIENT) { - //backward compatibility + // backward compatibility aperture_size &= ~CV_CANNY_L2_GRADIENT; L2gradient = true; } @@ -109,6 +255,12 @@ void cv::Canny( InputArray _src, OutputArray _dst, if (low_thresh > high_thresh) std::swap(low_thresh, high_thresh); + if (ocl::useOpenCL() && _dst.isUMat() && cn == 1 && + ocl_Canny(_src, _dst, (float)low_thresh, (float)high_thresh, aperture_size, L2gradient, cn, size)) + return; + + Mat src = _src.getMat(), dst = _dst.getMat(); + #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::canny(src, dst, low_thresh, high_thresh, aperture_size, L2gradient)) return; @@ -120,12 +272,11 @@ void cv::Canny( InputArray _src, OutputArray _dst, return; #endif - const int cn = src.channels(); Mat dx(src.rows, src.cols, CV_16SC(cn)); Mat dy(src.rows, src.cols, CV_16SC(cn)); - Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, cv::BORDER_REPLICATE); - Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, cv::BORDER_REPLICATE); + Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); + Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE); if (L2gradient) { diff --git a/modules/imgproc/src/opencl/canny.cl b/modules/imgproc/src/opencl/canny.cl new file mode 100644 index 0000000000..88b406f401 --- /dev/null +++ b/modules/imgproc/src/opencl/canny.cl @@ -0,0 +1,514 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +// Smoothing perpendicular to the derivative direction with a triangle filter +// only support 3x3 Sobel kernel +// h (-1) = 1, h (0) = 2, h (1) = 1 +// h'(-1) = -1, h'(0) = 0, h'(1) = 1 +// thus sobel 2D operator can be calculated as: +// h'(x, y) = h'(x)h(y) for x direction +// +// src input 8bit single channel image data +// dx_buf output dx buffer +// dy_buf output dy buffer + +__kernel void __attribute__((reqd_work_group_size(16, 16, 1))) +calcSobelRowPass + (__global const uchar * src, int src_step, int src_offset, int rows, int cols, + __global uchar * dx_buf, int dx_buf_step, int dx_buf_offset, + __global uchar * dy_buf, int dy_buf_step, int dy_buf_offset) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + int lidx = get_local_id(0); + int lidy = get_local_id(1); + + __local int smem[16][18]; + + smem[lidy][lidx + 1] = src[mad24(src_step, min(gidy, rows - 1), gidx + src_offset)]; + if (lidx == 0) + { + smem[lidy][0] = src[mad24(src_step, min(gidy, rows - 1), max(gidx - 1, 0) + src_offset)]; + smem[lidy][17] = src[mad24(src_step, min(gidy, rows - 1), min(gidx + 16, cols - 1) + src_offset)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (gidy < rows && gidx < cols) + { + *(__global short *)(dx_buf + mad24(gidy, dx_buf_step, gidx * (int)sizeof(short) + dx_buf_offset)) = + smem[lidy][lidx + 2] - smem[lidy][lidx]; + *(__global short *)(dy_buf + mad24(gidy, dy_buf_step, gidx * (int)sizeof(short) + dy_buf_offset)) = + smem[lidy][lidx] + 2 * smem[lidy][lidx + 1] + smem[lidy][lidx + 2]; + } +} + +inline int calc(short x, short y) +{ +#ifdef L2GRAD + return x * x + y * y; +#else + return (x >= 0 ? x : -x) + (y >= 0 ? y : -y); +#endif +} + +// calculate the magnitude of the filter pass combining both x and y directions +// This is the non-buffered version(non-3x3 sobel) +// +// dx_buf dx buffer, calculated from calcSobelRowPass +// dy_buf dy buffer, calculated from calcSobelRowPass +// dx direvitive in x direction output +// dy direvitive in y direction output +// mag magnitude direvitive of xy output + +__kernel void calcMagnitude(__global const uchar * dxptr, int dx_step, int dx_offset, + __global const uchar * dyptr, int dy_step, int dy_offset, + __global uchar * magptr, int mag_step, int mag_offset, int rows, int cols) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (y < rows && x < cols) + { + int dx_index = mad24(dx_step, y, x * (int)sizeof(short) + dx_offset); + int dy_index = mad24(dy_step, y, x * (int)sizeof(short) + dy_offset); + int mag_index = mad24(mag_step, y + 1, (x + 1) * (int)sizeof(int) + mag_offset); + + __global const short * dx = (__global const short *)(dxptr + dx_index); + __global const short * dy = (__global const short *)(dyptr + dy_index); + __global int * mag = (__global int *)(magptr + mag_index); + + mag[0] = calc(dx[0], dy[0]); + } +} + +// calculate the magnitude of the filter pass combining both x and y directions +// This is the buffered version(3x3 sobel) +// +// dx_buf dx buffer, calculated from calcSobelRowPass +// dy_buf dy buffer, calculated from calcSobelRowPass +// dx direvitive in x direction output +// dy direvitive in y direction output +// mag magnitude direvitive of xy output +__kernel void __attribute__((reqd_work_group_size(16, 16, 1))) +calcMagnitude_buf + (__global const short * dx_buf, int dx_buf_step, int dx_buf_offset, + __global const short * dy_buf, int dy_buf_step, int dy_buf_offset, + __global short * dx, int dx_step, int dx_offset, + __global short * dy, int dy_step, int dy_offset, + __global int * mag, int mag_step, int mag_offset, + int rows, int cols) +{ + dx_buf_step /= sizeof(*dx_buf); + dx_buf_offset /= sizeof(*dx_buf); + dy_buf_step /= sizeof(*dy_buf); + dy_buf_offset /= sizeof(*dy_buf); + dx_step /= sizeof(*dx); + dx_offset /= sizeof(*dx); + dy_step /= sizeof(*dy); + dy_offset /= sizeof(*dy); + mag_step /= sizeof(*mag); + mag_offset /= sizeof(*mag); + + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + int lidx = get_local_id(0); + int lidy = get_local_id(1); + + __local short sdx[18][16]; + __local short sdy[18][16]; + + sdx[lidy + 1][lidx] = dx_buf[gidx + min(gidy, rows - 1) * dx_buf_step + dx_buf_offset]; + sdy[lidy + 1][lidx] = dy_buf[gidx + min(gidy, rows - 1) * dy_buf_step + dy_buf_offset]; + if (lidy == 0) + { + sdx[0][lidx] = dx_buf[gidx + min(max(gidy - 1, 0), rows - 1) * dx_buf_step + dx_buf_offset]; + sdx[17][lidx] = dx_buf[gidx + min(gidy + 16, rows - 1) * dx_buf_step + dx_buf_offset]; + + sdy[0][lidx] = dy_buf[gidx + min(max(gidy - 1, 0), rows - 1) * dy_buf_step + dy_buf_offset]; + sdy[17][lidx] = dy_buf[gidx + min(gidy + 16, rows - 1) * dy_buf_step + dy_buf_offset]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (gidx < cols && gidy < rows) + { + short x = sdx[lidy][lidx] + 2 * sdx[lidy + 1][lidx] + sdx[lidy + 2][lidx]; + short y = -sdy[lidy][lidx] + sdy[lidy + 2][lidx]; + + dx[gidx + gidy * dx_step + dx_offset] = x; + dy[gidx + gidy * dy_step + dy_offset] = y; + + mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] = calc(x, y); + } +} + + +////////////////////////////////////////////////////////////////////////////////////////// +// 0.4142135623730950488016887242097 is tan(22.5) + +#define CANNY_SHIFT 15 +#define TG22 (int)(0.4142135623730950488016887242097f*(1< low_thresh) + { + short xs = *(__global const short *)(dx + mad24(gidy, dx_step, dx_offset + (int)sizeof(short) * gidx)); + short ys = *(__global const short *)(dy + mad24(gidy, dy_step, dy_offset + (int)sizeof(short) * gidx)); + int x = abs(xs), y = abs(ys); + + int tg22x = x * TG22; + y <<= CANNY_SHIFT; + + if (y < tg22x) + { + if (m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2]) + edge_type = 1 + (int)(m > high_thresh); + } + else + { + int tg67x = tg22x + (x << (1 + CANNY_SHIFT)); + if (y > tg67x) + { + if (m > smem[lidy][lidx + 1]&& m >= smem[lidy + 2][lidx + 1]) + edge_type = 1 + (int)(m > high_thresh); + } + else + { + int s = (xs ^ ys) < 0 ? -1 : 1; + if (m > smem[lidy][lidx + 1 - s]&& m > smem[lidy + 2][lidx + 1 + s]) + edge_type = 1 + (int)(m > high_thresh); + } + } + } + *(__global int *)(map + mad24(map_step, gidy + 1, (gidx + 1) * (int)sizeof(int) + map_offset)) = edge_type; + } +} + +#undef CANNY_SHIFT +#undef TG22 + +struct PtrStepSz +{ + __global uchar * ptr; + int step, rows, cols; +}; + +inline int get(struct PtrStepSz data, int y, int x) +{ + return *(__global int *)(data.ptr + mad24(data.step, y + 1, (int)sizeof(int) * (x + 1))); +} + +inline void set(struct PtrStepSz data, int y, int x, int value) +{ + *(__global int *)(data.ptr + mad24(data.step, y + 1, (int)sizeof(int) * (x + 1))) = value; +} + +// perform Hysteresis for pixel whose edge type is 1 +// +// If candidate pixel (edge type is 1) has a neighbour pixel (in 3x3 area) with type 2, it is believed to be part of an edge and +// marked as edge. Each thread will iterate for 16 times to connect local edges. +// Candidate pixel being identified as edge will then be tested if there is nearby potiential edge points. If there is, counter will +// be incremented by 1 and the point location is stored. These potiential candidates will be processed further in next kernel. +// +// map raw edge type results calculated from calcMap. +// stack the potiential edge points found in this kernel call +// counter the number of potiential edge points + +__kernel void __attribute__((reqd_work_group_size(16,16,1))) +edgesHysteresisLocal + (__global uchar * map_ptr, int map_step, int map_offset, + __global ushort2 * st, __global unsigned int * counter, + int rows, int cols) +{ + struct PtrStepSz map = { map_ptr + map_offset, map_step, rows + 1, cols + 1 }; + + __local int smem[18][18]; + + int2 blockIdx = (int2)(get_group_id(0), get_group_id(1)); + int2 blockDim = (int2)(get_local_size(0), get_local_size(1)); + int2 threadIdx = (int2)(get_local_id(0), get_local_id(1)); + + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + smem[threadIdx.y + 1][threadIdx.x + 1] = x < map.cols && y < map.rows ? get(map, y, x) : 0; + if (threadIdx.y == 0) + smem[0][threadIdx.x + 1] = x < map.cols ? get(map, y - 1, x) : 0; + if (threadIdx.y == blockDim.y - 1) + smem[blockDim.y + 1][threadIdx.x + 1] = y + 1 < map.rows ? get(map, y + 1, x) : 0; + if (threadIdx.x == 0) + smem[threadIdx.y + 1][0] = y < map.rows ? get(map, y, x - 1) : 0; + if (threadIdx.x == blockDim.x - 1) + smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols && y < map.rows ? get(map, y, x + 1) : 0; + if (threadIdx.x == 0 && threadIdx.y == 0) + smem[0][0] = y > 0 && x > 0 ? get(map, y - 1, x - 1) : 0; + if (threadIdx.x == blockDim.x - 1 && threadIdx.y == 0) + smem[0][blockDim.x + 1] = y > 0 && x + 1 < map.cols ? get(map, y - 1, x + 1) : 0; + if (threadIdx.x == 0 && threadIdx.y == blockDim.y - 1) + smem[blockDim.y + 1][0] = y + 1 < map.rows && x > 0 ? get(map, y + 1, x - 1) : 0; + if (threadIdx.x == blockDim.x - 1 && threadIdx.y == blockDim.y - 1) + smem[blockDim.y + 1][blockDim.x + 1] = y + 1 < map.rows && x + 1 < map.cols ? get(map, y + 1, x + 1) : 0; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (x >= cols || y >= rows) + return; + + int n; + + #pragma unroll + for (int k = 0; k < 16; ++k) + { + n = 0; + + if (smem[threadIdx.y + 1][threadIdx.x + 1] == 1) + { + n += smem[threadIdx.y ][threadIdx.x ] == 2; + n += smem[threadIdx.y ][threadIdx.x + 1] == 2; + n += smem[threadIdx.y ][threadIdx.x + 2] == 2; + + n += smem[threadIdx.y + 1][threadIdx.x ] == 2; + n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2; + + n += smem[threadIdx.y + 2][threadIdx.x ] == 2; + n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2; + n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2; + } + + if (n > 0) + smem[threadIdx.y + 1][threadIdx.x + 1] = 2; + } + + const int e = smem[threadIdx.y + 1][threadIdx.x + 1]; + set(map, y, x, e); + n = 0; + + if (e == 2) + { + n += smem[threadIdx.y ][threadIdx.x ] == 1; + n += smem[threadIdx.y ][threadIdx.x + 1] == 1; + n += smem[threadIdx.y ][threadIdx.x + 2] == 1; + + n += smem[threadIdx.y + 1][threadIdx.x ] == 1; + n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1; + + n += smem[threadIdx.y + 2][threadIdx.x ] == 1; + n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1; + n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1; + } + + if (n > 0) + { + const int ind = atomic_inc(counter); + st[ind] = (ushort2)(x + 1, y + 1); + } +} + +__constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1}; +__constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1}; + + +#define stack_size 512 +#define map_index mad24(map_step, pos.y, pos.x * (int)sizeof(int)) + +__kernel void __attribute__((reqd_work_group_size(128, 1, 1))) +edgesHysteresisGlobal(__global uchar * map, int map_step, int map_offset, + __global ushort2 * st1, __global ushort2 * st2, __global int * counter, + int rows, int cols, int count) +{ + map += map_offset; + + int lidx = get_local_id(0); + + int grp_idx = get_group_id(0); + int grp_idy = get_group_id(1); + + __local unsigned int s_counter, s_ind; + __local ushort2 s_st[stack_size]; + + if (lidx == 0) + s_counter = 0; + barrier(CLK_LOCAL_MEM_FENCE); + + int ind = mad24(grp_idy, (int)get_local_size(0), grp_idx); + + if (ind < count) + { + ushort2 pos = st1[ind]; + if (lidx < 8) + { + pos.x += c_dx[lidx]; + pos.y += c_dy[lidx]; + if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows && *(__global int *)(map + map_index) == 1) + { + *(__global int *)(map + map_index) = 2; + ind = atomic_inc(&s_counter); + s_st[ind] = pos; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + while (s_counter > 0 && s_counter <= stack_size - get_local_size(0)) + { + const int subTaskIdx = lidx >> 3; + const int portion = min(s_counter, (uint)(get_local_size(0)>> 3)); + + if (subTaskIdx < portion) + pos = s_st[s_counter - 1 - subTaskIdx]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (lidx == 0) + s_counter -= portion; + barrier(CLK_LOCAL_MEM_FENCE); + + if (subTaskIdx < portion) + { + pos.x += c_dx[lidx & 7]; + pos.y += c_dy[lidx & 7]; + if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows && *(__global int *)(map + map_index) == 1) + { + *(__global int *)(map + map_index) = 2; + ind = atomic_inc(&s_counter); + s_st[ind] = pos; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + + if (s_counter > 0) + { + if (lidx == 0) + { + ind = atomic_add(counter, s_counter); + s_ind = ind - s_counter; + } + barrier(CLK_LOCAL_MEM_FENCE); + + ind = s_ind; + for (int i = lidx; i < (int)s_counter; i += get_local_size(0)) + st2[ind + i] = s_st[i]; + } + } +} + +#undef map_index +#undef stack_size + +// Get the edge result. egde type of value 2 will be marked as an edge point and set to 255. Otherwise 0. +// map edge type mappings +// dst edge output + +__kernel void getEdges(__global const uchar * mapptr, int map_step, int map_offset, + __global uchar * dst, int dst_step, int dst_offset, int rows, int cols) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (y < rows && x < cols) + { + int map_index = mad24(map_step, y + 1, (x + 1) * (int)sizeof(int) + map_offset); + int dst_index = mad24(dst_step, y, x + dst_offset); + + __global const int * map = (__global const int *)(mapptr + map_index); + + dst[dst_index] = (uchar)(-(map[0] >> 1)); + } +} diff --git a/modules/imgproc/test/ocl/test_canny.cpp b/modules/imgproc/test/ocl/test_canny.cpp new file mode 100644 index 0000000000..e328d2a2fd --- /dev/null +++ b/modules/imgproc/test/ocl/test_canny.cpp @@ -0,0 +1,117 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" +#include "opencv2/ts/ocl_test.hpp" + +#ifdef HAVE_OPENCL + +namespace cvtest { +namespace ocl { + +//////////////////////////////////////////////////////// +// Canny + +IMPLEMENT_PARAM_CLASS(AppertureSize, int) +IMPLEMENT_PARAM_CLASS(L2gradient, bool) +IMPLEMENT_PARAM_CLASS(UseRoi, bool) + +PARAM_TEST_CASE(Canny, AppertureSize, L2gradient, UseRoi) +{ + int apperture_size; + bool useL2gradient, use_roi; + + TEST_DECLARE_INPUT_PARAMETER(src) + TEST_DECLARE_OUTPUT_PARAMETER(dst) + + virtual void SetUp() + { + apperture_size = GET_PARAM(0); + useL2gradient = GET_PARAM(1); + use_roi = GET_PARAM(2); + } + + void generateTestData() + { + Mat img = readImage("shared/fruits.png", IMREAD_GRAYSCALE); + ASSERT_FALSE(img.empty()) << "cann't load shared/fruits.png"; + + Size roiSize = img.size(); + int type = img.type(); + ASSERT_EQ(CV_8UC1, type); + + Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, type, 2, 100); + img.copyTo(src_roi); + + Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 5, 16); + + UMAT_UPLOAD_INPUT_PARAMETER(src) + UMAT_UPLOAD_OUTPUT_PARAMETER(dst) + } +}; + +OCL_TEST_P(Canny, Accuracy) +{ + generateTestData(); + + const double low_thresh = 50.0, high_thresh = 100.0; + + OCL_OFF(cv::Canny(src_roi, dst_roi, low_thresh, high_thresh, apperture_size, useL2gradient)); + OCL_ON(cv::Canny(usrc_roi, udst_roi, low_thresh, high_thresh, apperture_size, useL2gradient)); + + EXPECT_MAT_SIMILAR(dst_roi, udst_roi, 1e-2); + EXPECT_MAT_SIMILAR(dst, udst, 1e-2); +} + +OCL_INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine( + testing::Values(AppertureSize(3), AppertureSize(5)), + testing::Values(L2gradient(false), L2gradient(true)), + testing::Values(UseRoi(false), UseRoi(true)))); + +} } // namespace cvtest::ocl + +#endif // HAVE_OPENCL diff --git a/modules/ts/include/opencv2/ts/ocl_test.hpp b/modules/ts/include/opencv2/ts/ocl_test.hpp index 2fea52a95e..a4b2ec38cd 100644 --- a/modules/ts/include/opencv2/ts/ocl_test.hpp +++ b/modules/ts/include/opencv2/ts/ocl_test.hpp @@ -96,18 +96,18 @@ extern int test_loop_times; #define EXPECT_MAT_NEAR(mat1, mat2, eps) \ { \ - ASSERT_EQ(mat1.type(), mat2.type()); \ - ASSERT_EQ(mat1.size(), mat2.size()); \ - EXPECT_LE(checkNorm(mat1, mat2), eps) \ - << cv::format("Size: %d x %d", mat1.size().width, mat1.size().height) << std::endl; \ + ASSERT_EQ(mat1.type(), mat2.type()); \ + ASSERT_EQ(mat1.size(), mat2.size()); \ + EXPECT_LE(checkNorm(mat1, mat2), eps) \ + << "Size: " << mat1.size() << std::endl; \ } #define EXPECT_MAT_NEAR_RELATIVE(mat1, mat2, eps) \ { \ - ASSERT_EQ(mat1.type(), mat2.type()); \ - ASSERT_EQ(mat1.size(), mat2.size()); \ - EXPECT_LE(checkNormRelative(mat1, mat2), eps) \ - << cv::format("Size: %d x %d", mat1.size().width, mat1.size().height) << std::endl; \ + ASSERT_EQ(mat1.type(), mat2.type()); \ + ASSERT_EQ(mat1.size(), mat2.size()); \ + EXPECT_LE(checkNormRelative(mat1, mat2), eps) \ + << "Size: " << mat1.size() << std::endl; \ } #define OCL_EXPECT_MATS_NEAR(name, eps) \ @@ -134,8 +134,8 @@ extern int test_loop_times; { \ ASSERT_EQ(mat1.type(), mat2.type()); \ ASSERT_EQ(mat1.size(), mat2.size()); \ - EXPECT_LE(checkSimilarity(mat1, mat2), eps); \ - << cv::format("Size: %d x %d", mat1.size().width, mat1.size().height) << std::endl; \ + EXPECT_LE(checkSimilarity(mat1, mat2), eps) \ + << "Size: " << mat1.size() << std::endl; \ } using perf::MatDepth;