/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors // Peng Xiao, pengxiao@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other oclMaterials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include #include "precomp.hpp" using namespace cv; using namespace cv::ocl; using namespace std; #if !defined (HAVE_OPENCL) void cv::ocl::Canny(const oclMat& image, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false) { throw_nogpu(); } void cv::ocl::Canny(const oclMat& image, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false){ throw_nogpu(); } void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false){ throw_nogpu(); } void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false){ throw_nogpu(); } #else namespace cv { namespace ocl { ///////////////////////////OpenCL kernel strings/////////////////////////// extern const char *imgproc_canny; } } cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(dy_) { CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size()); create(dx_.size(), -1); } void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size) { dx.create(image_size, CV_32SC1); dy.create(image_size, CV_32SC1); if(apperture_size == 3) { dx_buf.create(image_size, CV_32SC1); dy_buf.create(image_size, CV_32SC1); } else if(apperture_size > 0) { Mat kx, ky; if (!filterDX) { filterDX = createDerivFilter_GPU(CV_32F, CV_32F, 1, 0, apperture_size, BORDER_REPLICATE); } if (!filterDY) { filterDY = createDerivFilter_GPU(CV_32F, CV_32F, 0, 1, apperture_size, BORDER_REPLICATE); } } edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1); trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2); trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2); counter.create(1,1, CV_32SC1); } void cv::ocl::CannyBuf::release() { dx.release(); dy.release(); dx_buf.release(); dy_buf.release(); edgeBuf.release(); trackBuf1.release(); trackBuf2.release(); counter.release(); } namespace cv { namespace ocl { namespace canny { void calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols); void calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad); void calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad); void calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh); void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter, int rows, int cols); void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, oclMat& counter, int rows, int cols); void getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols); } }}// cv::ocl namespace { void CannyCaller(CannyBuf& buf, oclMat& dst, float low_thresh, float high_thresh) { using namespace ::cv::ocl::canny; calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh); edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols); edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols); getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols); } } void cv::ocl::Canny(const oclMat& src, oclMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient) { CannyBuf buf(src.size(), apperture_size); Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient); } void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient) { using namespace ::cv::ocl::canny; CV_Assert(src.type() == CV_8UC1); if( low_thresh > high_thresh ) std::swap( low_thresh, high_thresh ); dst.create(src.size(), CV_8U); dst.setTo(Scalar::all(0)); buf.create(src.size(), apperture_size); buf.edgeBuf.setTo(Scalar::all(0)); buf.counter.setTo(Scalar::all(0)); if (apperture_size == 3) { calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols); calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient); } else { // FIXME: // current ocl implementation requires the src and dst having same type // convertTo is time consuming so this may be optimized later. oclMat src_omat32f = src; src.convertTo(src_omat32f, CV_32F); // FIXME buf.filterDX->apply(src_omat32f, buf.dx); buf.filterDY->apply(src_omat32f, buf.dy); buf.dx.convertTo(buf.dx, CV_32S); // FIXME buf.dy.convertTo(buf.dy, CV_32S); // FIXME calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient); } CannyCaller(buf, dst, static_cast(low_thresh), static_cast(high_thresh)); } void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& dst, double low_thresh, double high_thresh, bool L2gradient) { CannyBuf buf(dx, dy); Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient); } void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& dst, double low_thresh, double high_thresh, bool L2gradient) { using namespace ::cv::ocl::canny; CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size()); if( low_thresh > high_thresh ) std::swap( low_thresh, high_thresh); dst.create(dx.size(), CV_8U); dst.setTo(Scalar::all(0)); buf.dx = dx; buf.dy = dy; buf.create(dx.size(), -1); buf.edgeBuf.setTo(Scalar::all(0)); buf.counter.setTo(Scalar::all(0)); calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient); CannyCaller(buf, dst, static_cast(low_thresh), static_cast(high_thresh)); } void canny::calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols) { Context *clCxt = src.clCxt; string kernelName = "calcSobelRowPass"; vector< pair > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data)); args.push_back( make_pair( sizeof(cl_int), (void *)&rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset)); size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } void canny::calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad) { Context *clCxt = dx_buf.clCxt; string kernelName = "calcMagnitude_buf"; vector< pair > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data)); args.push_back( make_pair( sizeof(cl_int), (void *)&rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset)); size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; char build_options [15] = ""; if(L2Grad) { strcat(build_options, "-D L2GRAD"); } openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); } void canny::calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad) { Context *clCxt = dx.clCxt; string kernelName = "calcMagnitude"; vector< pair > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data)); args.push_back( make_pair( sizeof(cl_int), (void *)&rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset)); size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; char build_options [15] = ""; if(L2Grad) { strcat(build_options, "-D L2GRAD"); } openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); } void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh) { Context *clCxt = dx.clCxt; vector< pair > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data)); args.push_back( make_pair( sizeof(cl_int), (void *)&rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); args.push_back( make_pair( sizeof(cl_float), (void *)&low_thresh)); args.push_back( make_pair( sizeof(cl_float), (void *)&high_thresh)); args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&map.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset)); #if CALCMAP_FIXED size_t globalThreads[3] = {cols, rows, 1}; string kernelName = "calcMap"; size_t localThreads[3] = {16, 16, 1}; #else size_t globalThreads[3] = {cols, rows, 1}; string kernelName = "calcMap_2"; size_t localThreads[3] = {256, 1, 1}; #endif openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter, int rows, int cols) { Context *clCxt = map.clCxt; string kernelName = "edgesHysteresisLocal"; vector< pair > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data)); args.push_back( make_pair( sizeof(cl_int), (void *)&rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&map.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset)); size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, oclMat& counter, int rows, int cols) { unsigned int count = Mat(counter).at(0); Context *clCxt = map.clCxt; string kernelName = "edgesHysteresisGlobal"; vector< pair > args; size_t localThreads[3] = {128, 1, 1}; #define DIVUP(a, b) ((a)+(b)-1)/(b) while(count > 0) { counter.setTo(0); args.clear(); size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1}; args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data)); args.push_back( make_pair( sizeof(cl_int), (void *)&rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&count)); args.push_back( make_pair( sizeof(cl_int), (void *)&map.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset)); openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); count = Mat(counter).at(0); std::swap(st1, st2); } #undef DIVUP } void canny::getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols) { Context *clCxt = map.clCxt; string kernelName = "getEdges"; vector< pair > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); args.push_back( make_pair( sizeof(cl_int), (void *)&rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&map.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset)); size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } #endif // HAVE_OPENCL