From 52ed6d0d2780cd524310e338f01f47ef965eaf8d Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 17 Jan 2014 19:00:52 +0400 Subject: [PATCH] ported cv::goodFeaturesToTrack to T-API --- modules/imgproc/src/featureselect.cpp | 219 ++++++++++++++++++++----- modules/imgproc/src/morph.cpp | 11 +- modules/imgproc/src/opencl/gftt.cl | 81 +++++++++ modules/imgproc/test/ocl/test_gftt.cpp | 139 ++++++++++++++++ modules/ocl/src/gftt.cpp | 132 ++++----------- modules/ocl/src/opencl/imgproc_gftt.cl | 44 +---- 6 files changed, 439 insertions(+), 187 deletions(-) create mode 100644 modules/imgproc/src/opencl/gftt.cl create mode 100644 modules/imgproc/test/ocl/test_gftt.cpp diff --git a/modules/imgproc/src/featureselect.cpp b/modules/imgproc/src/featureselect.cpp index 8c740382f2..47015b06a6 100644 --- a/modules/imgproc/src/featureselect.cpp +++ b/modules/imgproc/src/featureselect.cpp @@ -38,18 +38,179 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ + #include "precomp.hpp" +#include "opencl_kernels.hpp" + #include #include +#include namespace cv { -template struct greaterThanPtr +struct greaterThanPtr : + public std::binary_function { - bool operator()(const T* a, const T* b) const { return *a > *b; } + bool operator () (const float * a, const float * b) const + { return *a > *b; } }; +struct Corner +{ + float val; + short y; + short x; + + bool operator < (const Corner & c) const + { return val > c.val; } +}; + +static bool ocl_goodFeaturesToTrack( InputArray _image, OutputArray _corners, + int maxCorners, double qualityLevel, double minDistance, + InputArray _mask, int blockSize, + bool useHarrisDetector, double harrisK ) +{ + UMat eig, tmp; + if( useHarrisDetector ) + cornerHarris( _image, eig, blockSize, 3, harrisK ); + else + cornerMinEigenVal( _image, eig, blockSize, 3 ); + + double maxVal = 0; + minMaxLoc( eig, NULL, &maxVal, NULL, NULL, _mask ); + threshold( eig, eig, maxVal*qualityLevel, 0, THRESH_TOZERO ); + dilate( eig, 
tmp, Mat()); + + Size imgsize = _image.size(); + std::vector tmpCorners; + size_t total, i, j, ncorners = 0, possibleCornersCount = + std::max(1024, static_cast(imgsize.area() * 0.1)); + bool haveMask = !_mask.empty(); + + // collect list of pointers to features - put them into temporary image + { + ocl::Kernel k("findCorners", ocl::imgproc::gftt_oclsrc, + format(haveMask ? "-D HAVE_MASK" : "")); + if (k.empty()) + return false; + + UMat counter(1, 1, CV_32SC1, Scalar::all(0)), + corners(1, possibleCornersCount * sizeof(Corner), CV_8UC1); + ocl::KernelArg eigarg = ocl::KernelArg::ReadOnlyNoSize(eig), + tmparg = ocl::KernelArg::ReadOnlyNoSize(tmp), + cornersarg = ocl::KernelArg::PtrWriteOnly(corners), + counterarg = ocl::KernelArg::PtrReadWrite(counter); + + if (!haveMask) + k.args(eigarg, tmparg, cornersarg, counterarg, + imgsize.height - 2, imgsize.width - 2); + else + { + UMat mask = _mask.getUMat(); + k.args(eigarg, ocl::KernelArg::ReadOnlyNoSize(mask), tmparg, + cornersarg, counterarg, imgsize.height - 2, imgsize.width - 2); + } + + size_t globalsize[2] = { imgsize.width - 2, imgsize.height - 2 }; + if (!k.run(2, globalsize, NULL, false)) + return false; + + total = counter.getMat(ACCESS_READ).at(0, 0); + size_t totalb = sizeof(Corner) * total; + + tmpCorners.resize(total); + Mat mcorners(1, totalb, CV_8UC1, &tmpCorners[0]); + corners.colRange(0, totalb).copyTo(mcorners); + } + + std::sort( tmpCorners.begin(), tmpCorners.end() ); + std::vector corners; + corners.reserve(total); + + if (minDistance >= 1) + { + // Partition the image into larger grids + int w = imgsize.width, h = imgsize.height; + + const int cell_size = cvRound(minDistance); + const int grid_width = (w + cell_size - 1) / cell_size; + const int grid_height = (h + cell_size - 1) / cell_size; + + std::vector > grid(grid_width*grid_height); + minDistance *= minDistance; + + for( i = 0; i < total; i++ ) + { + const Corner & c = tmpCorners[i]; + bool good = true; + + int x_cell = c.x / cell_size; + 
int y_cell = c.y / cell_size; + + int x1 = x_cell - 1; + int y1 = y_cell - 1; + int x2 = x_cell + 1; + int y2 = y_cell + 1; + + // boundary check + x1 = std::max(0, x1); + y1 = std::max(0, y1); + x2 = std::min(grid_width-1, x2); + y2 = std::min(grid_height-1, y2); + + for( int yy = y1; yy <= y2; yy++ ) + for( int xx = x1; xx <= x2; xx++ ) + { + std::vector &m = grid[yy*grid_width + xx]; + + if( m.size() ) + { + for(j = 0; j < m.size(); j++) + { + float dx = c.x - m[j].x; + float dy = c.y - m[j].y; + + if( dx*dx + dy*dy < minDistance ) + { + good = false; + goto break_out; + } + } + } + } + + break_out: + + if (good) + { + grid[y_cell*grid_width + x_cell].push_back(Point2f((float)c.x, (float)c.y)); + + corners.push_back(Point2f((float)c.x, (float)c.y)); + ++ncorners; + + if( maxCorners > 0 && (int)ncorners == maxCorners ) + break; + } + } + } + else + { + for( i = 0; i < total; i++ ) + { + const Corner & c = tmpCorners[i]; + + corners.push_back(Point2f((float)c.x, (float)c.y)); + ++ncorners; + if( maxCorners > 0 && (int)ncorners == maxCorners ) + break; + } + } + + Mat(corners).convertTo(_corners, _corners.fixedType() ? 
_corners.type() : CV_32F); + return true; +} + } void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners, @@ -57,27 +218,32 @@ void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners, InputArray _mask, int blockSize, bool useHarrisDetector, double harrisK ) { - Mat image = _image.getMat(), mask = _mask.getMat(); - CV_Assert( qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0 ); - CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) ); + CV_Assert( _mask.empty() || (_mask.type() == CV_8UC1 && _mask.sameSize(_image)) ); - Mat eig, tmp; + if (ocl::useOpenCL() && _image.dims() <= 2 && _image.isUMat()) + { + CV_Assert(ocl_goodFeaturesToTrack(_image, _corners, maxCorners, qualityLevel, minDistance, + _mask, blockSize, useHarrisDetector, harrisK)); + return; + } + + Mat image = _image.getMat(), eig, tmp; if( useHarrisDetector ) cornerHarris( image, eig, blockSize, 3, harrisK ); else cornerMinEigenVal( image, eig, blockSize, 3 ); double maxVal = 0; - minMaxLoc( eig, 0, &maxVal, 0, 0, mask ); + minMaxLoc( eig, 0, &maxVal, 0, 0, _mask ); threshold( eig, eig, maxVal*qualityLevel, 0, THRESH_TOZERO ); dilate( eig, tmp, Mat()); Size imgsize = image.size(); - std::vector tmpCorners; // collect list of pointers to features - put them into temporary image + Mat mask = _mask.getMat(); for( int y = 1; y < imgsize.height - 1; y++ ) { const float* eig_data = (const float*)eig.ptr(y); @@ -92,11 +258,11 @@ void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners, } } - std::sort( tmpCorners.begin(), tmpCorners.end(), greaterThanPtr() ); + std::sort( tmpCorners.begin(), tmpCorners.end(), greaterThanPtr() ); std::vector corners; size_t i, j, total = tmpCorners.size(), ncorners = 0; - if(minDistance >= 1) + if (minDistance >= 1) { // Partition the image into larger grids int w = image.cols; @@ -133,7 +299,6 @@ void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners, y2 = std::min(grid_height-1, y2); 
for( int yy = y1; yy <= y2; yy++ ) - { for( int xx = x1; xx <= x2; xx++ ) { std::vector &m = grid[yy*grid_width + xx]; @@ -153,14 +318,11 @@ void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners, } } } - } break_out: - if(good) + if (good) { - // printf("%d: %d %d -> %d %d, %d, %d -- %d %d %d %d, %d %d, c=%d\n", - // i,x, y, x_cell, y_cell, (int)minDistance, cell_size,x1,y1,x2,y2, grid_width,grid_height,c); grid[y_cell*grid_width + x_cell].push_back(Point2f((float)x, (float)y)); corners.push_back(Point2f((float)x, (float)y)); @@ -187,33 +349,6 @@ void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners, } Mat(corners).convertTo(_corners, _corners.fixedType() ? _corners.type() : CV_32F); - - /* - for( i = 0; i < total; i++ ) - { - int ofs = (int)((const uchar*)tmpCorners[i] - eig.data); - int y = (int)(ofs / eig.step); - int x = (int)((ofs - y*eig.step)/sizeof(float)); - - if( minDistance > 0 ) - { - for( j = 0; j < ncorners; j++ ) - { - float dx = x - corners[j].x; - float dy = y - corners[j].y; - if( dx*dx + dy*dy < minDistance ) - break; - } - if( j < ncorners ) - continue; - } - - corners.push_back(Point2f((float)x, (float)y)); - ++ncorners; - if( maxCorners > 0 && (int)ncorners == maxCorners ) - break; - } -*/ } CV_IMPL void diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index f024a521c7..7ade970d95 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1404,10 +1404,10 @@ static void morphOp( int op, InputArray _src, OutputArray _dst, int src_type = _src.type(), dst_type = _dst.type(), src_cn = CV_MAT_CN(src_type), src_depth = CV_MAT_DEPTH(src_type); - bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.size() == _dst.size() && src_type == dst_type && - _src.dims()<=2 && (src_cn == 1 || src_cn == 4) && (anchor.x == -1) && (anchor.y == -1) && + bool useOpenCL = cv::ocl::useOpenCL() && _dst.isUMat() && _src.size() == _dst.size() && src_type == dst_type && + 
_src.dims() <= 2 && (src_cn == 1 || src_cn == 4) && anchor.x == -1 && anchor.y == -1 && (src_depth == CV_8U || src_depth == CV_32F || src_depth == CV_64F ) && - (borderType == cv::BORDER_CONSTANT) && (borderValue == morphologyDefaultBorderValue()) && + borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue() && (op == MORPH_ERODE || op == MORPH_DILATE); Mat kernel = _kernel.getMat(); @@ -1423,10 +1423,7 @@ static void morphOp( int op, InputArray _src, OutputArray _dst, if( iterations == 0 || kernel.rows*kernel.cols == 1 ) { - Mat src = _src.getMat(); - _dst.create( src.size(), src.type() ); - Mat dst = _dst.getMat(); - src.copyTo(dst); + _src.copyTo(_dst); return; } diff --git a/modules/imgproc/src/opencl/gftt.cl b/modules/imgproc/src/opencl/gftt.cl new file mode 100644 index 0000000000..5342426567 --- /dev/null +++ b/modules/imgproc/src/opencl/gftt.cl @@ -0,0 +1,81 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Zhang Ying, zhangying913@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+//
+//M*/
+// findCorners: one work-item per interior pixel; appends a (value, packed x/y) record for every corner candidate.
+__kernel void findCorners(__global const uchar * eigptr, int eig_step, int eig_offset,
+#ifdef HAVE_MASK
+                          __global const uchar * mask, int mask_step, int mask_offset,
+#endif
+                          __global const uchar * tmpptr, int tmp_step, int tmp_offset,
+                          __global uchar * cornersptr, __global int * counter, // output records + shared append index
+                          int rows, int cols) // extent of the processed region, i.e. the image without its one-pixel border
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (x < cols && y < rows)
+    {
+        ++x, ++y; // shift by one so the outermost row/column of the image is never examined
+
+        int eig_index = mad24(y, eig_step, eig_offset + x * (int)sizeof(float)); // byte offsets: steps/offsets are in bytes
+        int tmp_index = mad24(y, tmp_step, tmp_offset + x * (int)sizeof(float));
+#ifdef HAVE_MASK
+        int mask_index = mad24(y, mask_step, mask_offset + x); // mask is addressed as one byte per pixel
+        mask += mask_index;
+#endif
+
+        float val = *(__global const float *)(eigptr + eig_index);
+        float tmp = *(__global const float *)(tmpptr + tmp_index);
+
+        if (val != 0 && val == tmp // keep non-zero responses that equal the value at the same pixel of the tmp image
+#ifdef HAVE_MASK
+            && mask[0] != 0
+#endif
+            )
+        {
+            __global float2 * corners = (__global float2 *)(cornersptr + (int)sizeof(float2) * atomic_inc(counter)); // explicit cast: uchar* -> float2* is not an implicit conversion; NOTE(review): no capacity check, the host must size cornersptr for every candidate
+            corners[0] = (float2)(val, as_float( (x<<16) | y )); // second lane carries x in the high 16 bits and y in the low 16 bits, reinterpreted as float
+        }
+    }
+}
diff --git a/modules/imgproc/test/ocl/test_gftt.cpp b/modules/imgproc/test/ocl/test_gftt.cpp
new file mode 100644
index 0000000000..c924696123
--- /dev/null
+++ b/modules/imgproc/test/ocl/test_gftt.cpp
@@ -0,0 +1,139 @@
+///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners. +// +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "test_precomp.hpp" +#include "opencv2/ts/ocl_test.hpp" + +#ifdef HAVE_OPENCL + +namespace cvtest { +namespace ocl { + +//////////////////////////// GoodFeaturesToTrack ////////////////////////// + + +PARAM_TEST_CASE(GoodFeaturesToTrack, double, bool) +{ + double minDistance; + bool useRoi; + + static const int maxCorners; + static const double qualityLevel; + + TEST_DECLARE_INPUT_PARAMETER(src) + UMat points, upoints; + + virtual void SetUp() + { + minDistance = GET_PARAM(0); + useRoi = GET_PARAM(1); + } + + void generateTestData() + { + Mat frame = readImage("../gpu/opticalflow/rubberwhale1.png", IMREAD_GRAYSCALE); + ASSERT_FALSE(frame.empty()) << "could not load gpu/opticalflow/rubberwhale1.png"; + + Size roiSize = frame.size(); + Border srcBorder = randomBorder(0, useRoi ? 2 : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, frame.type(), 5, 256); + src_roi.copyTo(frame); + + UMAT_UPLOAD_INPUT_PARAMETER(src) + } +}; + +const int GoodFeaturesToTrack::maxCorners = 1000; +const double GoodFeaturesToTrack::qualityLevel = 0.01; + +OCL_TEST_P(GoodFeaturesToTrack, Accuracy) +{ + for (int j = 0; j < test_loop_times; ++j) + { + generateTestData(); + + std::vector upts, pts; + + OCL_OFF(cv::goodFeaturesToTrack(src_roi, points, maxCorners, qualityLevel, minDistance, noArray())); + ASSERT_FALSE(points.empty()); + pts.resize(points.cols); + points.copyTo(pts); + + OCL_ON(cv::goodFeaturesToTrack(usrc_roi, upoints, maxCorners, qualityLevel, minDistance)); + ASSERT_FALSE(upoints.empty()); + upts.resize(upoints.cols); + upoints.copyTo(upts); + + ASSERT_EQ(upts.size(), pts.size()); + + int mistmatch = 0; + for (size_t i = 0; i < pts.size(); ++i) + { + Point2i a = upts[i], b = pts[i]; + bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1; + + if (!eq) + ++mistmatch; + } + + double bad_ratio = static_cast(mistmatch) / pts.size(); + ASSERT_GE(1e-3, bad_ratio); + } +} + +OCL_TEST_P(GoodFeaturesToTrack, EmptyCorners) +{ + generateTestData(); + 
usrc_roi.setTo(Scalar::all(0)); + + OCL_ON(cv::goodFeaturesToTrack(usrc_roi, upoints, maxCorners, qualityLevel, minDistance)); + + ASSERT_TRUE(upoints.empty()); +} + +OCL_INSTANTIATE_TEST_CASE_P(Imgproc, GoodFeaturesToTrack, + ::testing::Combine(testing::Values(0.0, 3.0), Bool())); + +} } // namespace cvtest::ocl + +#endif diff --git a/modules/ocl/src/gftt.cpp b/modules/ocl/src/gftt.cpp index bf1036bb87..13d01942d7 100644 --- a/modules/ocl/src/gftt.cpp +++ b/modules/ocl/src/gftt.cpp @@ -48,20 +48,18 @@ using namespace cv; using namespace cv::ocl; -// currently sort procedure on the host is more efficient -static bool use_cpu_sorter = true; - // compact structure for corners struct DefCorner { float eig; //eigenvalue of corner short x; //x coordinate of corner point short y; //y coordinate of corner point -} ; +}; // compare procedure for corner //it is used for sort on the host side -struct DefCornerCompare +struct DefCornerCompare : + public std::binary_function { bool operator()(const DefCorner a, const DefCorner b) const { @@ -69,37 +67,6 @@ struct DefCornerCompare } }; -// sort corner point using opencl bitonicosrt implementation -static void sortCorners_caller(oclMat& corners, const int count) -{ - Context * cxt = Context::getContext(); - int GS = count/2; - int LS = min(255,GS); - size_t globalThreads[3] = {GS, 1, 1}; - size_t localThreads[3] = {LS, 1, 1}; - - // 2^numStages should be equal to count or the output is invalid - int numStages = 0; - for(int i = count; i > 1; i >>= 1) - { - ++numStages; - } - const int argc = 4; - std::vector< std::pair > args(argc); - std::string kernelname = "sortCorners_bitonicSort"; - args[0] = std::make_pair(sizeof(cl_mem), (void *)&corners.data); - args[1] = std::make_pair(sizeof(cl_int), (void *)&count); - for(int stage = 0; stage < numStages; ++stage) - { - args[2] = std::make_pair(sizeof(cl_int), (void *)&stage); - for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage) - { - args[3] = 
std::make_pair(sizeof(cl_int), (void *)&passOfStage); - openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1); - } - } -} - // find corners on matrix and put it into array static void findCorners_caller( const oclMat& eig_mat, //input matrix worth eigenvalues @@ -158,7 +125,8 @@ static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero) int cols = all_cols - invalid_cols , elemnum = cols * src.rows; int offset = src.offset / src.elemSize(); - {// first parallel pass + { + // first parallel pass std::vector > args; args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data )); @@ -173,7 +141,8 @@ static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero) args, -1, -1, "-D T=float -D DEPTH_5"); } - {// run final "serial" kernel to find accumulate results from threads and reset corner counter + { + // run final "serial" kernel to find accumulate results from threads and reset corner counter std::vector > args; args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum )); @@ -200,80 +169,54 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, ensureSizeIsEnough(1,1, CV_32SC1, counter_); // find max eigenvalue and reset detected counters - minMaxEig_caller(eig_,eig_minmax_,counter_); + minMaxEig_caller(eig_, eig_minmax_, counter_); // allocate buffer for kernels int corner_array_size = std::max(1024, static_cast(image.size().area() * 0.05)); - - if(!use_cpu_sorter) - { // round to 2^n - unsigned int n=1; - for(n=1;n<(unsigned int)corner_array_size;n<<=1) ; - corner_array_size = (int)n; - - ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_); - - // set to 0 to be able use bitonic sort on whole 2^n array - tmpCorners_.setTo(0); - } - else - { - ensureSizeIsEnough(1, corner_array_size , CV_32FC2, 
tmpCorners_); - } + ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_); int total = tmpCorners_.cols; // by default the number of corner is full array - std::vector tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm + std::vector tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm - //find points with high eigenvalue and put it into the output array - findCorners_caller( - eig_, - eig_minmax_, - static_cast(qualityLevel), - mask, - tmpCorners_, - counter_); + // find points with high eigenvalue and put it into the output array + findCorners_caller(eig_, eig_minmax_, static_cast(qualityLevel), mask, tmpCorners_, counter_); - if(!use_cpu_sorter) - {// sort detected corners on deivce side - sortCorners_caller(tmpCorners_, corner_array_size); - } - else - {// send non-blocking request to read real non-zero number of corners to sort it on the HOST side - openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_FALSE, 0,sizeof(int), &total, 0, NULL, NULL)); - } - - //blocking read whole corners array (sorted or not sorted) - openCLReadBuffer(tmpCorners_.clCxt,(cl_mem)tmpCorners_.data,&tmp[0],tmpCorners_.cols*sizeof(DefCorner)); + // send non-blocking request to read real non-zero number of corners to sort it on the HOST side + openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_FALSE, 0, sizeof(int), &total, 0, NULL, NULL)); if (total == 0) - {// check for trivial case + { + // check for trivial case corners.release(); return; } - if(use_cpu_sorter) - {// sort detected corners on cpu side. 
- tmp.resize(total); - std::sort(tmp.begin(), tmp.end(), DefCornerCompare()); - } + // blocking read whole corners array (sorted or not sorted) + openCLReadBuffer(tmpCorners_.clCxt, (cl_mem)tmpCorners_.data, &tmp[0], tmpCorners_.cols * sizeof(DefCorner)); - //estimate maximal size of final output array + // sort detected corners on cpu side. + tmp.resize(total); + printf("total: %d\n", total); + std::sort(tmp.begin(), tmp.end(), DefCornerCompare()); + + // estimate maximal size of final output array int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total; int D2 = (int)ceil(minDistance * minDistance); + // allocate output buffer std::vector tmp2; tmp2.reserve(total_max); if (minDistance < 1) - {// we have not distance restriction. then just copy with conversion maximal allowed points into output array - for(int i=0;i0.0f;++i) - { - tmp2.push_back(Point2f(tmp[i].x,tmp[i].y)); - } + { + // we have not distance restriction. then just copy with conversion maximal allowed points into output array + for (int i = 0; i < total_max; ++i) + tmp2.push_back(Point2f(tmp[i].x, tmp[i].y)); } else - {// we have distance restriction. then start coping to output array from the first element and check distance for each next one + { + // we have distance restriction. then start coping to output array from the first element and check distance for each next one const int cell_size = cvRound(minDistance); const int grid_width = (image.cols + cell_size - 1) / cell_size; const int grid_height = (image.rows + cell_size - 1) / cell_size; @@ -283,10 +226,6 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, for (int i = 0; i < total ; ++i) { DefCorner p = tmp[i]; - - if(p.eig<=0.0f) - break; // condition to stop that is needed for GPU bitonic sort usage. 
- bool good = true; int x_cell = static_cast(p.x / cell_size); @@ -328,9 +267,8 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, if(good) { - grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x,p.y)); - - tmp2.push_back(Point2f(p.x,p.y)); + grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x, p.y)); + tmp2.push_back(Point2f(p.x, p.y)); if (maxCorners > 0 && tmp2.size() == static_cast(maxCorners)) break; @@ -338,12 +276,14 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, } } + int final_size = static_cast(tmp2.size()); - if(final_size>0) + if (final_size > 0) corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0])); else corners.release(); } + void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector &points_v) { CV_DbgAssert(points.type() == CV_32FC2); diff --git a/modules/ocl/src/opencl/imgproc_gftt.cl b/modules/ocl/src/opencl/imgproc_gftt.cl index 4d5356cfbd..9cd57678f2 100644 --- a/modules/ocl/src/opencl/imgproc_gftt.cl +++ b/modules/ocl/src/opencl/imgproc_gftt.cl @@ -46,6 +46,7 @@ #ifndef WITH_MASK #define WITH_MASK 0 #endif + //macro to read eigenvalue matrix #define GET_SRC_32F(_x, _y) ((__global const float*)(eig + (_y)*eig_pitch))[_x] @@ -107,47 +108,6 @@ __kernel #undef GET_SRC_32F -//bitonic sort -__kernel - void sortCorners_bitonicSort - ( - __global float2 * corners, - const int count, - const int stage, - const int passOfStage - ) -{ - const int threadId = get_global_id(0); - if(threadId >= count / 2) - { - return; - } - - const int sortOrder = (((threadId/(1 << stage)) % 2)) == 1 ? 
1 : 0; // 0 is descent - - const int pairDistance = 1 << (stage - passOfStage); - const int blockWidth = 2 * pairDistance; - - const int leftId = min( (threadId % pairDistance) - + (threadId / pairDistance) * blockWidth, count ); - - const int rightId = min( leftId + pairDistance, count ); - - const float2 leftPt = corners[leftId]; - const float2 rightPt = corners[rightId]; - - const float leftVal = leftPt.x; - const float rightVal = rightPt.x; - - const bool compareResult = leftVal > rightVal; - - float2 greater = compareResult ? leftPt:rightPt; - float2 lesser = compareResult ? rightPt:leftPt; - - corners[leftId] = sortOrder ? lesser : greater; - corners[rightId] = sortOrder ? greater : lesser; -} - // this is simple short serial kernel that makes some short reduction and initialization work // it makes HOST like work to avoid additional sync with HOST to do this short work // data - input/output float2. @@ -166,4 +126,4 @@ __kernel void arithm_op_minMax_final(__global float * data, int groupnum,__globa } data[0] = minVal; data[1] = maxVal; -} \ No newline at end of file +}