ported cv::goodFeaturesToTrack to T-API

2025-08-06 06:26:29 +08:00 · 2014-01-17 19:00:52 +04:00 · 2014-01-17 19:00:52 +04:00 · 52ed6d0d27
commit 52ed6d0d27
parent 6b8bee6e0b
6 changed files with 439 additions and 187 deletions
--- a/modules/imgproc/src/featureselect.cpp
+++ b/modules/imgproc/src/featureselect.cpp
@ -38,18 +38,179 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
+
 #include "precomp.hpp"
+#include "opencl_kernels.hpp"
+
 #include <cstdio>
 #include <vector>
+#include <iostream>

 namespace cv
 {

-template<typename T> struct greaterThanPtr
+struct greaterThanPtr :  // comparator over pointers to float: compares the pointed-to values, not the addresses
+        public std::binary_function<const float *, const float *, bool>
 {
-    bool operator()(const T* a, const T* b) const { return *a > *b; }
+    bool operator () (const float * a, const float * b) const  // true when *a is strictly greater than *b
+    { return *a > *b; }
 };

+struct Corner  // one corner candidate; filled by copying the raw bytes produced by the findCorners OpenCL kernel
+{
+    float val;  // corner response value, used as the sort key
+    short y;    // row of the candidate
+    short x;    // column of the candidate
+
+    bool operator < (const Corner & c) const  // inverted on purpose: std::sort then orders by descending response
+    {  return val > c.val; }
+};
+
+// OpenCL (T-API) branch of goodFeaturesToTrack.
+//
+// Builds the corner response map (Harris or minimal eigenvalue), zeroes everything below
+// qualityLevel * max response and runs the "findCorners" kernel, which gathers every
+// interior pixel whose response is non-zero and equal to its dilated value (and whose mask
+// pixel is non-zero when a mask is given). The candidates are downloaded, sorted by
+// descending response on the host and filtered by minDistance / maxCorners.
+//
+// _corners receives the selected points (converted to its fixed type if it has one,
+// CV_32F otherwise) and is released when nothing was found.
+//
+// Returns false when the kernel cannot be created or launched, true otherwise.
+static bool ocl_goodFeaturesToTrack( InputArray _image, OutputArray _corners,
+                                     int maxCorners, double qualityLevel, double minDistance,
+                                     InputArray _mask, int blockSize,
+                                     bool useHarrisDetector, double harrisK )
+{
+    UMat eig, tmp;
+    if( useHarrisDetector )
+        cornerHarris( _image, eig, blockSize, 3, harrisK );
+    else
+        cornerMinEigenVal( _image, eig, blockSize, 3 );
+
+    double maxVal = 0;
+    minMaxLoc( eig, NULL, &maxVal, NULL, NULL, _mask );
+    threshold( eig, eig, maxVal*qualityLevel, 0, THRESH_TOZERO );
+    dilate( eig, tmp, Mat());
+
+    Size imgsize = _image.size();
+
+    // the kernel only visits interior pixels; without any there is nothing to find, and
+    // the launch size (width - 2, height - 2) would be zero or wrap around as size_t
+    if (imgsize.width < 3 || imgsize.height < 3)
+    {
+        _corners.release();
+        return true;
+    }
+
+    std::vector<Corner> tmpCorners;
+    size_t total, i, j, ncorners = 0, possibleCornersCount =
+            std::max(1024, static_cast<int>(imgsize.area() * 0.1));
+    bool haveMask = !_mask.empty();
+
+    // run the kernel that appends every corner candidate to a device-side array,
+    // then download the candidates into tmpCorners
+    {
+        ocl::Kernel k("findCorners", ocl::imgproc::gftt_oclsrc,
+                      format(haveMask ? "-D HAVE_MASK" : ""));
+        if (k.empty())
+            return false;
+
+        // counter: number of candidates found; corners: raw byte buffer of Corner records
+        UMat counter(1, 1, CV_32SC1, Scalar::all(0)),
+                corners(1, possibleCornersCount * sizeof(Corner), CV_8UC1);
+        ocl::KernelArg eigarg = ocl::KernelArg::ReadOnlyNoSize(eig),
+                tmparg = ocl::KernelArg::ReadOnlyNoSize(tmp),
+                cornersarg = ocl::KernelArg::PtrWriteOnly(corners),
+                counterarg = ocl::KernelArg::PtrReadWrite(counter);
+
+        if (!haveMask)
+            k.args(eigarg, tmparg, cornersarg, counterarg,
+                   imgsize.height - 2, imgsize.width - 2);
+        else
+        {
+            UMat mask = _mask.getUMat();
+            k.args(eigarg, ocl::KernelArg::ReadOnlyNoSize(mask), tmparg,
+                   cornersarg, counterarg, imgsize.height - 2, imgsize.width - 2);
+        }
+
+        // one work-item per interior pixel
+        size_t globalsize[2] = { imgsize.width - 2, imgsize.height - 2 };
+        if (!k.run(2, globalsize, NULL, false))
+            return false;
+
+        // the kernel increments the counter for every candidate it sees, so the value can
+        // exceed the buffer capacity; never read back more records than the buffer holds
+        total = std::min<size_t>(counter.getMat(ACCESS_READ).at<int>(0, 0), possibleCornersCount);
+        if (total == 0)
+        {
+            // nothing found: avoid taking &tmpCorners[0] of an empty vector
+            _corners.release();
+            return true;
+        }
+        size_t totalb = sizeof(Corner) * total;
+
+        // wrap the host vector in a Mat header so the device bytes land directly in it
+        tmpCorners.resize(total);
+        Mat mcorners(1, totalb, CV_8UC1, &tmpCorners[0]);
+        corners.colRange(0, totalb).copyTo(mcorners);
+    }
+
+    // strongest response first (Corner::operator< is inverted)
+    std::sort( tmpCorners.begin(), tmpCorners.end() );
+    std::vector<Point2f> corners;
+    corners.reserve(total);
+
+    if (minDistance >= 1)
+    {
+         // Partition the image into larger grids
+        int w = imgsize.width, h = imgsize.height;
+
+        const int cell_size = cvRound(minDistance);
+        const int grid_width = (w + cell_size - 1) / cell_size;
+        const int grid_height = (h + cell_size - 1) / cell_size;
+
+        // accepted corners are bucketed per cell, so only the 3x3 block of cells
+        // around a candidate has to be searched for neighbours that are too close
+        std::vector<std::vector<Point2f> > grid(grid_width*grid_height);
+        minDistance *= minDistance; // compare squared distances from here on
+
+        for( i = 0; i < total; i++ )
+        {
+            const Corner & c = tmpCorners[i];
+            bool good = true;
+
+            int x_cell = c.x / cell_size;
+            int y_cell = c.y / cell_size;
+
+            int x1 = x_cell - 1;
+            int y1 = y_cell - 1;
+            int x2 = x_cell + 1;
+            int y2 = y_cell + 1;
+
+            // boundary check
+            x1 = std::max(0, x1);
+            y1 = std::max(0, y1);
+            x2 = std::min(grid_width-1, x2);
+            y2 = std::min(grid_height-1, y2);
+
+            for( int yy = y1; yy <= y2; yy++ )
+                for( int xx = x1; xx <= x2; xx++ )
+                {
+                    std::vector<Point2f> &m = grid[yy*grid_width + xx];
+
+                    if( m.size() )
+                    {
+                        for(j = 0; j < m.size(); j++)
+                        {
+                            float dx = c.x - m[j].x;
+                            float dy = c.y - m[j].y;
+
+                            if( dx*dx + dy*dy < minDistance )
+                            {
+                                good = false;
+                                goto break_out;
+                            }
+                        }
+                    }
+                }
+
+            break_out:
+
+            if (good)
+            {
+                grid[y_cell*grid_width + x_cell].push_back(Point2f((float)c.x, (float)c.y));
+
+                corners.push_back(Point2f((float)c.x, (float)c.y));
+                ++ncorners;
+
+                // maxCorners <= 0 means "no limit"
+                if( maxCorners > 0 && (int)ncorners == maxCorners )
+                    break;
+            }
+        }
+    }
+    else
+    {
+        // no distance constraint: take the strongest candidates as they come
+        for( i = 0; i < total; i++ )
+        {
+            const Corner & c = tmpCorners[i];
+
+            corners.push_back(Point2f((float)c.x, (float)c.y));
+            ++ncorners;
+            if( maxCorners > 0 && (int)ncorners == maxCorners )
+                break;
+        }
+    }
+
+    Mat(corners).convertTo(_corners, _corners.fixedType() ? _corners.type() : CV_32F);
+    return true;
+}
+
 }

 void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners,
@ -57,27 +218,32 @@ void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners,
                              InputArray _mask, int blockSize,
                              bool useHarrisDetector, double harrisK )
 {
-    Mat image = _image.getMat(), mask = _mask.getMat();
-
    CV_Assert( qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0 );
-    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) );
+    CV_Assert( _mask.empty() || (_mask.type() == CV_8UC1 && _mask.sameSize(_image)) );

-    Mat eig, tmp;
+    if (ocl::useOpenCL() && _image.dims() <= 2 && _image.isUMat())
+    {
+        CV_Assert(ocl_goodFeaturesToTrack(_image, _corners, maxCorners, qualityLevel, minDistance,
+                                          _mask, blockSize, useHarrisDetector, harrisK));
+        return;
+    }
+
+    Mat image = _image.getMat(), eig, tmp;
    if( useHarrisDetector )
        cornerHarris( image, eig, blockSize, 3, harrisK );
    else
        cornerMinEigenVal( image, eig, blockSize, 3 );

    double maxVal = 0;
-    minMaxLoc( eig, 0, &maxVal, 0, 0, mask );
+    minMaxLoc( eig, 0, &maxVal, 0, 0, _mask );
    threshold( eig, eig, maxVal*qualityLevel, 0, THRESH_TOZERO );
    dilate( eig, tmp, Mat());

    Size imgsize = image.size();
-
    std::vector<const float*> tmpCorners;

    // collect list of pointers to features - put them into temporary image
+    Mat mask = _mask.getMat();
    for( int y = 1; y < imgsize.height - 1; y++ )
    {
        const float* eig_data = (const float*)eig.ptr(y);
@ -92,11 +258,11 @@ void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners,
        }
    }

-    std::sort( tmpCorners.begin(), tmpCorners.end(), greaterThanPtr<float>() );
+    std::sort( tmpCorners.begin(), tmpCorners.end(), greaterThanPtr() );
    std::vector<Point2f> corners;
    size_t i, j, total = tmpCorners.size(), ncorners = 0;

-    if(minDistance >= 1)
+    if (minDistance >= 1)
    {
         // Partition the image into larger grids
        int w = image.cols;
@ -133,7 +299,6 @@ void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners,
            y2 = std::min(grid_height-1, y2);

            for( int yy = y1; yy <= y2; yy++ )
-            {
                for( int xx = x1; xx <= x2; xx++ )
                {
                    std::vector <Point2f> &m = grid[yy*grid_width + xx];
@ -153,14 +318,11 @@ void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners,
                        }
                    }
                }
-            }

            break_out:

-            if(good)
+            if (good)
            {
-                // printf("%d: %d %d -> %d %d, %d, %d -- %d %d %d %d, %d %d, c=%d\n",
-                //    i,x, y, x_cell, y_cell, (int)minDistance, cell_size,x1,y1,x2,y2, grid_width,grid_height,c);
                grid[y_cell*grid_width + x_cell].push_back(Point2f((float)x, (float)y));

                corners.push_back(Point2f((float)x, (float)y));
@ -187,33 +349,6 @@ void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners,
    }

    Mat(corners).convertTo(_corners, _corners.fixedType() ? _corners.type() : CV_32F);
-
-    /*
-    for( i = 0; i < total; i++ )
-    {
-        int ofs = (int)((const uchar*)tmpCorners[i] - eig.data);
-        int y = (int)(ofs / eig.step);
-        int x = (int)((ofs - y*eig.step)/sizeof(float));
-
-        if( minDistance > 0 )
-        {
-            for( j = 0; j < ncorners; j++ )
-            {
-                float dx = x - corners[j].x;
-                float dy = y - corners[j].y;
-                if( dx*dx + dy*dy < minDistance )
-                    break;
-            }
-            if( j < ncorners )
-                continue;
-        }
-
-        corners.push_back(Point2f((float)x, (float)y));
-        ++ncorners;
-        if( maxCorners > 0 && (int)ncorners == maxCorners )
-            break;
-    }
-*/
 }

 CV_IMPL void
--- a/modules/imgproc/src/morph.cpp
+++ b/modules/imgproc/src/morph.cpp
@ -1404,10 +1404,10 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
    int src_type = _src.type(), dst_type = _dst.type(),
        src_cn = CV_MAT_CN(src_type), src_depth = CV_MAT_DEPTH(src_type);

-    bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.size() == _dst.size() && src_type == dst_type &&
-        _src.dims()<=2 && (src_cn == 1 || src_cn == 4) && (anchor.x == -1) && (anchor.y == -1) &&
+    bool useOpenCL = cv::ocl::useOpenCL() && _dst.isUMat() && _src.size() == _dst.size() && src_type == dst_type &&
+        _src.dims() <= 2 && (src_cn == 1 || src_cn == 4) && anchor.x == -1 && anchor.y == -1 &&
        (src_depth == CV_8U || src_depth == CV_32F || src_depth == CV_64F ) &&
-        (borderType == cv::BORDER_CONSTANT) && (borderValue == morphologyDefaultBorderValue()) &&
+        borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue() &&
        (op == MORPH_ERODE || op == MORPH_DILATE);

    Mat kernel = _kernel.getMat();
@ -1423,10 +1423,7 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,

    if( iterations == 0 || kernel.rows*kernel.cols == 1 )
    {
-        Mat src = _src.getMat();
-        _dst.create( src.size(), src.type() );
-        Mat dst = _dst.getMat();
-        src.copyTo(dst);
+        _src.copyTo(_dst);
        return;
    }

--- a/modules/imgproc/src/opencl/gftt.cl
+++ b/modules/imgproc/src/opencl/gftt.cl
@ -0,0 +1,81 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Zhang Ying, zhangying913@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+// Collects corner candidates from a thresholded corner-response map.
+//
+// eigptr/eig_step/eig_offset    - response map (float), byte step and byte offset
+// mask/mask_step/mask_offset    - optional 8-bit mask (only with -D HAVE_MASK)
+// tmpptr/tmp_step/tmp_offset    - second float map compared against the response
+//                                 (the host passes the dilated response)
+// cornersptr                    - output byte buffer receiving 8-byte records
+// counter                       - global counter, incremented once per candidate
+// rows, cols                    - number of pixels to visit in y and x; the visited
+//                                 region starts at pixel (1, 1)
+//
+// NOTE(review): the record index is not checked against the capacity of cornersptr;
+// the host has to size the buffer for the worst case or clamp what it reads back.
+__kernel void findCorners(__global const uchar * eigptr, int eig_step, int eig_offset,
+#ifdef HAVE_MASK
+                          __global const uchar * mask, int mask_step, int mask_offset,
+#endif
+                          __global const uchar * tmpptr, int tmp_step, int tmp_offset,
+                          __global uchar * cornersptr, __global int * counter,
+                          int rows, int cols)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (x < cols && y < rows)
+    {
+        // shift by one so that the first row and column are skipped
+        ++x, ++y;
+
+        int eig_index = mad24(y, eig_step, eig_offset + x * (int)sizeof(float));
+        int tmp_index = mad24(y, tmp_step, tmp_offset + x * (int)sizeof(float));
+#ifdef HAVE_MASK
+        int mask_index = mad24(y, mask_step, mask_offset + x);
+        mask += mask_index;
+#endif
+
+        float val = *(__global const float *)(eigptr + eig_index);
+        float tmp = *(__global const float *)(tmpptr + tmp_index);
+
+        // keep pixels with a non-zero response that equals the value in the second map
+        if (val != 0 && val == tmp
+#ifdef HAVE_MASK
+            && mask[0] != 0
+#endif
+            )
+        {
+            // reserve the next free record; the explicit cast is required because
+            // cornersptr is a byte pointer
+            __global float2 * corners =
+                (__global float2 *)(cornersptr + (int)sizeof(float2) * atomic_inc(counter));
+            // record = { response, x in the high 16 bits | y in the low 16 bits }
+            corners[0] = (float2)(val, as_float( (x<<16) | y ));
+        }
+    }
+}
--- a/modules/imgproc/test/ocl/test_gftt.cpp
+++ b/modules/imgproc/test/ocl/test_gftt.cpp
@ -0,0 +1,139 @@
+///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+#include "opencv2/ts/ocl_test.hpp"
+
+#ifdef HAVE_OPENCL
+
+namespace cvtest {
+namespace ocl {
+
+//////////////////////////// GoodFeaturesToTrack //////////////////////////
+
+
+// Fixture for the goodFeaturesToTrack OpenCL tests.
+// Test parameters: minDistance (double) and useRoi (bool).
+PARAM_TEST_CASE(GoodFeaturesToTrack, double, bool)
+{
+    double minDistance;
+    bool useRoi;
+
+    // fixed detector settings shared by all tests
+    static const int maxCorners;
+    static const double qualityLevel;
+
+    TEST_DECLARE_INPUT_PARAMETER(src)
+    UMat points, upoints; // results of the reference run and of the OpenCL run
+
+    virtual void SetUp()
+    {
+        minDistance = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
+    }
+
+    // Loads the test image and stores it in src_roi, a sub-matrix of src whose border
+    // is picked by randomBorder (no border unless useRoi is set).
+    void generateTestData()
+    {
+        Mat frame = readImage("../gpu/opticalflow/rubberwhale1.png", IMREAD_GRAYSCALE);
+        ASSERT_FALSE(frame.empty()) << "could not load gpu/opticalflow/rubberwhale1.png";
+
+        Size roiSize = frame.size();
+        Border srcBorder = randomBorder(0, useRoi ? 2 : 0);
+        randomSubMat(src, src_roi, roiSize, srcBorder, frame.type(), 5, 256);
+        // copy the image INTO the ROI; copying the other way round only overwrote the
+        // local frame, so the loaded image never reached the functions under test
+        frame.copyTo(src_roi);
+
+        UMAT_UPLOAD_INPUT_PARAMETER(src)
+    }
+};
+
+const int GoodFeaturesToTrack::maxCorners = 1000;
+const double GoodFeaturesToTrack::qualityLevel = 0.01;
+
+// Runs the detector with OpenCL disabled and enabled on the same input and requires
+// that at most 0.1% of the returned points differ after conversion to integer points.
+OCL_TEST_P(GoodFeaturesToTrack, Accuracy)
+{
+    for (int iteration = 0; iteration < test_loop_times; ++iteration)
+    {
+        generateTestData();
+
+        // reference run, OpenCL switched off
+        OCL_OFF(cv::goodFeaturesToTrack(src_roi, points, maxCorners, qualityLevel, minDistance, noArray()));
+        ASSERT_FALSE(points.empty());
+        std::vector<Point2f> pts(points.cols);
+        points.copyTo(pts);
+
+        // run under test, OpenCL switched on
+        OCL_ON(cv::goodFeaturesToTrack(usrc_roi, upoints, maxCorners, qualityLevel, minDistance));
+        ASSERT_FALSE(upoints.empty());
+        std::vector<Point2f> upts(upoints.cols);
+        upoints.copyTo(upts);
+
+        ASSERT_EQ(upts.size(), pts.size());
+
+        // count positions where the two result lists disagree
+        int mismatchCount = 0;
+        for (size_t idx = 0; idx < pts.size(); ++idx)
+        {
+            const Point2i oclPoint = upts[idx], refPoint = pts[idx];
+
+            if (oclPoint.x != refPoint.x || oclPoint.y != refPoint.y)
+                ++mismatchCount;
+        }
+
+        double bad_ratio = static_cast<double>(mismatchCount) / pts.size();
+        ASSERT_GE(1e-3, bad_ratio);
+    }
+}
+
+OCL_TEST_P(GoodFeaturesToTrack, EmptyCorners)  // an all-zero input is expected to produce an empty result with OpenCL enabled
+{
+    generateTestData();
+    usrc_roi.setTo(Scalar::all(0));  // overwrite the generated input with zeros
+
+    OCL_ON(cv::goodFeaturesToTrack(usrc_roi, upoints, maxCorners, qualityLevel, minDistance));
+
+    ASSERT_TRUE(upoints.empty());
+}
+
+OCL_INSTANTIATE_TEST_CASE_P(Imgproc, GoodFeaturesToTrack,
+                            ::testing::Combine(testing::Values(0.0, 3.0), Bool()));
+
+} } // namespace cvtest::ocl
+
+#endif
--- a/modules/ocl/src/gftt.cpp
+++ b/modules/ocl/src/gftt.cpp
@ -48,20 +48,18 @@
 using namespace cv;
 using namespace cv::ocl;

-// currently sort procedure on the host is more efficient
-static bool use_cpu_sorter = true;
-
 // compact structure for corners
 struct DefCorner
 {
    float eig;  //eigenvalue of corner
    short x;    //x coordinate of corner point
    short y;    //y coordinate of corner point
-} ;
+};

 // compare procedure for corner
 //it is used for sort on the host side
-struct DefCornerCompare
+struct DefCornerCompare :
+        public std::binary_function<DefCorner, DefCorner, bool>
 {
    bool operator()(const DefCorner a, const DefCorner b) const
    {
@ -69,37 +67,6 @@ struct DefCornerCompare
    }
 };

-// sort corner point using opencl bitonicosrt implementation
-static void sortCorners_caller(oclMat& corners, const int count)
-{
-    Context * cxt = Context::getContext();
-    int     GS = count/2;
-    int     LS = min(255,GS);
-    size_t  globalThreads[3] = {GS, 1, 1};
-    size_t  localThreads[3]  = {LS, 1, 1};
-
-    // 2^numStages should be equal to count or the output is invalid
-    int numStages = 0;
-    for(int i = count; i > 1; i >>= 1)
-    {
-        ++numStages;
-    }
-    const int argc = 4;
-    std::vector< std::pair<size_t, const void *> > args(argc);
-    std::string kernelname = "sortCorners_bitonicSort";
-    args[0] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
-    args[1] = std::make_pair(sizeof(cl_int), (void *)&count);
-    for(int stage = 0; stage < numStages; ++stage)
-    {
-        args[2] = std::make_pair(sizeof(cl_int), (void *)&stage);
-        for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
-        {
-            args[3] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
-            openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
-        }
-    }
-}
-
 // find corners on matrix and put it into array
 static void findCorners_caller(
    const oclMat&   eig_mat,        //input matrix worth eigenvalues
@ -158,7 +125,8 @@ static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
    int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
    int offset = src.offset / src.elemSize();

-    {// first parallel pass
+    {
+        // first parallel pass
        std::vector<std::pair<size_t , const void *> > args;
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
@ -173,7 +141,8 @@ static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
                            args, -1, -1, "-D T=float -D DEPTH_5");
    }

-    {// run final "serial" kernel to find accumulate results from threads and reset corner counter
+    {
+        // run final "serial" kernel to find accumulate results from threads and reset corner counter
        std::vector<std::pair<size_t , const void *> > args;
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum ));
@ -200,80 +169,54 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
    ensureSizeIsEnough(1,1, CV_32SC1, counter_);

    // find max eigenvalue and reset detected counters
-    minMaxEig_caller(eig_,eig_minmax_,counter_);
+    minMaxEig_caller(eig_, eig_minmax_, counter_);

    // allocate buffer for kernels
    int corner_array_size = std::max(1024, static_cast<int>(image.size().area() * 0.05));
-
-    if(!use_cpu_sorter)
-    {   // round to 2^n
-        unsigned int n=1;
-        for(n=1;n<(unsigned int)corner_array_size;n<<=1) ;
-        corner_array_size = (int)n;
-
-        ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
-
-        // set to 0 to be able use bitonic sort on whole 2^n array
-        tmpCorners_.setTo(0);
-    }
-    else
-    {
-        ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
-    }
+    ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);

    int total = tmpCorners_.cols; // by default the number of corner is full array
-    std::vector<DefCorner>   tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm
+    std::vector<DefCorner> tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm

-    //find points with high eigenvalue and put it into the output array
-    findCorners_caller(
-        eig_,
-        eig_minmax_,
-        static_cast<float>(qualityLevel),
-        mask,
-        tmpCorners_,
-        counter_);
+    // find points with high eigenvalue and put it into the output array
+    findCorners_caller(eig_, eig_minmax_, static_cast<float>(qualityLevel), mask, tmpCorners_, counter_);

-    if(!use_cpu_sorter)
-    {// sort detected corners on deivce side
-        sortCorners_caller(tmpCorners_, corner_array_size);
-    }
-    else
-    {// send non-blocking request to read real non-zero number of corners to sort it on the HOST side
-        openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_FALSE, 0,sizeof(int), &total, 0, NULL, NULL));
-    }
-
-    //blocking read whole corners array (sorted or not sorted)
-    openCLReadBuffer(tmpCorners_.clCxt,(cl_mem)tmpCorners_.data,&tmp[0],tmpCorners_.cols*sizeof(DefCorner));
+    // send non-blocking request to read real non-zero number of corners to sort it on the HOST side
+    openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_FALSE, 0, sizeof(int), &total, 0, NULL, NULL));

    if (total == 0)
-    {// check for trivial case
+    {
+        // check for trivial case
        corners.release();
        return;
    }

-    if(use_cpu_sorter)
-    {// sort detected corners on cpu side.
-        tmp.resize(total);
-        std::sort(tmp.begin(), tmp.end(), DefCornerCompare());
-    }
+    // blocking read of the whole corners array (still unsorted; it is sorted on the host below)
+    openCLReadBuffer(tmpCorners_.clCxt, (cl_mem)tmpCorners_.data, &tmp[0], tmpCorners_.cols * sizeof(DefCorner));

-    //estimate maximal size of final output array
+    // sort detected corners on cpu side.
+    tmp.resize(total);
+    printf("total: %d\n", total);
+    std::sort(tmp.begin(), tmp.end(), DefCornerCompare());
+
+    // estimate maximal size of final output array
    int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total;
    int D2 = (int)ceil(minDistance * minDistance);
+
    // allocate output buffer
    std::vector<Point2f> tmp2;
    tmp2.reserve(total_max);


    if (minDistance < 1)
-    {// we have not distance restriction. then just copy with conversion maximal allowed points into output array
-        for(int i=0;i<total_max && tmp[i].eig>0.0f;++i)
-        {
-            tmp2.push_back(Point2f(tmp[i].x,tmp[i].y));
-        }
+    {
+        // there is no distance restriction: just copy (with conversion) up to the maximal allowed number of points into the output array
+        for (int i = 0; i < total_max; ++i)
+            tmp2.push_back(Point2f(tmp[i].x, tmp[i].y));
    }
    else
-    {// we have distance restriction. then start coping to output array from the first element and check distance for each next one
+    {
+        // there is a distance restriction: start copying to the output array from the first element and check the distance for each subsequent one
        const int cell_size = cvRound(minDistance);
        const int grid_width = (image.cols + cell_size - 1) / cell_size;
        const int grid_height = (image.rows + cell_size - 1) / cell_size;
@ -283,10 +226,6 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
        for (int i = 0; i < total ; ++i)
        {
            DefCorner p = tmp[i];
-
-            if(p.eig<=0.0f)
-                break; // condition to stop that is needed for GPU bitonic sort usage.
-
            bool good = true;

            int x_cell = static_cast<int>(p.x / cell_size);
@ -328,9 +267,8 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,

            if(good)
            {
-                grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x,p.y));
-
-                tmp2.push_back(Point2f(p.x,p.y));
+                grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x, p.y));
+                tmp2.push_back(Point2f(p.x, p.y));

                if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
                    break;
@ -338,12 +276,14 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
        }

    }
+
    int final_size = static_cast<int>(tmp2.size());
-    if(final_size>0)
+    if (final_size > 0)
        corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0]));
    else
        corners.release();
 }
+
 void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
 {
    CV_DbgAssert(points.type() == CV_32FC2);
--- a/modules/ocl/src/opencl/imgproc_gftt.cl
+++ b/modules/ocl/src/opencl/imgproc_gftt.cl
@ -46,6 +46,7 @@
 #ifndef WITH_MASK
 #define WITH_MASK 0
 #endif
+
 //macro to read eigenvalue matrix
 #define GET_SRC_32F(_x, _y) ((__global const float*)(eig + (_y)*eig_pitch))[_x]

@ -107,47 +108,6 @@ __kernel
 #undef GET_SRC_32F


-//bitonic sort
-__kernel
-    void sortCorners_bitonicSort
-    (
-        __global float2 * corners,
-        const int count,
-        const int stage,
-        const int passOfStage
-    )
-{
-    const int threadId = get_global_id(0);
-    if(threadId >= count / 2)
-    {
-        return;
-    }
-
-    const int sortOrder = (((threadId/(1 << stage)) % 2)) == 1 ? 1 : 0; // 0 is descent
-
-    const int pairDistance = 1 << (stage - passOfStage);
-    const int blockWidth   = 2 * pairDistance;
-
-    const int leftId = min( (threadId % pairDistance)
-                   + (threadId / pairDistance) * blockWidth, count );
-
-    const int rightId = min( leftId + pairDistance, count );
-
-    const float2 leftPt  = corners[leftId];
-    const float2 rightPt = corners[rightId];
-
-    const float leftVal  = leftPt.x;
-    const float rightVal = rightPt.x;
-
-    const bool compareResult = leftVal > rightVal;
-
-    float2 greater = compareResult ? leftPt:rightPt;
-    float2 lesser  = compareResult ? rightPt:leftPt;
-
-    corners[leftId]  = sortOrder ? lesser : greater;
-    corners[rightId] = sortOrder ? greater : lesser;
-}
-
 // this is simple short serial kernel that makes some short reduction and initialization work
 // it makes HOST like work to avoid additional sync with HOST to do this short work
 // data - input/output float2.
@ -166,4 +126,4 @@ __kernel void arithm_op_minMax_final(__global float * data, int groupnum,__globa
    }
    data[0] = minVal;
    data[1] = maxVal;
-}
+}