/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors // Peng Xiao, pengxiao@outlook.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" #include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; static bool use_cpu_sorter = true; namespace { enum SortMethod { CPU_STL, BITONIC, SELECTION }; const int GROUP_SIZE = 256; template struct Sorter { //typedef EigType; }; //TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed. template<> struct Sorter { typedef oclMat EigType; static cv::Mutex cs; static Mat mat_eig; //prototype static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2) { float v1 = mat_eig.at(cvRound(pt1.s[1]), cvRound(pt1.s[0])); float v2 = mat_eig.at(cvRound(pt2.s[1]), cvRound(pt2.s[0])); return v1 > v2; } static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) { cv::AutoLock lock(cs); //temporarily use STL's sort function Mat mat_corners = corners; mat_eig = eig_tex; std::sort(mat_corners.begin(), mat_corners.begin() + count, clfloat2Gt); corners = mat_corners; } }; cv::Mutex Sorter::cs; cv::Mat Sorter::mat_eig; template<> struct Sorter { typedef TextureCL EigType; static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) { Context * cxt = Context::getContext(); size_t globalThreads[3] = {count / 2, 1, 1}; size_t localThreads[3] = {GROUP_SIZE, 1, 1}; // 2^numStages should be equal to count or the output is invalid int numStages = 0; for(int i = count; i > 1; i >>= 1) { ++numStages; } const int argc = 5; std::vector< std::pair > args(argc); String kernelname = "sortCorners_bitonicSort"; args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex); args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data); args[2] = std::make_pair(sizeof(cl_int), (void *)&count); for(int stage = 0; stage < numStages; ++stage) { args[3] = std::make_pair(sizeof(cl_int), (void *)&stage); for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage) { args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage); openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1); } } } }; template<> struct Sorter { typedef TextureCL EigType; static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) { Context * cxt = Context::getContext(); size_t globalThreads[3] = {count, 1, 1}; size_t localThreads[3] = {GROUP_SIZE, 1, 1}; std::vector< std::pair > args; //local String kernelname = "sortCorners_selectionSortLocal"; int lds_size = GROUP_SIZE * sizeof(cl_float2); args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) ); args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) ); args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) ); args.push_back( std::make_pair( lds_size, (void*)NULL) ); openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1); //final kernelname = "sortCorners_selectionSortFinal"; args.pop_back(); openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1); } }; int findCorners_caller( const TextureCL& eig, const float threshold, const oclMat& mask, oclMat& corners, const int max_count) { std::vector k; Context * cxt = Context::getContext(); std::vector< std::pair > args; String kernelname = "findCorners"; const int mask_strip = mask.step / mask.elemSize1(); oclMat g_counter(1, 1, CV_32SC1); g_counter.setTo(0); args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eig )); args.push_back(std::make_pair( sizeof(cl_mem), (void*)&mask.data )); args.push_back(std::make_pair( sizeof(cl_mem), (void*)&corners.data )); args.push_back(std::make_pair( sizeof(cl_int), (void*)&mask_strip)); args.push_back(std::make_pair( sizeof(cl_float), (void*)&threshold )); args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.rows )); args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.cols )); args.push_back(std::make_pair( sizeof(cl_int), (void*)&max_count )); args.push_back(std::make_pair( sizeof(cl_mem), (void*)&g_counter.data )); size_t globalThreads[3] = {eig.cols, eig.rows, 1}; size_t localThreads[3] = {16, 16, 1}; const char * opt = mask.empty() ? "" : "-D WITH_MASK"; openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1, opt); return std::min(Mat(g_counter).at(0), max_count); } }//unnamed namespace void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask) { CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0); CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size())); ensureSizeIsEnough(image.size(), CV_32F, eig_); if (useHarrisDetector) cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK); else cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3); double maxVal = 0; minMax(eig_, NULL, &maxVal); ensureSizeIsEnough(1, std::max(1000, static_cast(image.size().area() * 0.05)), CV_32FC2, tmpCorners_); Ptr eig_tex = bindTexturePtr(eig_); int total = findCorners_caller( *eig_tex, static_cast(maxVal * qualityLevel), mask, tmpCorners_, tmpCorners_.cols); if (total == 0) { corners.release(); return; } if(use_cpu_sorter) { Sorter::sortCorners_caller(eig_, tmpCorners_, total); } else { //if total is power of 2 if(((total - 1) & (total)) == 0) { Sorter::sortCorners_caller(*eig_tex, tmpCorners_, total); } else { Sorter::sortCorners_caller(*eig_tex, tmpCorners_, total); } } if (minDistance < 1) { Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1); tmpCorners_(roi_range).copyTo(corners); } else { std::vector tmp(total); downloadPoints(tmpCorners_, tmp); std::vector tmp2; tmp2.reserve(total); const int cell_size = cvRound(minDistance); const int grid_width = (image.cols + cell_size - 1) / cell_size; const int grid_height = (image.rows + cell_size - 1) / cell_size; std::vector< std::vector > grid(grid_width * grid_height); for (int i = 0; i < total; ++i) { Point2f p = tmp[i]; bool good = true; int x_cell = static_cast(p.x / cell_size); int y_cell = static_cast(p.y / cell_size); int x1 = x_cell - 1; int y1 = y_cell - 1; int x2 = x_cell + 1; int y2 = y_cell + 1; // boundary check x1 = std::max(0, x1); y1 = std::max(0, y1); x2 = std::min(grid_width - 1, x2); y2 = std::min(grid_height - 1, y2); for (int yy = y1; yy <= y2; yy++) { for (int xx = x1; xx <= x2; xx++) { std::vector& m = grid[yy * grid_width + xx]; if (!m.empty()) { for(size_t j = 0; j < m.size(); j++) { float dx = p.x - m[j].x; float dy = p.y - m[j].y; if (dx * dx + dy * dy < minDistance * minDistance) { good = false; goto break_out; } } } } } break_out: if(good) { grid[y_cell * grid_width + x_cell].push_back(p); tmp2.push_back(p); if (maxCorners > 0 && tmp2.size() == static_cast(maxCorners)) break; } } corners.upload(Mat(1, static_cast(tmp2.size()), CV_32FC2, &tmp2[0])); } } void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector &points_v) { CV_DbgAssert(points.type() == CV_32FC2); points_v.resize(points.cols); openCLSafeCall(clEnqueueReadBuffer( *(cl_command_queue*)getClCommandQueuePtr(), reinterpret_cast(points.data), CL_TRUE, 0, points.cols * sizeof(Point2f), &points_v[0], 0, NULL, NULL)); }