mirror of
https://github.com/opencv/opencv.git
synced 2025-06-13 04:52:53 +08:00
301 lines
12 KiB
C++
301 lines
12 KiB
C++
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
//
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
// If you do not agree to this license, do not download, install,
|
|
// copy or use the software.
|
|
//
|
|
//
|
|
// License Agreement
|
|
// For Open Source Computer Vision Library
|
|
//
|
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
|
// Third party copyrights are property of their respective owners.
|
|
//
|
|
// @Authors
|
|
// Peng Xiao, pengxiao@outlook.com
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
// are permitted provided that the following conditions are met:
|
|
//
|
|
// * Redistribution's of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimer.
|
|
//
|
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
// and/or other materials provided with the distribution.
|
|
//
|
|
// * The name of the copyright holders may not be used to endorse or promote products
|
|
// derived from this software without specific prior written permission.
|
|
//
|
|
// This software is provided by the copyright holders and contributors as is and
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
//
|
|
//M*/
|
|
#include "precomp.hpp"
|
|
#include "opencl_kernels.hpp"
|
|
|
|
using namespace cv;
|
|
using namespace cv::ocl;
|
|
|
|
// Packed corner record: one detected corner together with its response value.
// Host code reads the device corner buffer straight into arrays of this type,
// so the member order (eig, x, y) must not be changed.
struct DefCorner
{
    float eig;  // corner response (eigenvalue) used to rank the corner
    short x;    // x coordinate of the corner point
    short y;    // y coordinate of the corner point
};
|
|
|
|
// compare procedure for corner
|
|
//it is used for sort on the host side
|
|
struct DefCornerCompare :
|
|
public std::binary_function<DefCorner, DefCorner, bool>
|
|
{
|
|
bool operator()(const DefCorner a, const DefCorner b) const
|
|
{
|
|
return a.eig > b.eig;
|
|
}
|
|
};
|
|
|
|
// find corners on matrix and put it into array
|
|
static void findCorners_caller(
|
|
const oclMat& eig_mat, //input matrix worth eigenvalues
|
|
oclMat& eigMinMax, //input with min and max values of eigenvalues
|
|
const float qualityLevel,
|
|
const oclMat& mask,
|
|
oclMat& corners, //output array with detected corners
|
|
oclMat& counter) //output value with number of detected corners, have to be 0 before call
|
|
{
|
|
String opt;
|
|
std::vector<int> k;
|
|
Context * cxt = Context::getContext();
|
|
|
|
std::vector< std::pair<size_t, const void*> > args;
|
|
|
|
const int mask_strip = mask.step / mask.elemSize1();
|
|
|
|
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&(eig_mat.data)));
|
|
|
|
int src_pitch = (int)eig_mat.step;
|
|
args.push_back(std::make_pair( sizeof(cl_int), (void*)&src_pitch ));
|
|
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&mask.data ));
|
|
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&corners.data ));
|
|
args.push_back(std::make_pair( sizeof(cl_int), (void*)&mask_strip));
|
|
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eigMinMax.data ));
|
|
args.push_back(std::make_pair( sizeof(cl_float), (void*)&qualityLevel ));
|
|
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.rows ));
|
|
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.cols ));
|
|
args.push_back(std::make_pair( sizeof(cl_int), (void*)&corners.cols ));
|
|
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&counter.data ));
|
|
|
|
size_t globalThreads[3] = {eig_mat.cols, eig_mat.rows, 1};
|
|
size_t localThreads[3] = {16, 16, 1};
|
|
if(!mask.empty())
|
|
opt += " -D WITH_MASK=1";
|
|
|
|
openCLExecuteKernel(cxt, &imgproc_gftt, "findCorners", globalThreads, localThreads, args, -1, -1, opt.c_str());
|
|
}
|
|
|
|
|
|
static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
|
|
{
|
|
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
|
|
CV_Assert(groupnum != 0);
|
|
|
|
int dbsize = groupnum * 2 * src.elemSize();
|
|
|
|
ensureSizeIsEnough(1, dbsize, CV_8UC1, dst);
|
|
|
|
cl_mem dst_data = reinterpret_cast<cl_mem>(dst.data);
|
|
|
|
int all_cols = src.step / src.elemSize();
|
|
int pre_cols = (src.offset % src.step) / src.elemSize();
|
|
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
|
|
int invalid_cols = pre_cols + sec_cols;
|
|
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
|
|
int offset = src.offset / src.elemSize();
|
|
|
|
{
|
|
// first parallel pass
|
|
std::vector<std::pair<size_t , const void *> > args;
|
|
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
|
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
|
|
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
|
|
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
|
|
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
|
|
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
|
|
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
|
|
size_t globalThreads[3] = {groupnum * 256, 1, 1};
|
|
size_t localThreads[3] = {256, 1, 1};
|
|
openCLExecuteKernel(src.clCxt, &arithm_minMax, "arithm_op_minMax", globalThreads, localThreads,
|
|
args, -1, -1, "-D T=float -D DEPTH_5");
|
|
}
|
|
|
|
{
|
|
// run final "serial" kernel to find accumulate results from threads and reset corner counter
|
|
std::vector<std::pair<size_t , const void *> > args;
|
|
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
|
|
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum ));
|
|
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&tozero.data ));
|
|
size_t globalThreads[3] = {1, 1, 1};
|
|
size_t localThreads[3] = {1, 1, 1};
|
|
openCLExecuteKernel(src.clCxt, &imgproc_gftt, "arithm_op_minMax_final", globalThreads, localThreads,
|
|
args, -1, -1);
|
|
}
|
|
}
|
|
|
|
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
|
|
{
|
|
CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
|
|
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
|
|
|
|
ensureSizeIsEnough(image.size(), CV_32F, eig_);
|
|
|
|
if (useHarrisDetector)
|
|
cornerHarris_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
|
|
else
|
|
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
|
|
|
|
ensureSizeIsEnough(1,1, CV_32SC1, counter_);
|
|
|
|
// find max eigenvalue and reset detected counters
|
|
minMaxEig_caller(eig_, eig_minmax_, counter_);
|
|
|
|
// allocate buffer for kernels
|
|
int corner_array_size = std::max(1024, static_cast<int>(image.size().area() * 0.05));
|
|
ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
|
|
|
|
int total = tmpCorners_.cols; // by default the number of corner is full array
|
|
std::vector<DefCorner> tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm
|
|
|
|
// find points with high eigenvalue and put it into the output array
|
|
findCorners_caller(eig_, eig_minmax_, static_cast<float>(qualityLevel), mask, tmpCorners_, counter_);
|
|
|
|
// send non-blocking request to read real non-zero number of corners to sort it on the HOST side
|
|
openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_FALSE, 0, sizeof(int), &total, 0, NULL, NULL));
|
|
|
|
if (total == 0)
|
|
{
|
|
// check for trivial case
|
|
corners.release();
|
|
return;
|
|
}
|
|
|
|
// blocking read whole corners array (sorted or not sorted)
|
|
openCLReadBuffer(tmpCorners_.clCxt, (cl_mem)tmpCorners_.data, &tmp[0], tmpCorners_.cols * sizeof(DefCorner));
|
|
|
|
// sort detected corners on cpu side.
|
|
tmp.resize(total);
|
|
std::sort(tmp.begin(), tmp.end(), DefCornerCompare());
|
|
|
|
// estimate maximal size of final output array
|
|
int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total;
|
|
int D2 = (int)ceil(minDistance * minDistance);
|
|
|
|
// allocate output buffer
|
|
std::vector<Point2f> tmp2;
|
|
tmp2.reserve(total_max);
|
|
|
|
|
|
if (minDistance < 1)
|
|
{
|
|
// we have not distance restriction. then just copy with conversion maximal allowed points into output array
|
|
for (int i = 0; i < total_max; ++i)
|
|
tmp2.push_back(Point2f(tmp[i].x, tmp[i].y));
|
|
}
|
|
else
|
|
{
|
|
// we have distance restriction. then start coping to output array from the first element and check distance for each next one
|
|
const int cell_size = cvRound(minDistance);
|
|
const int grid_width = (image.cols + cell_size - 1) / cell_size;
|
|
const int grid_height = (image.rows + cell_size - 1) / cell_size;
|
|
|
|
std::vector< std::vector<Point2i> > grid(grid_width * grid_height);
|
|
|
|
for (int i = 0; i < total ; ++i)
|
|
{
|
|
DefCorner p = tmp[i];
|
|
bool good = true;
|
|
|
|
int x_cell = static_cast<int>(p.x / cell_size);
|
|
int y_cell = static_cast<int>(p.y / cell_size);
|
|
|
|
int x1 = x_cell - 1;
|
|
int y1 = y_cell - 1;
|
|
int x2 = x_cell + 1;
|
|
int y2 = y_cell + 1;
|
|
|
|
// boundary check
|
|
x1 = std::max(0, x1);
|
|
y1 = std::max(0, y1);
|
|
x2 = std::min(grid_width - 1, x2);
|
|
y2 = std::min(grid_height - 1, y2);
|
|
|
|
for (int yy = y1; yy <= y2; yy++)
|
|
{
|
|
for (int xx = x1; xx <= x2; xx++)
|
|
{
|
|
std::vector<Point2i>& m = grid[yy * grid_width + xx];
|
|
if (m.empty())
|
|
continue;
|
|
for(size_t j = 0; j < m.size(); j++)
|
|
{
|
|
int dx = p.x - m[j].x;
|
|
int dy = p.y - m[j].y;
|
|
|
|
if (dx * dx + dy * dy < D2)
|
|
{
|
|
good = false;
|
|
goto break_out_;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
break_out_:
|
|
|
|
if(good)
|
|
{
|
|
grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x, p.y));
|
|
tmp2.push_back(Point2f(p.x, p.y));
|
|
|
|
if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
|
|
break;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
int final_size = static_cast<int>(tmp2.size());
|
|
if (final_size > 0)
|
|
corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0]));
|
|
else
|
|
corners.release();
|
|
}
|
|
|
|
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
|
|
{
|
|
CV_DbgAssert(points.type() == CV_32FC2);
|
|
points_v.resize(points.cols);
|
|
openCLSafeCall(clEnqueueReadBuffer(
|
|
*(cl_command_queue*)getClCommandQueuePtr(),
|
|
reinterpret_cast<cl_mem>(points.data),
|
|
CL_TRUE,
|
|
0,
|
|
points.cols * sizeof(Point2f),
|
|
&points_v[0],
|
|
0,
|
|
NULL,
|
|
NULL));
|
|
}
|