mirror of
https://github.com/opencv/opencv.git
synced 2025-01-19 06:53:50 +08:00
Refactoring, minor optimization
This commit is contained in:
parent
6b6c7ccfea
commit
fee8f29f48
@ -674,63 +674,57 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
|
|||||||
ocl::Device dev = ocl::Device::getDefault();
|
ocl::Device dev = ocl::Device::getDefault();
|
||||||
|
|
||||||
// make list of nonzero points
|
// make list of nonzero points
|
||||||
const int pixelsPerWI = 4;
|
const int pixelsPerWI = 8;
|
||||||
int group_size = (src.cols + pixelsPerWI - 1)/pixelsPerWI;
|
int workgroup_size = min((int) dev.maxWorkGroupSize(), (src.cols + pixelsPerWI - 1)/pixelsPerWI);
|
||||||
ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc,
|
ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc,
|
||||||
format("-D MAKE_POINT_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", group_size, src.cols));
|
format("-D MAKE_POINTS_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", workgroup_size, src.cols));
|
||||||
if (pointListKernel.empty())
|
if (pointListKernel.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
UMat pointsList(1, (int) src.total(), CV_32SC1);
|
UMat pointsList(1, (int) src.total(), CV_32SC1);
|
||||||
UMat total(1, 1, CV_32SC1, Scalar::all(0));
|
UMat counters(1, 2, CV_32SC1, Scalar::all(0));
|
||||||
pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList),
|
pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList),
|
||||||
ocl::KernelArg::PtrWriteOnly(total));
|
ocl::KernelArg::PtrWriteOnly(counters));
|
||||||
size_t localThreads[2] = { group_size, 1 };
|
size_t localThreads[2] = { workgroup_size, 1 };
|
||||||
size_t globalThreads[2] = { group_size, src.rows };
|
size_t globalThreads[2] = { workgroup_size, src.rows };
|
||||||
|
|
||||||
if (!pointListKernel.run(2, globalThreads, localThreads, false))
|
if (!pointListKernel.run(2, globalThreads, localThreads, false))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int total_points = total.getMat(ACCESS_READ).at<int>(0, 0);
|
int total_points = counters.getMat(ACCESS_READ).at<int>(0, 0);
|
||||||
if (total_points <= 0)
|
if (total_points <= 0)
|
||||||
{
|
{
|
||||||
_lines.assign(UMat(0,0,CV_32FC2));
|
_lines.assign(UMat(0,0,CV_32FC2));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert src to hough space
|
// convert src image to hough space
|
||||||
group_size = min((int) dev.maxWorkGroupSize(), total_points);
|
|
||||||
int local_memory_needed = (numrho + 2)*sizeof(int);
|
|
||||||
ocl::Kernel fillAccumKernel;
|
|
||||||
globalThreads[0] = group_size; globalThreads[1] = numangle;
|
|
||||||
size_t* fillAccumLT = NULL;
|
|
||||||
|
|
||||||
UMat accum(numangle + 2, numrho + 2, CV_32SC1);
|
UMat accum(numangle + 2, numrho + 2, CV_32SC1);
|
||||||
|
workgroup_size = min((int) dev.maxWorkGroupSize(), total_points);
|
||||||
|
ocl::Kernel fillAccumKernel;
|
||||||
|
size_t* fillAccumLT = NULL;
|
||||||
|
int local_memory_needed = (numrho + 2)*sizeof(int);
|
||||||
if (local_memory_needed > dev.localMemSize())
|
if (local_memory_needed > dev.localMemSize())
|
||||||
{
|
{
|
||||||
|
accum.setTo(Scalar::all(0));
|
||||||
fillAccumKernel.create("fill_accum_global", ocl::imgproc::hough_lines_oclsrc,
|
fillAccumKernel.create("fill_accum_global", ocl::imgproc::hough_lines_oclsrc,
|
||||||
format("-D FILL_ACCUM_GLOBAL"));
|
format("-D FILL_ACCUM_GLOBAL"));
|
||||||
accum.setTo(Scalar::all(0));
|
globalThreads[0] = workgroup_size; globalThreads[1] = numangle;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fillAccumKernel.create("fill_accum_local", ocl::imgproc::hough_lines_oclsrc,
|
fillAccumKernel.create("fill_accum_local", ocl::imgproc::hough_lines_oclsrc,
|
||||||
format("-D FILL_ACCUM_LOCAL -D LOCAL_SIZE=%d -D BUFFER_SIZE=%d", group_size, numrho + 2));
|
format("-D FILL_ACCUM_LOCAL -D LOCAL_SIZE=%d -D BUFFER_SIZE=%d", workgroup_size, numrho + 2));
|
||||||
localThreads[0] = group_size; localThreads[1] = 1;
|
localThreads[0] = workgroup_size; localThreads[1] = 1;
|
||||||
|
globalThreads[0] = workgroup_size; globalThreads[1] = numangle+2;
|
||||||
fillAccumLT = localThreads;
|
fillAccumLT = localThreads;
|
||||||
}
|
}
|
||||||
if (fillAccumKernel.empty())
|
if (fillAccumKernel.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int linesMax = min(total_points*numangle/threshold, 4096);
|
|
||||||
UMat lines(linesMax, 1, CV_32FC2);
|
|
||||||
UMat lines_count(1, 1, CV_32SC1, Scalar::all(0));
|
|
||||||
|
|
||||||
fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum),
|
fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum),
|
||||||
total_points, irho, (float) theta, numrho, numangle);
|
total_points, irho, (float) theta, numrho, numangle);
|
||||||
|
|
||||||
|
|
||||||
if (!fillAccumKernel.run(2, globalThreads, fillAccumLT, false))
|
if (!fillAccumKernel.run(2, globalThreads, fillAccumLT, false))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -739,15 +733,18 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
|
|||||||
if (getLinesKernel.empty())
|
if (getLinesKernel.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
globalThreads[0] = numrho; globalThreads[1] = numangle;
|
// TODO: investigate other strategies to choose linesMax
|
||||||
getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines),
|
int linesMax = min(total_points*numangle/threshold, 4096);
|
||||||
ocl::KernelArg::PtrWriteOnly(lines_count), linesMax, threshold, (float) rho, (float) theta);
|
UMat lines(linesMax, 1, CV_32FC2);
|
||||||
|
|
||||||
|
getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines),
|
||||||
|
ocl::KernelArg::PtrWriteOnly(counters), linesMax, threshold, (float) rho, (float) theta);
|
||||||
|
|
||||||
|
globalThreads[0] = numrho; globalThreads[1] = numangle;
|
||||||
if (!getLinesKernel.run(2, globalThreads, NULL, false))
|
if (!getLinesKernel.run(2, globalThreads, NULL, false))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
||||||
int total_lines = min(lines_count.getMat(ACCESS_READ).at<int>(0, 0), linesMax);
|
int total_lines = min(counters.getMat(ACCESS_READ).at<int>(0, 1), linesMax);
|
||||||
if (total_lines > 0)
|
if (total_lines > 0)
|
||||||
_lines.assign(lines.rowRange(Range(0, total_lines)));
|
_lines.assign(lines.rowRange(Range(0, total_lines)));
|
||||||
else
|
else
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
// Copyright (C) 2014, Itseez, Inc., all rights reserved.
|
// Copyright (C) 2014, Itseez, Inc., all rights reserved.
|
||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
#ifdef MAKE_POINT_LIST
|
#ifdef MAKE_POINTS_LIST
|
||||||
|
|
||||||
__kernel void make_point_list(__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
|
__kernel void make_point_list(__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
__global uchar * list_ptr, int list_step, int list_offset, __global int* global_offset)
|
__global uchar * list_ptr, int list_step, int list_offset, __global int* global_offset)
|
||||||
@ -54,7 +54,7 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int
|
|||||||
|
|
||||||
__kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, int list_offset,
|
__kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, int list_offset,
|
||||||
__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
|
__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
|
||||||
int count, float irho, float theta, int numrho, int numangle)
|
int total_points, float irho, float theta, int numrho, int numangle)
|
||||||
{
|
{
|
||||||
int theta_idx = get_global_id(1);
|
int theta_idx = get_global_id(1);
|
||||||
int count_idx = get_global_id(0);
|
int count_idx = get_global_id(0);
|
||||||
@ -70,7 +70,7 @@ __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step,
|
|||||||
|
|
||||||
if (theta_idx < numangle)
|
if (theta_idx < numangle)
|
||||||
{
|
{
|
||||||
for (int i = count_idx; i < count; i += glob_size)
|
for (int i = count_idx; i < total_points; i += glob_size)
|
||||||
{
|
{
|
||||||
const int val = list[i];
|
const int val = list[i];
|
||||||
const int x = (val & 0xFFFF);
|
const int x = (val & 0xFFFF);
|
||||||
@ -86,43 +86,50 @@ __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step,
|
|||||||
|
|
||||||
__kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, int list_offset,
|
__kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, int list_offset,
|
||||||
__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
|
__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
|
||||||
int count, float irho, float theta, int numrho, int numangle)
|
int total_points, float irho, float theta, int numrho, int numangle)
|
||||||
{
|
{
|
||||||
int theta_idx = get_global_id(1);
|
int theta_idx = get_global_id(1);
|
||||||
int count_idx = get_local_id(0);
|
int count_idx = get_local_id(0);
|
||||||
|
|
||||||
float cosVal;
|
if (theta_idx > 0 && theta_idx < numangle + 1)
|
||||||
float sinVal = sincos(theta * ((float)theta_idx), &cosVal);
|
|
||||||
sinVal *= irho;
|
|
||||||
cosVal *= irho;
|
|
||||||
|
|
||||||
__local int l_accum[BUFFER_SIZE];
|
|
||||||
for (int i=count_idx; i<BUFFER_SIZE; i+=LOCAL_SIZE)
|
|
||||||
l_accum[i] = 0;
|
|
||||||
|
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
|
||||||
|
|
||||||
__global const int * list = (__global const int*)(list_ptr + list_offset);
|
|
||||||
const int shift = (numrho - 1) / 2;
|
|
||||||
|
|
||||||
if (theta_idx < numangle)
|
|
||||||
{
|
{
|
||||||
for (int i = count_idx; i < count; i += LOCAL_SIZE)
|
float cosVal;
|
||||||
|
float sinVal = sincos(theta * (float) (theta_idx-1), &cosVal);
|
||||||
|
sinVal *= irho;
|
||||||
|
cosVal *= irho;
|
||||||
|
|
||||||
|
__local int l_accum[BUFFER_SIZE];
|
||||||
|
for (int i=count_idx; i<BUFFER_SIZE; i+=LOCAL_SIZE)
|
||||||
|
l_accum[i] = 0;
|
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
__global const int * list = (__global const int*)(list_ptr + list_offset);
|
||||||
|
const int shift = (numrho - 1) / 2;
|
||||||
|
|
||||||
|
|
||||||
|
for (int i = count_idx; i < total_points; i += LOCAL_SIZE)
|
||||||
{
|
{
|
||||||
const int val = list[i];
|
const int point = list[i];
|
||||||
const int x = (val & 0xFFFF);
|
const int x = (point & 0xFFFF);
|
||||||
const int y = (val >> 16) & 0xFFFF;
|
const int y = (point >> 16) & 0xFFFF;
|
||||||
|
|
||||||
int r = convert_int_rte(x * cosVal + y * sinVal) + shift;
|
int r = convert_int_rte(x * cosVal + y * sinVal) + shift;
|
||||||
atomic_inc(l_accum + r + 1);
|
atomic_inc(l_accum + r + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
__global int* accum = (__global int*)(accum_ptr + mad24(theta_idx, accum_step, accum_offset));
|
||||||
|
for (int i=count_idx; i<BUFFER_SIZE; i+=LOCAL_SIZE)
|
||||||
|
accum[i] = l_accum[i];
|
||||||
|
}
|
||||||
|
else if (theta_idx < numangle + 2)
|
||||||
|
{
|
||||||
|
__global int* accum = (__global int*)(accum_ptr + mad24(theta_idx, accum_step, accum_offset));
|
||||||
|
for (int i=count_idx; i<BUFFER_SIZE; i+=LOCAL_SIZE)
|
||||||
|
accum[i] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
|
||||||
|
|
||||||
__global int* accum = (__global int*)(accum_ptr + mad24(theta_idx + 1, accum_step, accum_offset));
|
|
||||||
for (int i=count_idx; i<BUFFER_SIZE; i+=LOCAL_SIZE)
|
|
||||||
accum[i] = l_accum[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined GET_LINES
|
#elif defined GET_LINES
|
||||||
@ -130,7 +137,7 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i
|
|||||||
#define ACCUM(ptr) *((__global int*)(ptr))
|
#define ACCUM(ptr) *((__global int*)(ptr))
|
||||||
|
|
||||||
__kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
|
__kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
|
||||||
__global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index,
|
__global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr,
|
||||||
int linesMax, int threshold, float rho, float theta)
|
int linesMax, int threshold, float rho, float theta)
|
||||||
{
|
{
|
||||||
int x = get_global_id(0);
|
int x = get_global_id(0);
|
||||||
@ -140,6 +147,7 @@ __kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_of
|
|||||||
{
|
{
|
||||||
__global uchar* accum = accum_ptr + mad24(y+1, accum_step, mad24(x+1, (int) sizeof(int), accum_offset));
|
__global uchar* accum = accum_ptr + mad24(y+1, accum_step, mad24(x+1, (int) sizeof(int), accum_offset));
|
||||||
__global float2* lines = (__global float2*)(lines_ptr + lines_offset);
|
__global float2* lines = (__global float2*)(lines_ptr + lines_offset);
|
||||||
|
__global int* lines_index = lines_index_ptr + 1;
|
||||||
|
|
||||||
int curVote = ACCUM(accum);
|
int curVote = ACCUM(accum);
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ PARAM_TEST_CASE(HoughLinesTestBase, double, double, int)
|
|||||||
|
|
||||||
virtual void generateTestData()
|
virtual void generateTestData()
|
||||||
{
|
{
|
||||||
src_size = randomSize(500, 1000);
|
src_size = randomSize(500, 1920);
|
||||||
src.create(src_size, CV_8UC1);
|
src.create(src_size, CV_8UC1);
|
||||||
src.setTo(Scalar::all(0));
|
src.setTo(Scalar::all(0));
|
||||||
line(src, Point(0, 100), Point(100, 100), Scalar::all(255), 1);
|
line(src, Point(0, 100), Point(100, 100), Scalar::all(255), 1);
|
||||||
@ -101,7 +101,7 @@ OCL_TEST_P(HoughLines, GeneratedImage)
|
|||||||
OCL_OFF(cv::HoughLines(src, dst, rhoStep, thetaStep, threshold));
|
OCL_OFF(cv::HoughLines(src, dst, rhoStep, thetaStep, threshold));
|
||||||
OCL_ON(cv::HoughLines(usrc, udst, rhoStep, thetaStep, threshold));
|
OCL_ON(cv::HoughLines(usrc, udst, rhoStep, thetaStep, threshold));
|
||||||
|
|
||||||
//Near(1e-5);
|
Near(1e-5);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user