Added HoughLinesP OCL implementation

This commit is contained in:
Alexander Karsakov 2014-09-09 12:50:12 +04:00
parent 39b27a19be
commit eaf5a163b1
3 changed files with 314 additions and 31 deletions

View File

@ -652,6 +652,9 @@ HoughLinesProbabilistic( Mat& image,
}
}
}
#ifdef HAVE_OPENCL
static bool ocl_makePointsList(InputArray _src, OutputArray _pointsList, InputOutputArray _counters)
{
UMat src = _src.getUMat();
@ -660,16 +663,16 @@ static bool ocl_makePointsList(InputArray _src, OutputArray _pointsList, InputOu
UMat counters = _counters.getUMat();
ocl::Device dev = ocl::Device::getDefault();
const int pixelsPerWI = 16;
int workgroup_size = min((int) dev.maxWorkGroupSize(), (src.cols + pixelsPerWI - 1)/pixelsPerWI);
ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc,
const int pixPerWI = 16;
int workgroup_size = min((int) dev.maxWorkGroupSize(), (src.cols + pixPerWI - 1)/pixPerWI);
ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc,
format("-D MAKE_POINTS_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", workgroup_size, src.cols));
if (pointListKernel.empty())
return false;
pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList),
ocl::KernelArg::PtrWriteOnly(counters));
size_t localThreads[2] = { workgroup_size, 1 };
size_t globalThreads[2] = { workgroup_size, src.rows };
@ -685,12 +688,12 @@ static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_
float irho = (float) (1 / rho);
int workgroup_size = min((int) dev.maxWorkGroupSize(), total_points);
ocl::Kernel fillAccumKernel;
size_t localThreads[2];
size_t globalThreads[2];
int local_memory_needed = (numrho + 2)*sizeof(int);
size_t local_memory_needed = (numrho + 2)*sizeof(int);
if (local_memory_needed > dev.localMemSize())
{
accum.setTo(Scalar::all(0));
@ -717,7 +720,7 @@ static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_
}
}
static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, double theta, int threshold,
static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, double theta, int threshold,
double min_theta, double max_theta)
{
CV_Assert(_src.type() == CV_8UC1);
@ -732,7 +735,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
UMat src = _src.getUMat();
int numangle = cvRound((max_theta - min_theta) / theta);
int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
UMat pointsList;
UMat counters(1, 2, CV_32SC1, Scalar::all(0));
@ -766,7 +769,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
size_t globalThreads[2] = { (numrho + pixPerWI - 1)/pixPerWI, numangle };
if (!getLinesKernel.run(2, globalThreads, NULL, false))
return false;
int total_lines = min(counters.getMat(ACCESS_READ).at<int>(0, 1), linesMax);
if (total_lines > 0)
_lines.assign(lines.rowRange(Range(0, total_lines)));
@ -775,23 +778,67 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
return true;
}
static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, double theta, int threshold,
static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, double theta, int threshold,
double minLineLength, double maxGap)
{
CV_Assert(_src.type() == CV_8UC1);
UMat src = _src.getUMat();
return false;
UMat src = _src.getUMat();
int numangle = cvRound(CV_PI / theta);
int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
UMat pointsList;
UMat counters(1, 2, CV_32SC1, Scalar::all(0));
if (!ocl_makePointsList(src, pointsList, counters))
return false;
int total_points = counters.getMat(ACCESS_READ).at<int>(0, 0);
if (total_points <= 0)
{
_lines.assign(UMat(0,0,CV_32SC4));
return true;
}
UMat accum;
if (!ocl_fillAccum(pointsList, accum, total_points, rho, theta, numrho, numangle))
return false;
ocl::Kernel getLinesKernel("get_lines", ocl::imgproc::hough_lines_oclsrc,
format("-D GET_LINES_PROBABOLISTIC"));
if (getLinesKernel.empty())
return false;
// TODO: investigate other strategies to choose linesMax
int linesMax = min(total_points*numangle/threshold, 4096);
UMat lines(linesMax, 1, CV_32SC4);
getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::ReadOnly(src),
ocl::KernelArg::WriteOnlyNoSize(lines), ocl::KernelArg::PtrWriteOnly(counters),
linesMax, threshold, (int) minLineLength, (int) maxGap, (float) rho, (float) theta);
size_t globalThreads[2] = { numrho, numangle };
if (!getLinesKernel.run(2, globalThreads, NULL, false))
return false;
int total_lines = min(counters.getMat(ACCESS_READ).at<int>(0, 1), linesMax);
if (total_lines > 0)
_lines.assign(lines.rowRange(Range(0, total_lines)));
else
_lines.assign(UMat(0,0,CV_32SC4));
return true;
}
#endif /* HAVE_OPENCL */
}
void cv::HoughLines( InputArray _image, OutputArray _lines,
double rho, double theta, int threshold,
double srn, double stn, double min_theta, double max_theta )
{
CV_OCL_RUN(srn == 0 && stn == 0 && _image.isUMat() && _lines.isUMat(),
CV_OCL_RUN(srn == 0 && stn == 0 && _image.isUMat() && _lines.isUMat(),
ocl_HoughLines(_image, _lines, rho, theta, threshold, min_theta, max_theta));
Mat image = _image.getMat();
@ -810,7 +857,8 @@ void cv::HoughLinesP(InputArray _image, OutputArray _lines,
double rho, double theta, int threshold,
double minLineLength, double maxGap )
{
CV_OCL_RUN(_image.isUMat() && _lines.isUMat(), ocl_HoughLinesP(_image, _lines, rho, theta, threshold, minLineLength, maxGap));
CV_OCL_RUN(_image.isUMat() && _lines.isUMat(),
ocl_HoughLinesP(_image, _lines, rho, theta, threshold, minLineLength, maxGap));
Mat image = _image.getMat();
std::vector<Vec4i> lines;

View File

@ -12,7 +12,7 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int
{
int x = get_local_id(0);
int y = get_group_id(1);
__local int l_index, l_offset;
__local int l_points[LOCAL_SIZE];
__global const uchar * src = src_ptr + mad24(y, src_step, src_offset);
@ -37,12 +37,12 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int
}
barrier(CLK_LOCAL_MEM_FENCE);
if (x == 0)
l_offset = atomic_add(global_offset, l_index);
barrier(CLK_LOCAL_MEM_FENCE);
list += l_offset;
for (int i=x; i < l_index; i+=GROUP_SIZE)
{
@ -53,8 +53,8 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int
#elif defined FILL_ACCUM_GLOBAL
__kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, int list_offset,
__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
int total_points, float irho, float theta, int numrho, int numangle)
__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
int total_points, float irho, float theta, int numrho, int numangle)
{
int theta_idx = get_global_id(1);
int count_idx = get_global_id(0);
@ -90,7 +90,7 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i
{
int theta_idx = get_group_id(1);
int count_idx = get_local_id(0);
if (theta_idx > 0 && theta_idx < numangle + 1)
{
float cosVal;
@ -136,7 +136,7 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i
#define ACCUM(ptr) *((__global int*)(ptr))
__kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
__global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr,
__global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr,
int linesMax, int threshold, float rho, float theta)
{
int x0 = get_global_id(0);
@ -148,7 +148,7 @@ __kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_of
__global uchar* accum = accum_ptr + mad24(y+1, accum_step, mad24(x0+1, (int) sizeof(int), accum_offset));
__global float2* lines = (__global float2*)(lines_ptr + lines_offset);
__global int* lines_index = lines_index_ptr + 1;
for (int x=x0; x<accum_cols-2; x+=gl_size)
{
int curVote = ACCUM(accum);
@ -172,4 +172,169 @@ __kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_of
}
}
#elif GET_LINES_PROBABOLISTIC
#define ACCUM(ptr) *((__global int*)(ptr))
__kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr,
int linesMax, int threshold, int lineLength, int lineGap, float rho, float theta)
{
int x = get_global_id(0);
int y = get_global_id(1);
__global uchar* accum = accum_ptr + mad24(y+1, accum_step, mad24(x+1, (int) sizeof(int), accum_offset));
__global int4* lines = (__global int4*)(lines_ptr + lines_offset);
__global int* lines_index = lines_index_ptr + 1;
int curVote = ACCUM(accum);
if (curVote >= threshold &&
curVote > ACCUM(accum - accum_step - sizeof(int)) &&
curVote > ACCUM(accum - accum_step) &&
curVote > ACCUM(accum - accum_step + sizeof(int)) &&
curVote > ACCUM(accum - sizeof(int)) &&
curVote > ACCUM(accum + sizeof(int)) &&
curVote > ACCUM(accum + accum_step - sizeof(int)) &&
curVote > ACCUM(accum + accum_step) &&
curVote > ACCUM(accum + accum_step + sizeof(int)))
{
const float radius = (x - (accum_cols - 2 - 1) * 0.5f) * rho;
const float angle = y * theta;
float cosa;
float sina = sincos(angle, &cosa);
float2 p0 = (float2)(cosa * radius, sina * radius);
float2 dir = (float2)(-sina, cosa);
float2 pb[4] = { (float2)(-1, -1), (float2)(-1, -1), (float2)(-1, -1), (float2)(-1, -1) };
float a;
if (dir.x != 0)
{
a = -p0.x / dir.x;
pb[0].x = 0;
pb[0].y = p0.y + a * dir.y;
a = (src_cols - 1 - p0.x) / dir.x;
pb[1].x = src_cols - 1;
pb[1].y = p0.y + a * dir.y;
}
if (dir.y != 0)
{
a = -p0.y / dir.y;
pb[2].x = p0.x + a * dir.x;
pb[2].y = 0;
a = (src_rows - 1 - p0.y) / dir.y;
pb[3].x = p0.x + a * dir.x;
pb[3].y = src_rows - 1;
}
if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < src_rows))
{
p0 = pb[0];
if (dir.x < 0)
dir = -dir;
}
else if (pb[1].x == src_cols - 1 && (pb[0].y >= 0 && pb[0].y < src_rows))
{
p0 = pb[1];
if (dir.x > 0)
dir = -dir;
}
else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < src_cols))
{
p0 = pb[2];
if (dir.y < 0)
dir = -dir;
}
else if (pb[3].y == src_rows - 1 && (pb[3].x >= 0 && pb[3].x < src_cols))
{
p0 = pb[3];
if (dir.y > 0)
dir = -dir;
}
float2 d;
if (fabs(dir.x) > fabs(dir.y))
{
d.x = dir.x > 0 ? 1 : -1;
d.y = dir.y / fabs(dir.x);
}
else
{
d.x = dir.x / fabs(dir.y);
d.y = dir.y > 0 ? 1 : -1;
}
float2 line_end[2];
int gap;
bool inLine = false;
float2 p1 = p0;
if (p1.x < 0 || p1.x >= src_cols || p1.y < 0 || p1.y >= src_rows)
return;
for (;;)
{
if (*(src_ptr + mad24(p1.y, src_step, p1.x + src_offset)))
{
gap = 0;
if (!inLine)
{
line_end[0] = p1;
line_end[1] = p1;
inLine = true;
}
else
{
line_end[1] = p1;
}
}
else if (inLine)
{
if (++gap > lineGap)
{
bool good_line = fabs(line_end[1].x - line_end[0].x) >= lineLength ||
fabs(line_end[1].y - line_end[0].y) >= lineLength;
if (good_line)
{
int index = atomic_inc(lines_index);
if (index < linesMax)
lines[index] = (int4)(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
}
gap = 0;
inLine = false;
}
}
p1 = p1 + d;
if (p1.x < 0 || p1.x >= src_cols || p1.y < 0 || p1.y >= src_rows)
{
if (inLine)
{
bool good_line = fabs(line_end[1].x - line_end[0].x) >= lineLength ||
fabs(line_end[1].y - line_end[0].y) >= lineLength;
if (good_line)
{
int index = atomic_inc(lines_index);
if (index < linesMax)
lines[index] = (int4)(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
}
}
break;
}
}
}
}
#endif

View File

@ -15,17 +15,18 @@ namespace ocl {
struct Vec2fComparator
{
bool operator()(const cv::Vec2f& a, const cv::Vec2f b) const
bool operator()(const Vec2f& a, const Vec2f b) const
{
if(a[0] != b[0]) return a[0] < b[0];
else return a[1] < b[1];
}
};
PARAM_TEST_CASE(HoughLinesTestBase, double, double, int)
/////////////////////////////// HoughLines ////////////////////////////////////
PARAM_TEST_CASE(HoughLines, double, double, int)
{
double rhoStep;
double thetaStep;
double rhoStep, thetaStep;
int threshold;
Size src_size;
@ -50,7 +51,7 @@ PARAM_TEST_CASE(HoughLinesTestBase, double, double, int)
line(src, Point(100, 0), Point(100, 200), Scalar::all(255), 1);
line(src, Point(200, 0), Point(200, 200), Scalar::all(255), 1);
line(src, Point(400, 0), Point(400, 200), Scalar::all(255), 1);
src.copyTo(usrc);
}
@ -65,7 +66,7 @@ PARAM_TEST_CASE(HoughLinesTestBase, double, double, int)
virtual void Near(double eps = 0.)
{
EXPECT_EQ(dst.size(), udst.size());
if (dst.total() > 0)
{
Mat lines_cpu, lines_gpu;
@ -80,8 +81,6 @@ PARAM_TEST_CASE(HoughLinesTestBase, double, double, int)
}
};
typedef HoughLinesTestBase HoughLines;
OCL_TEST_P(HoughLines, RealImage)
{
readRealTestData();
@ -105,10 +104,81 @@ OCL_TEST_P(HoughLines, GeneratedImage)
}
}
/////////////////////////////// HoughLinesP ///////////////////////////////////
PARAM_TEST_CASE(HoughLinesP, int, double, double)
{
double rhoStep, thetaStep, minLineLength, maxGap;
int threshold;
Size src_size;
Mat src, dst;
UMat usrc, udst;
virtual void SetUp()
{
rhoStep = 1.0;
thetaStep = CV_PI / 180;
threshold = GET_PARAM(0);
minLineLength = GET_PARAM(1);
maxGap = GET_PARAM(2);
}
virtual void readRealTestData()
{
Mat img = readImage("shared/pic5.png", IMREAD_GRAYSCALE);
Canny(img, src, 50, 200, 3);
src.copyTo(usrc);
}
virtual void Near(double eps = 0.)
{
Mat lines_gpu = udst.getMat(ACCESS_READ);
if (dst.total() > 0 && lines_gpu.total() > 0)
{
Mat result_cpu(src.size(), CV_8UC1, Scalar::all(0));
Mat result_gpu(src.size(), CV_8UC1, Scalar::all(0));
MatConstIterator_<Vec4i> it = dst.begin<Vec4i>(), end = dst.end<Vec4i>();
for ( ; it != end; it++)
{
Vec4i p = *it;
line(result_cpu, Point(p[0], p[1]), Point(p[2], p[3]), Scalar(255));
}
it = lines_gpu.begin<Vec4i>(), end = lines_gpu.end<Vec4i>();
for ( ; it != end; it++)
{
Vec4i p = *it;
line(result_gpu, Point(p[0], p[1]), Point(p[2], p[3]), Scalar(255));
}
EXPECT_MAT_SIMILAR(result_cpu, result_gpu, eps);
}
}
};
OCL_TEST_P(HoughLinesP, RealImage)
{
readRealTestData();
OCL_OFF(cv::HoughLinesP(src, dst, rhoStep, thetaStep, threshold, minLineLength, maxGap));
OCL_ON(cv::HoughLinesP(usrc, udst, rhoStep, thetaStep, threshold, minLineLength, maxGap));
Near(0.2);
}
OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Combine(Values(1, 0.5), // rhoStep
Values(CV_PI / 180.0, CV_PI / 360.0), // thetaStep
Values(80, 150))); // threshold
OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLinesP, Combine(Values(100, 150), // threshold
Values(50, 100), // minLineLength
Values(5, 10))); // maxLineGap
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL