mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 06:03:15 +08:00
Fix 2.4 ocl Canny.
This fix is a workaround for current 2.4 branch without introducing an additional oclMat buffer into CannyBuf object. Test case is cleaned up. Volatile keywords in kernels are removed for performance concern.
This commit is contained in:
parent
2ccdf56119
commit
d015fa76fa
@ -87,7 +87,7 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
|
||||
filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
|
||||
}
|
||||
}
|
||||
ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);
|
||||
ensureSizeIsEnough(2 * (image_size.height + 2), image_size.width + 2, CV_32FC1, edgeBuf);
|
||||
|
||||
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
|
||||
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
|
||||
@ -141,13 +141,16 @@ namespace
|
||||
void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh)
|
||||
{
|
||||
using namespace ::cv::ocl::canny;
|
||||
calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
|
||||
oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
|
||||
oclMat mapBuf = buf.edgeBuf(Rect(0, buf.edgeBuf.rows / 2, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
|
||||
|
||||
edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
|
||||
calcMap_gpu(buf.dx, buf.dy, magBuf, mapBuf, dst.rows, dst.cols, low_thresh, high_thresh);
|
||||
|
||||
edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
|
||||
edgesHysteresisLocal_gpu(mapBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
|
||||
|
||||
getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);
|
||||
edgesHysteresisGlobal_gpu(mapBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
|
||||
|
||||
getEdges_gpu(mapBuf, dst, dst.rows, dst.cols);
|
||||
}
|
||||
}
|
||||
|
||||
@ -172,18 +175,20 @@ void cv::ocl::Canny(const oclMat &src, CannyBuf &buf, oclMat &dst, double low_th
|
||||
buf.create(src.size(), apperture_size);
|
||||
buf.edgeBuf.setTo(Scalar::all(0));
|
||||
|
||||
oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
|
||||
|
||||
if (apperture_size == 3)
|
||||
{
|
||||
calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);
|
||||
|
||||
calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
|
||||
calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
|
||||
}
|
||||
else
|
||||
{
|
||||
buf.filterDX->apply(src, buf.dx);
|
||||
buf.filterDY->apply(src, buf.dy);
|
||||
|
||||
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
|
||||
calcMagnitude_gpu(buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
|
||||
}
|
||||
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
|
||||
}
|
||||
@ -209,7 +214,10 @@ void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &d
|
||||
buf.dy = dy;
|
||||
buf.create(dx.size(), -1);
|
||||
buf.edgeBuf.setTo(Scalar::all(0));
|
||||
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
|
||||
|
||||
oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
|
||||
|
||||
calcMagnitude_gpu(buf.dx, buf.dy, magBuf, dx.rows, dx.cols, L2gradient);
|
||||
|
||||
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
|
||||
}
|
||||
@ -234,7 +242,7 @@ void canny::calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_b
|
||||
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
|
||||
@ -264,12 +272,8 @@ void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
char build_options [15] = "";
|
||||
if(L2Grad)
|
||||
{
|
||||
strcat(build_options, "-D L2GRAD");
|
||||
}
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||
const char * build_options = L2Grad ? "-D L2GRAD":"";
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||
}
|
||||
void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
|
||||
{
|
||||
@ -292,12 +296,8 @@ void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, i
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
char build_options [15] = "";
|
||||
if(L2Grad)
|
||||
{
|
||||
strcat(build_options, "-D L2GRAD");
|
||||
}
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||
const char * build_options = L2Grad ? "-D L2GRAD":"";
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||
}
|
||||
|
||||
void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh)
|
||||
@ -328,7 +328,7 @@ void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int ro
|
||||
string kernelName = "calcMap";
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols)
|
||||
@ -348,7 +348,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
|
||||
@ -378,7 +378,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
|
||||
std::swap(st1, st2);
|
||||
}
|
||||
@ -403,5 +403,5 @@ void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols)
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
@ -297,6 +297,9 @@ calcMap
|
||||
map_step /= sizeof(*map);
|
||||
map_offset /= sizeof(*map);
|
||||
|
||||
mag += mag_offset;
|
||||
map += map_offset;
|
||||
|
||||
__local float smem[18][18];
|
||||
|
||||
int gidx = get_global_id(0);
|
||||
@ -389,7 +392,7 @@ edgesHysteresisLocal
|
||||
(
|
||||
__global int * map,
|
||||
__global ushort2 * st,
|
||||
volatile __global unsigned int * counter,
|
||||
__global unsigned int * counter,
|
||||
int rows,
|
||||
int cols,
|
||||
int map_step,
|
||||
@ -399,6 +402,8 @@ edgesHysteresisLocal
|
||||
map_step /= sizeof(*map);
|
||||
map_offset /= sizeof(*map);
|
||||
|
||||
map += map_offset;
|
||||
|
||||
__local int smem[18][18];
|
||||
|
||||
int gidx = get_global_id(0);
|
||||
@ -416,12 +421,12 @@ edgesHysteresisLocal
|
||||
if(ly < 14)
|
||||
{
|
||||
smem[ly][lx] =
|
||||
map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step + map_offset];
|
||||
map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step];
|
||||
}
|
||||
if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
|
||||
{
|
||||
smem[ly + 14][lx] =
|
||||
map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step + map_offset];
|
||||
map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step];
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@ -482,14 +487,17 @@ edgesHysteresisLocal
|
||||
__constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
|
||||
__constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
|
||||
|
||||
|
||||
#define stack_size 512
|
||||
__kernel
|
||||
void edgesHysteresisGlobal
|
||||
void
|
||||
__attribute__((reqd_work_group_size(128,1,1)))
|
||||
edgesHysteresisGlobal
|
||||
(
|
||||
__global int * map,
|
||||
__global ushort2 * st1,
|
||||
__global ushort2 * st2,
|
||||
volatile __global int * counter,
|
||||
__global int * counter,
|
||||
int rows,
|
||||
int cols,
|
||||
int count,
|
||||
@ -501,6 +509,8 @@ void edgesHysteresisGlobal
|
||||
map_step /= sizeof(*map);
|
||||
map_offset /= sizeof(*map);
|
||||
|
||||
map += map_offset;
|
||||
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
|
||||
@ -510,7 +520,7 @@ void edgesHysteresisGlobal
|
||||
int grp_idx = get_group_id(0);
|
||||
int grp_idy = get_group_id(1);
|
||||
|
||||
volatile __local unsigned int s_counter;
|
||||
__local unsigned int s_counter;
|
||||
__local unsigned int s_ind;
|
||||
|
||||
__local ushort2 s_st[stack_size];
|
||||
@ -564,9 +574,9 @@ void edgesHysteresisGlobal
|
||||
pos.x += c_dx[lidx & 7];
|
||||
pos.y += c_dy[lidx & 7];
|
||||
|
||||
if (map[pos.x + map_offset + pos.y * map_step] == 1)
|
||||
if (map[pos.x + pos.y * map_step] == 1)
|
||||
{
|
||||
map[pos.x + map_offset + pos.y * map_step] = 2;
|
||||
map[pos.x + pos.y * map_step] = 2;
|
||||
|
||||
ind = atomic_inc(&s_counter);
|
||||
|
||||
@ -621,6 +631,6 @@ void getEdges
|
||||
|
||||
if(gidy < rows && gidx < cols)
|
||||
{
|
||||
dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] >> 1));
|
||||
dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] >> 1));
|
||||
}
|
||||
}
|
||||
|
@ -45,7 +45,6 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
#ifdef HAVE_OPENCL
|
||||
#define SHOW_RESULT 0
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Canny
|
||||
@ -59,13 +58,10 @@ PARAM_TEST_CASE(Canny, AppertureSize, L2gradient)
|
||||
bool useL2gradient;
|
||||
|
||||
cv::Mat edges_gold;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
virtual void SetUp()
|
||||
{
|
||||
apperture_size = GET_PARAM(0);
|
||||
useL2gradient = GET_PARAM(1);
|
||||
//int devnums = getDevice(oclinfo);
|
||||
//CV_Assert(devnums > 0);
|
||||
}
|
||||
};
|
||||
|
||||
@ -77,32 +73,18 @@ TEST_P(Canny, Accuracy)
|
||||
double low_thresh = 50.0;
|
||||
double high_thresh = 100.0;
|
||||
|
||||
cv::resize(img, img, cv::Size(512, 384));
|
||||
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);
|
||||
|
||||
cv::ocl::oclMat edges;
|
||||
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
|
||||
|
||||
char filename [100];
|
||||
sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient);
|
||||
|
||||
cv::Mat edges_gold;
|
||||
cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
|
||||
|
||||
#if SHOW_RESULT
|
||||
cv::Mat edges_x2, ocl_edges(edges);
|
||||
edges_x2.create(edges.rows, edges.cols * 2, edges.type());
|
||||
edges_x2.setTo(0);
|
||||
cv::add(edges_gold, cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)));
|
||||
cv::add(ocl_edges, cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)));
|
||||
cv::namedWindow("Canny result (left: cpu, right: ocl)");
|
||||
cv::imshow("Canny result (left: cpu, right: ocl)", edges_x2);
|
||||
cv::waitKey();
|
||||
#endif //OUTPUT_RESULT
|
||||
EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Canny, testing::Combine(
|
||||
testing::Values(AppertureSize(3), AppertureSize(5)),
|
||||
testing::Values(L2gradient(false), L2gradient(true))));
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user