Merge pull request #925 from pengx17:2.4_canny_tmp_fix

This commit is contained in:
Vadim Pisarevsky 2013-05-30 12:01:08 +04:00 committed by OpenCV Buildbot
commit 678371be39
3 changed files with 45 additions and 53 deletions

View File

@ -87,7 +87,7 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
}
}
ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);
ensureSizeIsEnough(2 * (image_size.height + 2), image_size.width + 2, CV_32FC1, edgeBuf);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
@ -141,13 +141,16 @@ namespace
void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh)
{
using namespace ::cv::ocl::canny;
calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
oclMat mapBuf = buf.edgeBuf(Rect(0, buf.edgeBuf.rows / 2, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
calcMap_gpu(buf.dx, buf.dy, magBuf, mapBuf, dst.rows, dst.cols, low_thresh, high_thresh);
edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
edgesHysteresisLocal_gpu(mapBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);
edgesHysteresisGlobal_gpu(mapBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
getEdges_gpu(mapBuf, dst, dst.rows, dst.cols);
}
}
@ -172,18 +175,20 @@ void cv::ocl::Canny(const oclMat &src, CannyBuf &buf, oclMat &dst, double low_th
buf.create(src.size(), apperture_size);
buf.edgeBuf.setTo(Scalar::all(0));
oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
if (apperture_size == 3)
{
calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);
calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
}
else
{
buf.filterDX->apply(src, buf.dx);
buf.filterDY->apply(src, buf.dy);
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
calcMagnitude_gpu(buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
}
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
@ -209,7 +214,10 @@ void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &d
buf.dy = dy;
buf.create(dx.size(), -1);
buf.edgeBuf.setTo(Scalar::all(0));
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
calcMagnitude_gpu(buf.dx, buf.dy, magBuf, dx.rows, dx.cols, L2gradient);
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
@ -234,7 +242,7 @@ void canny::calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_b
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
@ -264,12 +272,8 @@ void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
char build_options [15] = "";
if(L2Grad)
{
strcat(build_options, "-D L2GRAD");
}
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
const char * build_options = L2Grad ? "-D L2GRAD":"";
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
{
@ -292,12 +296,8 @@ void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, i
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
char build_options [15] = "";
if(L2Grad)
{
strcat(build_options, "-D L2GRAD");
}
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
const char * build_options = L2Grad ? "-D L2GRAD":"";
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh)
@ -328,7 +328,7 @@ void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int ro
string kernelName = "calcMap";
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols)
@ -348,7 +348,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
@ -378,7 +378,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE);
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
std::swap(st1, st2);
}
@ -403,5 +403,5 @@ void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols)
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}

View File

@ -297,6 +297,9 @@ calcMap
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
mag += mag_offset;
map += map_offset;
__local float smem[18][18];
int gidx = get_global_id(0);
@ -389,7 +392,7 @@ edgesHysteresisLocal
(
__global int * map,
__global ushort2 * st,
volatile __global unsigned int * counter,
__global unsigned int * counter,
int rows,
int cols,
int map_step,
@ -399,6 +402,8 @@ edgesHysteresisLocal
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
map += map_offset;
__local int smem[18][18];
int gidx = get_global_id(0);
@ -416,12 +421,12 @@ edgesHysteresisLocal
if(ly < 14)
{
smem[ly][lx] =
map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step + map_offset];
map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step];
}
if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
{
smem[ly + 14][lx] =
map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step + map_offset];
map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step];
}
barrier(CLK_LOCAL_MEM_FENCE);
@ -482,14 +487,17 @@ edgesHysteresisLocal
__constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
__constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
#define stack_size 512
__kernel
void edgesHysteresisGlobal
void
__attribute__((reqd_work_group_size(128,1,1)))
edgesHysteresisGlobal
(
__global int * map,
__global ushort2 * st1,
__global ushort2 * st2,
volatile __global int * counter,
__global int * counter,
int rows,
int cols,
int count,
@ -501,6 +509,8 @@ void edgesHysteresisGlobal
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
map += map_offset;
int gidx = get_global_id(0);
int gidy = get_global_id(1);
@ -510,7 +520,7 @@ void edgesHysteresisGlobal
int grp_idx = get_group_id(0);
int grp_idy = get_group_id(1);
volatile __local unsigned int s_counter;
__local unsigned int s_counter;
__local unsigned int s_ind;
__local ushort2 s_st[stack_size];
@ -564,9 +574,9 @@ void edgesHysteresisGlobal
pos.x += c_dx[lidx & 7];
pos.y += c_dy[lidx & 7];
if (map[pos.x + map_offset + pos.y * map_step] == 1)
if (map[pos.x + pos.y * map_step] == 1)
{
map[pos.x + map_offset + pos.y * map_step] = 2;
map[pos.x + pos.y * map_step] = 2;
ind = atomic_inc(&s_counter);
@ -621,6 +631,6 @@ void getEdges
if(gidy < rows && gidx < cols)
{
dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] >> 1));
dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] >> 1));
}
}

View File

@ -45,7 +45,6 @@
#include "precomp.hpp"
#ifdef HAVE_OPENCL
#define SHOW_RESULT 0
////////////////////////////////////////////////////////
// Canny
@ -59,13 +58,10 @@ PARAM_TEST_CASE(Canny, AppertureSize, L2gradient)
bool useL2gradient;
cv::Mat edges_gold;
//std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
apperture_size = GET_PARAM(0);
useL2gradient = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
@ -77,32 +73,18 @@ TEST_P(Canny, Accuracy)
double low_thresh = 50.0;
double high_thresh = 100.0;
cv::resize(img, img, cv::Size(512, 384));
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);
cv::ocl::oclMat edges;
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
char filename [100];
sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient);
cv::Mat edges_gold;
cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
#if SHOW_RESULT
cv::Mat edges_x2, ocl_edges(edges);
edges_x2.create(edges.rows, edges.cols * 2, edges.type());
edges_x2.setTo(0);
cv::add(edges_gold, cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)));
cv::add(ocl_edges, cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)));
cv::namedWindow("Canny result (left: cpu, right: ocl)");
cv::imshow("Canny result (left: cpu, right: ocl)", edges_x2);
cv::waitKey();
#endif //OUTPUT_RESULT
EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Canny, testing::Combine(
testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true))));
#endif