mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 06:03:15 +08:00
A little optimization on ocl/pyrdown, ocl/canny
This commit is contained in:
parent
907a9101eb
commit
1b6639aa3d
@ -66,7 +66,7 @@ namespace cv
|
||||
}
|
||||
}
|
||||
|
||||
cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(dy_)
|
||||
cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(dy_), counter(NULL)
|
||||
{
|
||||
CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
|
||||
|
||||
@ -102,6 +102,10 @@ void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
|
||||
|
||||
float counter_f [1] = { 0 };
|
||||
int err = 0;
|
||||
if(counter)
|
||||
{
|
||||
openCLFree(counter);
|
||||
}
|
||||
counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(float), counter_f, &err );
|
||||
openCLSafeCall(err);
|
||||
}
|
||||
@ -322,15 +326,11 @@ void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int ro
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||
|
||||
#if CALCMAP_FIXED
|
||||
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
string kernelName = "calcMap";
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
#else
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
string kernelName = "calcMap_2";
|
||||
size_t localThreads[3] = {256, 1, 1};
|
||||
#endif
|
||||
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
//
|
||||
// @Authors
|
||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
@ -118,81 +119,19 @@ uchar4 round_uchar4_float4(float4 v)
|
||||
return round_uchar4_int4(iv);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int idx_row_low(int y, int last_row)
|
||||
{
|
||||
if(y < 0)
|
||||
{
|
||||
y = -y;
|
||||
}
|
||||
return y % (last_row + 1);
|
||||
}
|
||||
|
||||
int idx_row_high(int y, int last_row)
|
||||
{
|
||||
int i;
|
||||
int j;
|
||||
if(last_row - y < 0)
|
||||
{
|
||||
i = (y - last_row);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = (last_row - y);
|
||||
}
|
||||
if(last_row - i < 0)
|
||||
{
|
||||
j = i - last_row;
|
||||
}
|
||||
else
|
||||
{
|
||||
j = last_row - i;
|
||||
}
|
||||
return j % (last_row + 1);
|
||||
}
|
||||
#define IDX_ROW_HIGH(y,last_row) (abs_diff((int)abs_diff(last_row,y),last_row) % ((last_row)+1))
|
||||
#define IDX_ROW_LOW(y,last_row) (abs(y) % ((last_row) + 1))
|
||||
#define IDX_COL_HIGH(x,last_col) abs_diff((int)abs_diff(x,last_col),last_col)
|
||||
#define IDX_COL_LOW(x,last_col) (abs(x) % ((last_col) + 1))
|
||||
|
||||
int idx_row(int y, int last_row)
|
||||
{
|
||||
return idx_row_low(idx_row_high(y, last_row), last_row);
|
||||
}
|
||||
|
||||
int idx_col_low(int x, int last_col)
|
||||
{
|
||||
if(x < 0)
|
||||
{
|
||||
x = -x;
|
||||
}
|
||||
return x % (last_col + 1);
|
||||
}
|
||||
|
||||
int idx_col_high(int x, int last_col)
|
||||
{
|
||||
int i;
|
||||
int j;
|
||||
if(last_col - x < 0)
|
||||
{
|
||||
i = (x - last_col);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = (last_col - x);
|
||||
}
|
||||
if(last_col - i < 0)
|
||||
{
|
||||
j = i - last_col;
|
||||
}
|
||||
else
|
||||
{
|
||||
j = last_col - i;
|
||||
}
|
||||
return j % (last_col + 1);
|
||||
return IDX_ROW_LOW(IDX_ROW_HIGH(y,last_row),last_row);
|
||||
}
|
||||
|
||||
int idx_col(int x, int last_col)
|
||||
{
|
||||
return idx_col_low(idx_col_high(x, last_col), last_col);
|
||||
return IDX_COL_LOW(IDX_COL_HIGH(x,last_col),last_col);
|
||||
}
|
||||
|
||||
__kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstOffset, int dstCols)
|
||||
@ -210,11 +149,11 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)]);
|
||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)]);
|
||||
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)]);
|
||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)]);
|
||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)]);
|
||||
sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)];
|
||||
sum = sum + 0.25f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)];
|
||||
sum = sum + 0.375f * ((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)];
|
||||
sum = sum + 0.25f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)];
|
||||
sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)];
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
@ -224,11 +163,11 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)]);
|
||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)]);
|
||||
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)]);
|
||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)]);
|
||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)]);
|
||||
sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||
sum = sum + 0.25f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||
sum = sum + 0.375f * ((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||
sum = sum + 0.25f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||
sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
@ -239,11 +178,11 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)]);
|
||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)]);
|
||||
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)]);
|
||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)]);
|
||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)]);
|
||||
sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||
sum = sum + 0.25f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||
sum = sum + 0.375f * ((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||
sum = sum + 0.25f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||
sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
@ -288,11 +227,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffse
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
||||
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
||||
sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)]);
|
||||
sum = sum + co2 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)]);
|
||||
sum = sum + co1 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)]);
|
||||
sum = sum + co2 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)]);
|
||||
sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)]);
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
@ -302,11 +241,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffse
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
||||
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
||||
sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
|
||||
sum = sum + co2 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
|
||||
sum = sum + co1 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
|
||||
sum = sum + co2 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
|
||||
sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
@ -317,11 +256,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffse
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
||||
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
||||
sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
|
||||
sum = sum + co2 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
|
||||
sum = sum + co1 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
|
||||
sum = sum + co2 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
|
||||
sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
|
@ -103,6 +103,6 @@ TEST_P(Canny, Accuracy)
|
||||
EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ocl_ImgProc, Canny, testing::Combine(
|
||||
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
|
||||
testing::Values(AppertureSize(3), AppertureSize(5)),
|
||||
testing::Values(L2gradient(false), L2gradient(true))));
|
||||
|
Loading…
Reference in New Issue
Block a user