ocl: pyrUp/pyrDown 3-channel

This commit is contained in:
Alexander Alekhin 2014-02-27 16:25:26 +04:00
parent 702a2a6ff6
commit dca401d4cc
5 changed files with 84 additions and 56 deletions

View File

@ -57,7 +57,7 @@ namespace ocl {
typedef Size_MatType PyrDownFixture;
OCL_PERF_TEST_P(PyrDownFixture, PyrDown,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES_134))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
@ -81,7 +81,7 @@ OCL_PERF_TEST_P(PyrDownFixture, PyrDown,
typedef Size_MatType PyrUpFixture;
OCL_PERF_TEST_P(PyrUpFixture, PyrUp,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES_134))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
@ -105,7 +105,7 @@ OCL_PERF_TEST_P(PyrUpFixture, PyrUp,
typedef Size_MatType BuildPyramidFixture;
OCL_PERF_TEST_P(BuildPyramidFixture, BuildPyramid,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES_134))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);

View File

@ -51,6 +51,16 @@
#endif
#endif
#if cn != 3
#define loadpix(addr) *(__global const T*)(addr)
#define storepix(val, addr) *(__global T*)(addr) = (val)
#define PIXSIZE ((int)sizeof(T))
#else
#define loadpix(addr) vload3(0, (__global const T1*)(addr))
#define storepix(val, addr) vstore3((val), 0, (__global T1*)(addr))
#define PIXSIZE ((int)sizeof(T1)*3)
#endif
#define noconvert
inline int idx_row_low(int y, int last_row)
@ -90,8 +100,8 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
const int y = get_group_id(1);
__local FT smem[256 + 4];
__global T * dstData = (__global T *)(dst + dst_offset);
__global const uchar * srcData = (__global const uchar*)(src + src_offset);
__global uchar * dstData = dst + dst_offset;
__global const uchar * srcData = src + src_offset;
FT sum;
FT co1 = 0.375f;
@ -104,11 +114,11 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
if (src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2)
{
sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * src_step))[x]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * src_step))[x]);
sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * src_step))[x]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * src_step))[x]);
sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * src_step))[x]);
sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + x * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + x * PIXSIZE));
sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + x * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + x * PIXSIZE));
sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + x * PIXSIZE));
smem[2 + get_local_id(0)] = sum;
@ -116,11 +126,11 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
{
const int left_x = x - 2;
sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * src_step))[left_x]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * src_step))[left_x]);
sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * src_step))[left_x]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * src_step))[left_x]);
sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * src_step))[left_x]);
sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + left_x * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + left_x * PIXSIZE));
sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + left_x * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + left_x * PIXSIZE));
sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + left_x * PIXSIZE));
smem[get_local_id(0)] = sum;
}
@ -129,11 +139,11 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
{
const int right_x = x + 2;
sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * src_step))[right_x]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * src_step))[right_x]);
sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * src_step))[right_x]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * src_step))[right_x]);
sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * src_step))[right_x]);
sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + right_x * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + right_x * PIXSIZE));
sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + right_x * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + right_x * PIXSIZE));
sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + right_x * PIXSIZE));
smem[4 + get_local_id(0)] = sum;
}
@ -142,11 +152,11 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
{
int col = idx_col(x, last_col);
sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * src_step))[col]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * src_step))[col]);
sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * src_step))[col]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * src_step))[col]);
sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * src_step))[col]);
sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE));
sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE));
sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE));
smem[2 + get_local_id(0)] = sum;
@ -156,11 +166,11 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
col = idx_col(left_x, last_col);
sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * src_step))[col]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * src_step))[col]);
sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * src_step))[col]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * src_step))[col]);
sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * src_step))[col]);
sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE));
sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE));
sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE));
smem[get_local_id(0)] = sum;
}
@ -171,11 +181,11 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
col = idx_col(right_x, last_col);
sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * src_step))[col]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * src_step))[col]);
sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * src_step))[col]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * src_step))[col]);
sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * src_step))[col]);
sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE));
sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE));
sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE));
smem[4 + get_local_id(0)] = sum;
}
@ -196,7 +206,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
if (dst_x < dst_cols)
dstData[y * dst_step / ((int)sizeof(T)) + dst_x] = convertToT(sum);
storepix(convertToT(sum), dstData + y * dst_step + dst_x * PIXSIZE);
}
}

View File

@ -58,6 +58,16 @@
#endif
#endif
#if cn != 3
#define loadpix(addr) *(__global const T*)(addr)
#define storepix(val, addr) *(__global T*)(addr) = (val)
#define PIXSIZE ((int)sizeof(T))
#else
#define loadpix(addr) vload3(0, (__global const T1*)(addr))
#define storepix(val, addr) vstore3((val), 0, (__global T1*)(addr))
#define PIXSIZE ((int)sizeof(T1)*3)
#endif
#define noconvert
@ -76,8 +86,8 @@ __kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, in
__local FT s_srcPatch[10][10];
__local FT s_dstPatch[20][16];
__global T * dstData = (__global T *)(dst + dst_offset);
__global const T * srcData = (__global const T *)(src + src_offset);
__global uchar * dstData = dst + dst_offset;
__global const uchar * srcData = src + src_offset;
if( tidx < 10 && tidy < 10 )
{
@ -90,7 +100,7 @@ __kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, in
srcy = abs(srcy);
srcy = min(src_rows - 1, srcy);
s_srcPatch[tidy][tidx] = convertToFT(srcData[srcx + srcy * src_step / (int) sizeof(T)]);
s_srcPatch[tidy][tidx] = convertToFT(loadpix(srcData + srcy * src_step + srcx * PIXSIZE));
}
barrier(CLK_LOCAL_MEM_FENCE);
@ -155,5 +165,5 @@ __kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, in
sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx];
if ((x < dst_cols) && (y < dst_rows))
dstData[x + y * dst_step / (int)sizeof(T)] = convertToT(4.0f * sum);
storepix(convertToT(4.0f * sum), dstData + y * dst_step + x * PIXSIZE);
}

View File

@ -407,7 +407,7 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
if ((channels != 1 && channels != 2 && channels != 4) || borderType != BORDER_DEFAULT)
if (channels > 4 || borderType != BORDER_DEFAULT)
return false;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
@ -426,12 +426,16 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
char cvt[2][50];
ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc,
format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s",
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
String buildOptions = format(
"-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
"-D T1=%s -D cn=%d",
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
ocl::typeToStr(depth), channels
);
ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
if (k.empty())
return false;
@ -446,7 +450,7 @@ static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
if ((channels != 1 && channels != 2 && channels != 4) || borderType != BORDER_DEFAULT)
if (channels > 4 || borderType != BORDER_DEFAULT)
return false;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
@ -464,12 +468,16 @@ static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
char cvt[2][50];
ocl::Kernel k("pyrUp", ocl::imgproc::pyr_up_oclsrc,
format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s",
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
String buildOptions = format(
"-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
"-D T1=%s -D cn=%d",
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
ocl::typeToStr(depth), channels
);
ocl::Kernel k("pyrUp", ocl::imgproc::pyr_up_oclsrc, buildOptions);
if (k.empty())
return false;

View File

@ -108,7 +108,7 @@ OCL_TEST_P(PyrDown, Mat)
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrDown, Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
Values(1, 2, 4),
Values(1, 2, 3, 4),
Bool()
));
@ -133,7 +133,7 @@ OCL_TEST_P(PyrUp, Mat)
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrUp, Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
Values(1, 2, 4),
Values(1, 2, 3, 4),
Bool()
));