Merge pull request #1760 from ilya-lavrenov:ocl_remap_nn

This commit is contained in:
Roman Donchenko 2013-11-07 17:29:16 +04:00 committed by OpenCV Buildbot
commit 07db81a4bb
4 changed files with 55 additions and 11 deletions

View File

@ -195,9 +195,14 @@ namespace cv
return;
}
if (map1.empty())
map1.swap(map2);
CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST
|| interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4);
CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) ||
/*|| interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);
CV_Assert((map1.type() == CV_16SC2 && (map2.empty() || (interpolation == INTER_NEAREST &&
(map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) )) ||
(map1.type() == CV_32FC2 && !map2.data) ||
(map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
CV_Assert(!map2.data || map2.size() == map1.size());
CV_Assert(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || borderType == BORDER_WRAP
@ -212,10 +217,14 @@ namespace cv
"BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
string kernelName = "remap";
if ( map1.type() == CV_32FC2 && !map2.data )
if (map1.type() == CV_32FC2 && map2.empty())
kernelName += "_32FC2";
else if (map1.type() == CV_16SC2 && !map2.data)
else if (map1.type() == CV_16SC2)
{
kernelName += "_16SC2";
if (!map2.empty())
kernelName += "_16UC1";
}
else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
kernelName += "_2_32FC1";
else
@ -232,9 +241,6 @@ namespace cv
if (interpolation != INTER_NEAREST)
{
int wdepth = std::max(CV_32F, dst.depth());
if (!supportsDouble)
wdepth = std::min(CV_32F, wdepth);
buildOptions += format(" -D WT=%s%s -D convertToT=convert_%s%s%s -D convertToWT=convert_%s%s"
" -D convertToWT2=convert_%s2 -D WT2=%s2",
typeMap[wdepth], channelMap[ocn],

View File

@ -51,6 +51,13 @@
#endif
#endif
// Fixed-point constants shared by the remap kernels in this file.
// Everything is derived from INTER_BITS so the three values stay in sync.
enum
{
    INTER_BITS      = 5,                                  // base bit count
    INTER_TAB_SIZE  = (1 << INTER_BITS),                  // 2^INTER_BITS = 32
    INTER_TAB_SIZE2 = (INTER_TAB_SIZE * INTER_TAB_SIZE)   // INTER_TAB_SIZE squared = 1024
};
#ifdef INTER_NEAREST
#define convertToWT
#endif
@ -204,6 +211,36 @@ __kernel void remap_16SC2(__global const T * restrict src, __global T * dst, __g
}
}
// Remap kernel for a short2 coordinate map (map1) paired with a ushort
// per-pixel map (map2). Each work-item writes exactly one destination pixel
// by copying a single source pixel (no blending between neighbours).
//
//   src / dst            source and destination images of element type T
//   map1                 per-pixel integer source coordinate (x, y)
//   map2                 per-pixel 16-bit value; only its low 2 * INTER_BITS
//                        bits are used (see below)
//   *_offset / *_step    added / multiplied directly into element indices
//   src_cols, src_rows   not read directly in this body; presumably consumed
//                        by NEED_EXTRAPOLATION / EXTRAPOLATE - confirm there
//   dst_cols, dst_rows   destination extent, used for the bounds check
//   scalar               not read directly in this body; presumably consumed
//                        by EXTRAPOLATE - confirm against the macro
//
// NEED_EXTRAPOLATION and EXTRAPOLATE are macros defined outside this kernel.
__kernel void remap_16SC2_16UC1(__global const T * restrict src, __global T * dst, __global short2 * map1, __global ushort * map2,
                                int src_offset, int dst_offset, int map1_offset, int map2_offset,
                                int src_step, int dst_step, int map1_step, int map2_step,
                                int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    // Work-items outside the destination image have nothing to write.
    if (x >= dst_cols || y >= dst_rows)
        return;

    int dstIdx = mad24(y, dst_step, x + dst_offset);
    int coordIdx = mad24(y, map1_step, x + map1_offset);
    int tabIdx = mad24(y, map2_step, x + map2_offset);

    // Keep the low 2 * INTER_BITS bits of the map2 entry, then split them:
    // the low INTER_BITS bits drive the x adjustment, the next INTER_BITS
    // bits drive the y adjustment.
    int tabValue = convert_int(map2[tabIdx]) & (INTER_TAB_SIZE2 - 1);
    int xPart = tabValue & (INTER_TAB_SIZE - 1);
    int yPart = tabValue >> INTER_BITS;
    int halfTab = INTER_TAB_SIZE >> 1;

    // A part strictly below half of INTER_TAB_SIZE moves that coordinate by one.
    int dx = xPart < halfTab ? 1 : 0;
    int dy = yPart < halfTab ? 1 : 0;

    int2 gxy = convert_int2(map1[coordIdx]) + (int2)(dx, dy);
    int gx = gxy.x, gy = gxy.y;

    // EXTRAPOLATE is deliberately not followed by ';' - the macro supplies
    // its own statement terminator / block so the 'else' below still pairs
    // with this 'if'.
    if (NEED_EXTRAPOLATION(gx, gy))
        EXTRAPOLATE(gxy, dst[dstIdx])
    else
        dst[dstIdx] = src[mad24(gy, src_step, gx + src_offset)];
}
#elif INTER_LINEAR
__kernel void remap_2_32FC1(__global T const * restrict src, __global T * dst,
@ -229,7 +266,7 @@ __kernel void remap_2_32FC1(__global T const * restrict src, __global T * dst,
int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1);
float2 _u = map_data - convert_float2(map_dataA);
WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32;
WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
WT scalar = convertToWT(nVal);
WT a = scalar, b = scalar, c = scalar, d = scalar;
@ -282,7 +319,7 @@ __kernel void remap_32FC2(__global T const * restrict src, __global T * dst,
int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
float2 _u = map_data - convert_float2(map_dataA);
WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32;
WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
WT scalar = convertToWT(nVal);
WT a = scalar, b = scalar, c = scalar, d = scalar;

View File

@ -93,8 +93,8 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
#endif
else
{
T array[VECSIZE];
VSTOREN(vecValue, 0, array);
__attribute__(( aligned(sizeof(VT)) )) T array[VECSIZE];
*((VT*)array) = vecValue;
#pragma unroll
for (int i = 0; i < VECSIZE; ++i)
if (gx + i < max_index)

View File

@ -355,6 +355,7 @@ INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_NEAREST, Combine(
Values(1, 2, 3, 4),
Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
pair<MatType, MatType>((MatType)CV_32FC2, noType),
pair<MatType, MatType>((MatType)CV_16SC2, (MatType)CV_16UC1),
pair<MatType, MatType>((MatType)CV_16SC2, noType)),
Values((Border)BORDER_CONSTANT,
(Border)BORDER_REPLICATE,