mirror of
https://github.com/opencv/opencv.git
synced 2024-11-27 20:50:25 +08:00
ocl_resize: move coeffienct calculation for INTER_LINEAR_EXACT from C++ to OpenCL
This commit is contained in:
parent
6dd905eb53
commit
672e06c408
@ -222,40 +222,75 @@ __kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offs
|
||||
|
||||
#elif defined INTER_LINEAR_EXACT
|
||||
|
||||
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||
__global const uchar * buffer)
|
||||
{
|
||||
__kernel void precomputeCoeffs(__read_only image2d_t src, __write_only image2d_t coeffsTex, float inv_fx, float inv_fy) {
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
int sx, sy;
|
||||
float fxx, fyy;
|
||||
short ialpha, ibeta;
|
||||
|
||||
if (dx < dst_cols && dy < dst_rows)
|
||||
{
|
||||
__global const int * xofs = (__global const int *)(buffer), * yofs = xofs + dst_cols;
|
||||
__global const short * ialpha = (__global const short *)(yofs + dst_rows);
|
||||
__global const short * ibeta = ialpha + ((dst_cols + dy) << 1);
|
||||
ialpha += dx << 1;
|
||||
fxx = (float)((dx + 0.5) * inv_fx * 0.5 - 0.5);
|
||||
fyy = (float)((dy + 0.5) * inv_fy * 0.5 - 0.5);
|
||||
|
||||
int sx0 = xofs[dx], sy0 = clamp(yofs[dy], 0, src_rows - 1),
|
||||
sy1 = clamp(yofs[dy] + 1, 0, src_rows - 1);
|
||||
short a0 = ialpha[0], a1 = ialpha[1];
|
||||
short b0 = ibeta[0], b1 = ibeta[1];
|
||||
sx = floor(fxx);
|
||||
fxx -= sx;
|
||||
if (sx < 0)
|
||||
fxx = 0, sx = 0;
|
||||
if (sx >= get_image_width(src) - 1)
|
||||
fxx = 0, sx = get_image_width(src) - 1;
|
||||
|
||||
int src_index0 = mad24(sy0, src_step, mad24(sx0, TSIZE, src_offset)),
|
||||
src_index1 = mad24(sy1, src_step, mad24(sx0, TSIZE, src_offset));
|
||||
WT data0 = convertToWT(loadpix(srcptr + src_index0));
|
||||
WT data1 = convertToWT(loadpix(srcptr + src_index0 + TSIZE));
|
||||
WT data2 = convertToWT(loadpix(srcptr + src_index1));
|
||||
WT data3 = convertToWT(loadpix(srcptr + src_index1 + TSIZE));
|
||||
sy = floor(fyy);
|
||||
fyy -= sy;
|
||||
if (sy < 0)
|
||||
fyy = 0, sy = 0;
|
||||
if (sy >= get_image_height(src) - 1)
|
||||
fyy = 0, sy = get_image_height(src) - 1;
|
||||
|
||||
WT val = ( (((data0 * a0 + data1 * a1) >> 4) * b0) >> 16) +
|
||||
( (((data2 * a0 + data3 * a1) >> 4) * b1) >> 16);
|
||||
ialpha = convert_short_sat_rte((1.f - fxx) * INTER_RESIZE_COEF_SCALE);
|
||||
ibeta = convert_short_sat_rte((1.f - fyy) * INTER_RESIZE_COEF_SCALE);
|
||||
|
||||
storepix(convertToDT((val + 2) >> 2),
|
||||
dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
|
||||
}
|
||||
write_imagei(coeffsTex, (int2)(dx, 0), (int4)(sx, ialpha, 0, 0));
|
||||
write_imagei(coeffsTex, (int2)(0, dy), (int4)(sy, ibeta, 0, 0));
|
||||
}
|
||||
|
||||
__kernel void resizeLN(__read_only image2d_t src, __write_only image2d_t dst, __read_only image2d_t coeffsTex) {
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
int sx, sy, sx0, sx1, sy0, sy1;
|
||||
short ialpha0, ialpha1, ibeta0, ibeta1;
|
||||
int4 v0, v1, v2, v3, res;
|
||||
float4 f0, f1, f2, f3, fres;
|
||||
|
||||
sx = read_imagei(coeffsTex, (int2)(dx, 0)).x;
|
||||
ialpha0 = read_imagei(coeffsTex, (int2)(dx, 0)).y;
|
||||
ialpha1 = INTER_RESIZE_COEF_SCALE - ialpha0;
|
||||
|
||||
sy = read_imagei(coeffsTex, (int2)(0, dy)).x;
|
||||
ibeta0 = read_imagei(coeffsTex, (int2)(0, dy)).y;
|
||||
ibeta1 = INTER_RESIZE_COEF_SCALE - ibeta0;
|
||||
|
||||
sx0 = sx * cn;
|
||||
sx1 = sx0 + cn;
|
||||
sy0 = sy * src_stride;
|
||||
sy1 = sy0 + src_stride;
|
||||
|
||||
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
|
||||
v0 = read_imagei(src, sampler, (int2)(sx0, sy0));
|
||||
v1 = read_imagei(src, sampler, (int2)(sx1, sy0));
|
||||
v2 = read_imagei(src, sampler, (int2)(sx0, sy1));
|
||||
v3 = read_imagei(src, sampler, (int2)(sx1, sy1));
|
||||
|
||||
f0 = convert_float4(v0);
|
||||
f1 = convert_float4(v1);
|
||||
f2 = convert_float4(v2);
|
||||
f3 = convert_float4(v3);
|
||||
|
||||
fres = (f0 * ialpha0 + f1 * ialpha1) * ibeta0 + (f2 * ialpha0 + f3 * ialpha1) * ibeta1;
|
||||
fres = fres * (1.0f / (INTER_RESIZE_COEF_SCALE * INTER_RESIZE_COEF_SCALE));
|
||||
|
||||
res = convert_int4_sat_rte(fres);
|
||||
write_imagei(dst, (int2)(dx + dst_offset, dy + dst_offset), res);
|
||||
}
|
||||
|
||||
#elif defined INTER_NEAREST
|
||||
|
||||
|
@ -3494,47 +3494,14 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
|
||||
}
|
||||
}
|
||||
else if (interpolation == INTER_LINEAR_EXACT) {
|
||||
AutoBuffer<uchar> _buffer((dsize.width + dsize.height)*(sizeof(int) + sizeof(short)*2));
|
||||
int* xofs = (int*)_buffer.data(), * yofs = xofs + dsize.width;
|
||||
short* ialpha = (short*)(yofs + dsize.height), * ibeta = ialpha + dsize.width*2;
|
||||
float fxx, fyy;
|
||||
int sx, sy;
|
||||
|
||||
for (int dx = 0; dx < dsize.width; dx++)
|
||||
{
|
||||
fxx = (float)((dx+0.5)*inv_fx - 0.5);
|
||||
sx = cvFloor(fxx);
|
||||
fxx -= sx;
|
||||
|
||||
if (sx < 0)
|
||||
fxx = 0, sx = 0;
|
||||
|
||||
if (sx >= ssize.width-1)
|
||||
fxx = 0, sx = ssize.width-1;
|
||||
|
||||
xofs[dx] = sx;
|
||||
ialpha[dx*2 + 0] = saturate_cast<short>((1.f - fxx) * INTER_RESIZE_COEF_SCALE);
|
||||
ialpha[dx*2 + 1] = saturate_cast<short>(fxx * INTER_RESIZE_COEF_SCALE);
|
||||
}
|
||||
|
||||
for (int dy = 0; dy < dsize.height; dy++)
|
||||
{
|
||||
fyy = (float)((dy+0.5)*inv_fy - 0.5);
|
||||
sy = cvFloor(fyy);
|
||||
fyy -= sy;
|
||||
|
||||
yofs[dy] = sy;
|
||||
ibeta[dy*2 + 0] = saturate_cast<short>((1.f - fyy) * INTER_RESIZE_COEF_SCALE);
|
||||
ibeta[dy*2 + 1] = saturate_cast<short>(fyy * INTER_RESIZE_COEF_SCALE);
|
||||
}
|
||||
|
||||
int wdepth = std::max(depth, CV_32S), wtype = CV_MAKETYPE(wdepth, cn);
|
||||
UMat coeffs;
|
||||
Mat(1, static_cast<int>(_buffer.size()), CV_8UC1, _buffer.data()).copyTo(coeffs);
|
||||
|
||||
char buf[2][50];
|
||||
k.create("resizeLN", ocl::imgproc::resize_oclsrc,
|
||||
format("-D INTER_LINEAR_EXACT -D depth=%d -D T=%s -D T1=%s "
|
||||
// Precompute the coefficients and store them in a texture
|
||||
UMat coeffsTex(dsize.height + dsize.width, 2, CV_16SC1);
|
||||
ocl::KernelArg coeffsTexArg = ocl::KernelArg::WriteOnlyNoSize(coeffsTex);
|
||||
k.create("precomputeCoeffs", ocl::imgproc::resize_oclsrc,
|
||||
format("-D PRECOMPUTE_COEFFS -D depth=%d -D T=%s -D T1=%s "
|
||||
"-D WT=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d "
|
||||
"-D INTER_RESIZE_COEF_BITS=%d",
|
||||
depth, ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype),
|
||||
@ -3543,9 +3510,25 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
|
||||
cn, INTER_RESIZE_COEF_BITS));
|
||||
if (k.empty())
|
||||
return false;
|
||||
k.args(ocl::KernelArg::ReadOnlyNoSize(src), coeffsTexArg, (float)inv_fx * 0.5, (float)inv_fy * 0.5);
|
||||
size_t globalThreads[2] = { (size_t)dsize.width, (size_t)dsize.height };
|
||||
if (!k.run(2, globalThreads, NULL, false))
|
||||
return false;
|
||||
|
||||
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
|
||||
ocl::KernelArg::PtrReadOnly(coeffs));
|
||||
// Use the texture in the interpolation kernel
|
||||
k.create("resizeLN", ocl::imgproc::resize_oclsrc,
|
||||
format("-D INTER_LINEAR_EXACT -D depth=%d -D T=%s -D T1=%s "
|
||||
"-D WT=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d "
|
||||
"-D INTER_RESIZE_COEF_BITS=%d -D USE_TEXTURE",
|
||||
depth, ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype),
|
||||
ocl::convertTypeStr(depth, wdepth, cn, buf[0], sizeof(buf[0])),
|
||||
ocl::convertTypeStr(wdepth, depth, cn, buf[1], sizeof(buf[1])),
|
||||
cn, INTER_RESIZE_COEF_BITS));
|
||||
if (k.empty())
|
||||
return false;
|
||||
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), coeffsTexArg);
|
||||
if (!k.run(2, globalThreads, NULL, false))
|
||||
return false;
|
||||
}
|
||||
else if (interpolation == INTER_NEAREST)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user