mirror of
https://github.com/opencv/opencv.git
synced 2024-12-11 22:59:16 +08:00
use precomputed coefficients
This commit is contained in:
parent
2d43f4313b
commit
2ec1d1c986
@ -128,6 +128,21 @@ __kernel void resizeSampler(__read_only image2d_t srcImage,
|
|||||||
|
|
||||||
#elif defined INTER_LINEAR_INTEGER
|
#elif defined INTER_LINEAR_INTEGER
|
||||||
|
|
||||||
|
#define FIXED_POINT_BITS 16
|
||||||
|
#define FIXED_POINT_SCALE (1 << FIXED_POINT_BITS)
|
||||||
|
|
||||||
|
// Fixed-point multiply
|
||||||
|
#define FIXED_MUL(a, b) (((a) * (b)) >> FIXED_POINT_BITS)
|
||||||
|
|
||||||
|
// Rounding methods
|
||||||
|
#define ROUND_NEAREST_EVEN 0
|
||||||
|
#define ROUND_DOWN 1
|
||||||
|
#define ROUND_UP 2
|
||||||
|
#define TRUNCATE 3
|
||||||
|
|
||||||
|
// Choose rounding method
|
||||||
|
#define ROUNDING_METHOD ROUND_NEAREST_EVEN
|
||||||
|
|
||||||
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||||
__global const uchar * buffer)
|
__global const uchar * buffer)
|
||||||
@ -162,6 +177,9 @@ __kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offs
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#elif defined INTER_LINEAR
|
#elif defined INTER_LINEAR
|
||||||
|
|
||||||
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
@ -222,55 +240,77 @@ __kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offs
|
|||||||
|
|
||||||
#elif defined INTER_LINEAR_EXACT
|
#elif defined INTER_LINEAR_EXACT
|
||||||
|
|
||||||
#define FIXED_POINT_BITS 8
|
#define FIXED_POINT_BITS 16
|
||||||
#define FIXED_POINT_SCALE (1 << FIXED_POINT_BITS)
|
#define FIXED_POINT_SCALE (1 << FIXED_POINT_BITS)
|
||||||
|
|
||||||
// Fixed-point multiply
|
// Fixed-point multiply
|
||||||
#define FIXED_MUL(a, b) (((a) * (b)) >> FIXED_POINT_BITS)
|
#define FIXED_MUL(a, b) (((a) * (b)) >> FIXED_POINT_BITS)
|
||||||
|
|
||||||
|
// Rounding methods
|
||||||
|
#define ROUND_NEAREST_EVEN 0
|
||||||
|
#define ROUND_DOWN 1
|
||||||
|
#define ROUND_UP 2
|
||||||
|
#define TRUNCATE 3
|
||||||
|
|
||||||
|
// Choose rounding method
|
||||||
|
#define ROUNDING_METHOD ROUND_NEAREST_EVEN
|
||||||
|
|
||||||
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||||
int ifx, int ify)
|
__global const int * xofs, __global const int * yofs,
|
||||||
|
__global const short * ialpha, __global const short * ibeta)
|
||||||
{
|
{
|
||||||
int dx = get_global_id(0);
|
int dx = get_global_id(0);
|
||||||
int dy = get_global_id(1);
|
int dy = get_global_id(1);
|
||||||
|
|
||||||
if (dx < dst_cols && dy < dst_rows)
|
if (dx >= dst_cols || dy >= dst_rows)
|
||||||
{
|
{
|
||||||
// Calculate source coordinates
|
return; // Exit if dx or dy is out of bounds
|
||||||
int sx = (dx * ifx) >> 16;
|
|
||||||
int sy = (dy * ify) >> 16;
|
|
||||||
|
|
||||||
// Perform boundary checks
|
|
||||||
sx = clamp(sx, 0, src_cols - 1);
|
|
||||||
sy = clamp(sy, 0, src_rows - 1);
|
|
||||||
|
|
||||||
// Calculate interpolation coefficients
|
|
||||||
int u = (dx * ifx) & 0xFFFF;
|
|
||||||
int v = (dy * ify) & 0xFFFF;
|
|
||||||
|
|
||||||
int U = (0x10000 - u) >> 8;
|
|
||||||
int V = (0x10000 - v) >> 8;
|
|
||||||
int U1 = u >> 8;
|
|
||||||
int V1 = v >> 8;
|
|
||||||
|
|
||||||
// Load pixel values
|
|
||||||
WT data0 = convertToWT(loadpix(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset))));
|
|
||||||
WT data1 = convertToWT(loadpix(srcptr + mad24(sy, src_step, mad24(INC(sx, src_cols), TSIZE, src_offset))));
|
|
||||||
WT data2 = convertToWT(loadpix(srcptr + mad24(INC(sy, src_rows), src_step, mad24(sx, TSIZE, src_offset))));
|
|
||||||
WT data3 = convertToWT(loadpix(srcptr + mad24(INC(sy, src_rows), src_step, mad24(INC(sx, src_cols), TSIZE, src_offset))));
|
|
||||||
|
|
||||||
// Perform fixed-point interpolation
|
|
||||||
WT val = mul24((WT)mul24(U1, V1), data0) + mul24((WT)mul24(U, V1), data1) +
|
|
||||||
mul24((WT)mul24(U1, V), data2) + mul24((WT)mul24(U, V), data3);
|
|
||||||
|
|
||||||
// Convert and store the result
|
|
||||||
T uval = convertToDT((val + 2) >> 2);
|
|
||||||
storepix(uval, dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calculate source coordinates
|
||||||
|
int sx = (dx * ifx) >> 16;
|
||||||
|
int sy = (dy * ify) >> 16;
|
||||||
|
|
||||||
|
// Perform boundary checks
|
||||||
|
sx = clamp(sx, 0, src_cols - 1);
|
||||||
|
sy = clamp(sy, 0, src_rows - 1);
|
||||||
|
|
||||||
|
// Calculate interpolation coefficients
|
||||||
|
int u = (dx * ifx) & 0xFFFF;
|
||||||
|
int v = (dy * ify) & 0xFFFF;
|
||||||
|
|
||||||
|
int U = (0x10000 - u) >> 8;
|
||||||
|
int V = (0x10000 - v) >> 8;
|
||||||
|
int U1 = u >> 8;
|
||||||
|
int V1 = v >> 8;
|
||||||
|
|
||||||
|
// Load pixel values
|
||||||
|
WT data0 = convertToWT(loadpix(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset))));
|
||||||
|
WT data1 = convertToWT(loadpix(srcptr + mad24(sy, src_step, mad24(INC(sx, src_cols), TSIZE, src_offset))));
|
||||||
|
WT data2 = convertToWT(loadpix(srcptr + mad24(INC(sy, src_rows), src_step, mad24(sx, TSIZE, src_offset))));
|
||||||
|
WT data3 = convertToWT(loadpix(srcptr + mad24(INC(sy, src_rows), src_step, mad24(INC(sx, src_cols), TSIZE, src_offset))));
|
||||||
|
|
||||||
|
// Perform fixed-point interpolation
|
||||||
|
WT val = mul24((WT)mul24(U1, V1), data0) + mul24((WT)mul24(U, V1), data1) +
|
||||||
|
mul24((WT)mul24(U1, V), data2) + mul24((WT)mul24(U, V), data3);
|
||||||
|
|
||||||
|
// Convert and store the result
|
||||||
|
#if ROUNDING_METHOD == ROUND_NEAREST_EVEN
|
||||||
|
T uval = convertToDT((val + FIXED_POINT_SCALE / 2) >> FIXED_POINT_BITS);
|
||||||
|
#elif ROUNDING_METHOD == ROUND_DOWN
|
||||||
|
T uval = convertToDT(val >> FIXED_POINT_BITS);
|
||||||
|
#elif ROUNDING_METHOD == ROUND_UP
|
||||||
|
T uval = convertToDT((val + FIXED_POINT_SCALE - 1) >> FIXED_POINT_BITS);
|
||||||
|
#elif ROUNDING_METHOD == TRUNCATE
|
||||||
|
T uval = convertToDT(val >> FIXED_POINT_BITS);
|
||||||
|
#endif
|
||||||
|
storepix(uval, dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#elif defined INTER_NEAREST
|
#elif defined INTER_NEAREST
|
||||||
|
|
||||||
__kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
__kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
|
@ -3495,9 +3495,45 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (interpolation == INTER_LINEAR_EXACT) {
|
else if (interpolation == INTER_LINEAR_EXACT) {
|
||||||
|
AutoBuffer<uchar> _buffer((dsize.width + dsize.height)*(sizeof(int) + sizeof(short)*2));
|
||||||
|
int* xofs = (int*)_buffer.data(), * yofs = xofs + dsize.width;
|
||||||
|
short* ialpha = (short*)(yofs + dsize.height), * ibeta = ialpha + dsize.width*2;
|
||||||
|
float fxx, fyy;
|
||||||
|
int sx, sy;
|
||||||
|
|
||||||
|
for (int dx = 0; dx < dsize.width; dx++)
|
||||||
|
{
|
||||||
|
fxx = (float)((dx+0.5)*inv_fx - 0.5);
|
||||||
|
sx = cvFloor(fxx);
|
||||||
|
fxx -= sx;
|
||||||
|
|
||||||
|
if (sx < 0)
|
||||||
|
fxx = 0, sx = 0;
|
||||||
|
|
||||||
|
if (sx >= ssize.width-1)
|
||||||
|
fxx = 0, sx = ssize.width-1;
|
||||||
|
|
||||||
|
xofs[dx] = sx;
|
||||||
|
ialpha[dx*2 + 0] = saturate_cast<short>((1.f - fxx) * INTER_RESIZE_COEF_SCALE);
|
||||||
|
ialpha[dx*2 + 1] = saturate_cast<short>(fxx * INTER_RESIZE_COEF_SCALE);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int dy = 0; dy < dsize.height; dy++)
|
||||||
|
{
|
||||||
|
fyy = (float)((dy+0.5)*inv_fy - 0.5);
|
||||||
|
sy = cvFloor(fyy);
|
||||||
|
fyy -= sy;
|
||||||
|
|
||||||
|
yofs[dy] = sy;
|
||||||
|
ibeta[dy*2 + 0] = saturate_cast<short>((1.f - fyy) * INTER_RESIZE_COEF_SCALE);
|
||||||
|
ibeta[dy*2 + 1] = saturate_cast<short>(fyy * INTER_RESIZE_COEF_SCALE);
|
||||||
|
}
|
||||||
|
|
||||||
|
int wdepth = std::max(depth, CV_32S), wtype = CV_MAKETYPE(wdepth, cn);
|
||||||
|
UMat coeffs;
|
||||||
|
Mat(1, static_cast<int>(_buffer.size()), CV_8UC1, _buffer.data()).copyTo(coeffs);
|
||||||
|
|
||||||
char buf[2][50];
|
char buf[2][50];
|
||||||
int wdepth = depth <= CV_8S ? CV_32S : std::max(depth, CV_32F);
|
|
||||||
int wtype = CV_MAKETYPE(wdepth, cn);
|
|
||||||
k.create("resizeLN", ocl::imgproc::resize_oclsrc,
|
k.create("resizeLN", ocl::imgproc::resize_oclsrc,
|
||||||
format("-D INTER_LINEAR_EXACT -D depth=%d -D T=%s -D T1=%s "
|
format("-D INTER_LINEAR_EXACT -D depth=%d -D T=%s -D T1=%s "
|
||||||
"-D WT=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d "
|
"-D WT=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d "
|
||||||
@ -3510,7 +3546,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
|
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
|
||||||
(float)inv_fx, (float)inv_fy);
|
ocl::KernelArg::PtrReadOnly(coeffs));
|
||||||
}
|
}
|
||||||
else if (interpolation == INTER_NEAREST)
|
else if (interpolation == INTER_NEAREST)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user