cuda resize onnx done

This commit is contained in:
Ginkgo 2024-06-10 13:50:54 +08:00
parent 379c16e106
commit 358b64ad0e
6 changed files with 168 additions and 226 deletions

View File

@ -458,12 +458,12 @@ CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , uchar, uchar) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , char, char) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , ushort, ushort) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , short, short) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , int, int) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , uint, uint) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)

View File

@ -281,14 +281,11 @@ enum InterpolationFlags {
//! ONNX Resize Flags //! ONNX Resize Flags
enum ResizeONNXFlags enum ResizeONNXFlags
{ {
// static_assert((1 << INTER_COORDINATE_SHIFT) > INTER_MAX, "");
// https://github.com/onnx/onnx/blob/main/docs/Operators.md#Resize
INTER_SAMPLER_SHIFT = 0, INTER_SAMPLER_SHIFT = 0,
INTER_SAMPLER_BIT = 4, INTER_SAMPLER_BIT = 3,
INTER_SAMPLER_MASK = ((1 << INTER_SAMPLER_BIT) - 1) << INTER_SAMPLER_SHIFT, INTER_SAMPLER_MASK = ((1 << INTER_SAMPLER_BIT) - 1) << INTER_SAMPLER_SHIFT,
INTER_COORDINATE_SHIFT = 4, INTER_COORDINATE_SHIFT = INTER_SAMPLER_SHIFT + INTER_SAMPLER_BIT,
INTER_COORDINATE_BIT = 3, INTER_COORDINATE_BIT = 3,
INTER_COORDINATE_MASK = ((1 << INTER_COORDINATE_BIT) - 1) << INTER_COORDINATE_SHIFT, INTER_COORDINATE_MASK = ((1 << INTER_COORDINATE_BIT) - 1) << INTER_COORDINATE_SHIFT,
/** x_original = (x_resized + 0.5) / scale - 0.5 */ /** x_original = (x_resized + 0.5) / scale - 0.5 */
@ -305,8 +302,8 @@ enum ResizeONNXFlags
/** x_original = x_resized / scale */ /** x_original = x_resized / scale */
INTER_ASYMMETRIC = 4 << INTER_COORDINATE_SHIFT, INTER_ASYMMETRIC = 4 << INTER_COORDINATE_SHIFT,
/** x_original = length_resized > 1 /** x_original = length_resized > 1
* ? start_x * (length_original - 1) + x_resized * (end_x - start_x) * (length_original - 1) / (length_resized - 1) ? start_x * (length_original - 1) + x_resized * (end_x - start_x) * (length_original - 1) / (length_resized - 1)
* : 0.5 * (start_x + end_x) * (length_original - 1) */ : 0.5 * (start_x + end_x) * (length_original - 1) */
INTER_TF_CROP_RESIZE = 5 << INTER_COORDINATE_SHIFT, INTER_TF_CROP_RESIZE = 5 << INTER_COORDINATE_SHIFT,
INTER_NEAREST_MODE_SHIFT = INTER_COORDINATE_SHIFT + INTER_COORDINATE_BIT, INTER_NEAREST_MODE_SHIFT = INTER_COORDINATE_SHIFT + INTER_COORDINATE_BIT,
@ -2468,12 +2465,12 @@ CV_EXPORTS_W void resize( InputArray src, OutputArray dst,
int interpolation = INTER_LINEAR ); int interpolation = INTER_LINEAR );
/** @brief onnx resize op /** @brief onnx resize op
https://github.com/onnx/onnx/blob/main/docs/Operators.md#Resize https://github.com/onnx/onnx/blob/main/docs/Operators.md#Resize
https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py
Not support `exclude_outside` and `extrapolation_value` yet. Not support `exclude_outside` and `extrapolation_value` yet.
To get a similar result to resize, give dsize and: To get a similar result to `cv::resize`, give dsize and:
INTER_NEAREST : ASYMMETRIC + NEAREST_FLOOR INTER_NEAREST : ASYMMETRIC + NEAREST_FLOOR
INTER_LINEAR : HALF_PIXEL INTER_LINEAR : HALF_PIXEL
INTER_CUBIC : HALF_PIXEL + cubicCoeff(-0.75) INTER_CUBIC : HALF_PIXEL + cubicCoeff(-0.75)
@ -2490,9 +2487,8 @@ To get a similar result to resize, give dsize and:
@sa resize @sa resize
*/ */
CV_EXPORTS_W void resizeOnnx( CV_EXPORTS_W void resizeOnnx(InputArray src, OutputArray dst, Size dsize,
InputArray src, OutputArray dst, Size dsize, Point2d scale = Point2d(), Point2d scale = Point2d(), int interpolation = INTER_LINEAR | INTER_HALF_PIXEL,
int interpolation = INTER_LINEAR | INTER_HALF_PIXEL,
float cubicCoeff = -0.75f, Rect2d const& roi = Rect2d()); float cubicCoeff = -0.75f, Rect2d const& roi = Rect2d());
/** @brief Applies an affine transformation to an image. /** @brief Applies an affine transformation to an image.

View File

@ -67,22 +67,20 @@ __kernel void resizeOnnx_nearest(
#if PIXEL_SIZE == 1 #if PIXEL_SIZE == 1
*D = *S; *D = *S;
#elif PIXEL_SIZE == 2 || PIXEL_SIZE == 4 || PIXEL_SIZE == 8 || PIXEL_SIZE == 16 #elif PIXEL_SIZE == 2
*(__global VT*)(D) = *(__global const VT*)(S); *(__global ushort*)(D) = *(__global const ushort*)(S);
#elif PIXEL_SIZE == 3 #elif PIXEL_SIZE == 3
vstore3(vload3(0, S), 0, D); vstore3(vload3(0, S), 0, D);
#elif PIXEL_SIZE == 4
*(__global uint*)(D) = *(__global const uint*)(S);
#elif PIXEL_SIZE == 6 #elif PIXEL_SIZE == 6
vstore3(vload3(0, (__global ushort const*)(S)), 0, (__global ushort*)(D)); vstore3(vload3(0, (__global ushort const*)(S)), 0, (__global ushort*)(D));
#elif PIXEL_SIZE == 8
*(__global uint2*)(D) = *(__global const uint2*)(S);
#elif PIXEL_SIZE == 12 #elif PIXEL_SIZE == 12
vstore3(vload3(0, (__global const uint*)(S)), 0, (__global uint*)(D)); vstore3(vload3(0, (__global const uint*)(S)), 0, (__global uint*)(D));
#elif PIXEL_SIZE == 24 #elif PIXEL_SIZE == 16
vstore3(vload3(0, (__global ulong const*)(S)), 0, (__global ulong*)(D)); *(__global uint4*)(D) = *(__global const uint4*)(S);
#elif PIXEL_SIZE == 32
*(__global uint8*)(D) = *(__global uint8 const*)(S);
#elif PIXEL_SIZE == 64
*(__global uint16*)(D) = *(__global uint16 const*)(S);
#elif PIXEL_SIZE == 128
*(__global ulong16*)(D) = *(__global ulong16 const*)(S);
#else #else
for (int i = 0; i < pixel_size; ++i) for (int i = 0; i < pixel_size; ++i)
D[i] = S[i]; D[i] = S[i];
@ -114,7 +112,7 @@ __kernel void resizeOnnx_linear(
__global uchar const* S2 = srcptr + (y1 * src_step + mad24(x0, pixel_size, src_offset)); __global uchar const* S2 = srcptr + (y1 * src_step + mad24(x0, pixel_size, src_offset));
__global uchar const* S3 = srcptr + (y1 * src_step + mad24(x1, pixel_size, src_offset)); __global uchar const* S3 = srcptr + (y1 * src_step + mad24(x1, pixel_size, src_offset));
__global uchar * D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset)); __global uchar * D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset));
#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16 #if CN == 1 || CN == 2 || CN == 3 || CN == 4
VW s0 = TO_VEC_WORK(loadpix(S0)); VW s1 = TO_VEC_WORK(loadpix(S1)); VW s0 = TO_VEC_WORK(loadpix(S0)); VW s1 = TO_VEC_WORK(loadpix(S1));
VW s2 = TO_VEC_WORK(loadpix(S2)); VW s3 = TO_VEC_WORK(loadpix(S3)); VW s2 = TO_VEC_WORK(loadpix(S2)); VW s3 = TO_VEC_WORK(loadpix(S3));
VT d0 = TO_VEC_TYPE((u0 * v0) * s0 + (u1 * v0) * s1 + (u0 * v1) * s2 + (u1 * v1) * s3); VT d0 = TO_VEC_TYPE((u0 * v0) * s0 + (u1 * v0) * s1 + (u0 * v1) * s2 + (u1 * v1) * s3);
@ -154,7 +152,7 @@ __kernel void resizeOnnx_linear_antialias(
int ix = convert_int_rtn(fx), iy = convert_int_rtn(fy); int ix = convert_int_rtn(fx), iy = convert_int_rtn(fy);
float rx = fx - ix, ry = fy - iy; float rx = fx - ix, ry = fy - iy;
__global uchar* D = dstptr + dy * dst_step + mad24(dx, pixel_size, dst_offset); __global uchar* D = dstptr + dy * dst_step + mad24(dx, pixel_size, dst_offset);
#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16 #if CN == 1 || CN == 2 || CN == 3 || CN == 4
VW sumval = (VW)(0); VW sumval = (VW)(0);
float weight = 0; float weight = 0;
for (int h = ystart; h < yend; ++h) for (int h = ystart; h < yend; ++h)
@ -266,7 +264,7 @@ __kernel void resizeOnnx_cubic(
xcoeff [x - xstart] = cubicCoeff(A, A2, A3, x - fx); xcoeff [x - xstart] = cubicCoeff(A, A2, A3, x - fx);
} }
__global uchar* D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset)); __global uchar* D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset));
#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16 #if CN == 1 || CN == 2 || CN == 3 || CN == 4
VW sum = (VW)(0); VW sum = (VW)(0);
for (int y = ystart; y <= ylimit; ++y) for (int y = ystart; y <= ylimit; ++y)
{ {
@ -322,13 +320,14 @@ __kernel void resizeOnnx_table(
__global int const* yoffset = xoffset + xstride; __global int const* yoffset = xoffset + xstride;
__global float const* xcoeff = (__global float const*)(yoffset + ystride); __global float const* xcoeff = (__global float const*)(yoffset + ystride);
__global float const* ycoeff = (__global float const*)(xcoeff + xstride); __global float const* ycoeff = (__global float const*)(xcoeff + xstride);
#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16 #if CN == 1 || CN == 2 || CN == 3 || CN == 4
VW sum = (VW)(0); VW sum = (VW)(0);
// exact ykanti / xkanti loops // exact ykanti / xkanti loops
for (int y = dy; y < ystride; y += dst_rows) for (int y = dy; y < ystride; y += dst_rows)
{ {
// offset is already clamped. xoffset is given by uchar // offset is already clamped
__global const uchar* S = (srcptr + yoffset[y] * src_step + src_offset); // xoffset is given by uchar, yoffset already multiply by src_step
__global const uchar* S = srcptr + yoffset[y] + src_offset;
VW sline = (VW)(0); VW sline = (VW)(0);
for (int x = dx; x < xstride; x += dst_cols) for (int x = dx; x < xstride; x += dst_cols)
sline += xcoeff[x] * TO_VEC_WORK(loadpix(S + xoffset[x])); sline += xcoeff[x] * TO_VEC_WORK(loadpix(S + xoffset[x]));
@ -341,7 +340,7 @@ __kernel void resizeOnnx_table(
W sum = 0; W sum = 0;
for (int y = dy; y < ystride; y += dst_rows) for (int y = dy; y < ystride; y += dst_rows)
{ {
__global const uchar* S = (srcptr + yoffset[y] * src_step + src_offset); __global const uchar* S = (srcptr + yoffset[y] + src_offset);
W sline = 0; W sline = 0;
for (int x = dx; x < xstride; x += dst_cols) for (int x = dx; x < xstride; x += dst_cols)
sline += xcoeff[x] * TO_WORK(((__global T const*)(S + xoffset[x]))[i]); sline += xcoeff[x] * TO_WORK(((__global T const*)(S + xoffset[x]))[i]);

View File

@ -1356,36 +1356,36 @@ public:
break; break;
case 2: case 2:
for (; x < width; ++x) for (; x < width; ++x)
reinterpret_cast<short*>(D)[x] = *(reinterpret_cast<short const*>(S + x_ofs[x])); reinterpret_cast<ushort*>(D)[x] = *(reinterpret_cast<ushort const*>(S + x_ofs[x]));
break; break;
case 3: case 3:
for (; x < width; ++x, D += 3) for (; x < width; ++x, D += 3)
{ {
const uchar* _tS = S + x_ofs[x]; uchar const* _tS = S + x_ofs[x];
D[0] = _tS[0]; D[1] = _tS[1]; D[2] = _tS[2]; D[0] = _tS[0]; D[1] = _tS[1]; D[2] = _tS[2];
} }
break; break;
case 4: case 4:
for (; x < width; ++x) for (; x < width; ++x)
reinterpret_cast<int*>(D)[x] = *(reinterpret_cast<int const*>(S + x_ofs[x])); reinterpret_cast<uint*>(D)[x] = *(reinterpret_cast<uint const*>(S + x_ofs[x]));
break; break;
case 6: case 6:
for (; x < width; ++x, D += 6) for (; x < width; ++x, D += 6)
{ {
short const* _tS = reinterpret_cast<short const*>(S + x_ofs[x]); ushort const* _tS = reinterpret_cast<ushort const*>(S + x_ofs[x]);
short* _tD = reinterpret_cast<short*>(D); ushort* _tD = reinterpret_cast<ushort*>(D);
_tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2]; _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
} }
break; break;
case 8: case 8:
for (; x < width; ++x) for (; x < width; ++x)
reinterpret_cast<int64*>(D)[x] = *(reinterpret_cast<int64 const*>(S + x_ofs[x])); reinterpret_cast<uint64*>(D)[x] = *(reinterpret_cast<uint64 const*>(S + x_ofs[x]));
break; break;
case 12: case 12:
for (; x < width; ++x, D += 12) for (; x < width; ++x, D += 12)
{ {
int const* _tS = reinterpret_cast<int const*>(S + x_ofs[x]); uint const* _tS = reinterpret_cast<uint const*>(S + x_ofs[x]);
int* _tD = reinterpret_cast<int*>(D); uint* _tD = reinterpret_cast<uint*>(D);
_tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2]; _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
} }
break; break;
@ -3465,7 +3465,8 @@ public:
int xmin, xmax; int xmin, xmax;
private: private:
void cubic_coeffs_antialias(int dstlen, int cn, float srcpos, float scale, int srclen, float A, TabIdx* elem) void cubicCoeffsAntiAlias(
int dstlen, int cn, float srcpos, float scale, int srclen, float A, TabIdx* elem)
{ {
scale = min(scale, 1.f); scale = min(scale, 1.f);
int index = cvFloor(srcpos); int index = cvFloor(srcpos);
@ -3497,7 +3498,7 @@ private:
} }
} }
void cubic_coeffs(float x, float A, float* coeffs) void cubicCoeffs(float x, float A, float* coeffs)
{ {
coeffs[0] = A * ((((x + 1) - 5) * (x + 1) + 8) * (x + 1) - 4); coeffs[0] = A * ((((x + 1) - 5) * (x + 1) + 8) * (x + 1) - 4);
coeffs[1] = ((A + 2) * x - (A + 3)) * x * x + 1; coeffs[1] = ((A + 2) * x - (A + 3)) * x * x + 1;
@ -3505,7 +3506,8 @@ private:
coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2]; coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
} }
void linear_coeffs_antialias(int dstlen, int cn, float srcpos, float scale, int srclen, TabIdx* elem) void linearCoeffsAntialias(
int dstlen, int cn, float srcpos, float scale, int srclen, TabIdx* elem)
{ {
scale = min(scale, 1.f); scale = min(scale, 1.f);
int index = cvFloor(srcpos); int index = cvFloor(srcpos);
@ -3532,7 +3534,7 @@ private:
} }
} }
void linear_coeffs(float x, float* coeffs) void linearCoeffs(float x, float* coeffs)
{ {
coeffs[0] = 1.f - x; coeffs[0] = 1.f - x;
coeffs[1] = x; coeffs[1] = x;
@ -3570,16 +3572,17 @@ private:
area.commit(); area.commit();
CV_CheckLE(ksize, MAX_ESIZE, "resampler kernel's size is too larger"); CV_CheckLE(ksize, MAX_ESIZE, "resampler kernel's size is too larger");
if (antialias) // when upsampling, `antialias` is same to `generic`, so use `generic` to speed up
if (antialias && scaled.x < 1.0)
{ {
float a = M(0, 0), b = M(0, 1); float a = M(0, 0), b = M(0, 1);
for (int d = 0; d < dsize.width; ++d) for (int d = 0; d < dsize.width; ++d)
{ {
float f = fmaf(static_cast<float>(d), a, b); float f = fmaf(static_cast<float>(d), a, b);
if (sampler == INTER_LINEAR) if (sampler == INTER_LINEAR)
linear_coeffs_antialias(d, cn, f, scale.x, ssize.width, xtab + d * xkanti); linearCoeffsAntialias(d, cn, f, scale.x, ssize.width, xtab + d * xkanti);
else // if (sampler == INTER_CUBIC) else // if (sampler == INTER_CUBIC)
cubic_coeffs_antialias(d, cn, f, scale.x, ssize.width, cubicCoeff, xtab + d * xkanti); cubicCoeffsAntiAlias(d, cn, f, scale.x, ssize.width, cubicCoeff, xtab + d * xkanti);
} }
} }
else else
@ -3608,9 +3611,9 @@ private:
for (int k = 0; k < cn; ++k) for (int k = 0; k < cn; ++k)
xofs[cn * d + k] = cn * s + k; xofs[cn * d + k] = cn * s + k;
if (sampler == INTER_LINEAR) if (sampler == INTER_LINEAR)
linear_coeffs(f, cbuf); linearCoeffs(f, cbuf);
else // if (sampler == INTER_CUBIC) else // if (sampler == INTER_CUBIC)
cubic_coeffs(f, cubicCoeff, cbuf); cubicCoeffs(f, cubicCoeff, cbuf);
if (is_fixpt) if (is_fixpt)
{ {
short* coeffs = reinterpret_cast<short*>(xcoeffs) + cn * ksize * d; short* coeffs = reinterpret_cast<short*>(xcoeffs) + cn * ksize * d;
@ -3638,16 +3641,16 @@ private:
} }
} }
if (antialias) if (antialias && scaled.y < 1.0)
{ {
float a = M(1, 0), b = M(1, 1); float a = M(1, 0), b = M(1, 1);
for (int d = 0; d < dsize.height; ++d) for (int d = 0; d < dsize.height; ++d)
{ {
float f = fmaf(static_cast<float>(d), a, b); float f = fmaf(static_cast<float>(d), a, b);
if (sampler == INTER_LINEAR) if (sampler == INTER_LINEAR)
linear_coeffs_antialias(d, 1, f, scale.y, ssize.height, ytab + d * ykanti); linearCoeffsAntialias(d, 1, f, scale.y, ssize.height, ytab + d * ykanti);
else // if (sampler == INTER_CUBIC) else // if (sampler == INTER_CUBIC)
cubic_coeffs_antialias(d, 1, f, scale.y, ssize.height, cubicCoeff, ytab + d * ykanti); cubicCoeffsAntiAlias(d, 1, f, scale.y, ssize.height, cubicCoeff, ytab + d * ykanti);
} }
} }
else else
@ -3662,9 +3665,9 @@ private:
f -= s; f -= s;
yofs[d] = s; yofs[d] = s;
if (sampler == INTER_LINEAR) if (sampler == INTER_LINEAR)
linear_coeffs(f, cbuf); linearCoeffs(f, cbuf);
else // if (sampler == INTER_CUBIC) else // if (sampler == INTER_CUBIC)
cubic_coeffs(f, cubicCoeff, cbuf); cubicCoeffs(f, cubicCoeff, cbuf);
if (is_fixpt) if (is_fixpt)
{ {
short* coeffs = reinterpret_cast<short*>(ycoeffs) + 1 * ksize * d; short* coeffs = reinterpret_cast<short*>(ycoeffs) + 1 * ksize * d;
@ -3755,7 +3758,7 @@ public:
"something wrong"); "something wrong");
} }
void hori_antialias_accumulate(T const* S, IdxT* L) const void horiAntialiasAccumulate(T const* S, IdxT* L) const
{ {
IdxT alpha; IdxT alpha;
int const cn = dst.channels(); int const cn = dst.channels();
@ -3809,15 +3812,11 @@ public:
} }
} }
void hori_antialias_lines(T const** srcptr, WT** dstptr, IdxT* L, int count) const void horiAntialiasLines(T const** srcptr, WT** dstptr, IdxT* L, int count) const
{ {
int cn = dst.channels(); int cn = dst.channels();
int dwidth = dst.cols * cn; int dwidth = dst.cols * cn;
#ifdef CV_CXX11 bool const same_wt_idxt = std::is_same<WT, IdxT>::value;
constexpr bool same_wt_idxt = std::is_same<WT, IdxT>::value;
#else
bool const same_wt_idxt = false;
#endif
for (int i = 0; i < count; ++i) for (int i = 0; i < count; ++i)
{ {
T const* S = srcptr[i]; T const* S = srcptr[i];
@ -3825,7 +3824,7 @@ public:
if (same_wt_idxt) if (same_wt_idxt)
L = reinterpret_cast<IdxT*>(dstptr[i]); L = reinterpret_cast<IdxT*>(dstptr[i]);
memset(L, 0, sizeof(IdxT) * dwidth); memset(L, 0, sizeof(IdxT) * dwidth);
hori_antialias_accumulate(S, L); horiAntialiasAccumulate(S, L);
if (!same_wt_idxt) if (!same_wt_idxt)
{ {
WT* D = dstptr[i]; WT* D = dstptr[i];
@ -3844,7 +3843,7 @@ public:
} }
} }
void hori_generic_lines(T const** srcptr, WT** dstptr, int count) const void horiGenericLines(T const** srcptr, WT** dstptr, int count) const
{ {
int cn = src.channels(); int cn = src.channels();
int ssize = src.cols * cn; int ssize = src.cols * cn;
@ -3857,53 +3856,7 @@ public:
ssize, dsize, cn, xmin, xmax); ssize, dsize, cn, xmin, xmax);
} }
void vert_antialias_hori_antialias(int dy, IdxT* L, IdxT* A) const void vertAntialias(Range const& range) const
{
// the start and end of ytab
int dwidth = dst.channels() * dst.cols;
int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti;
memset(A, 0, dwidth * sizeof(IdxT));
for (int t = tstart; t < tend; ++t)
{
IdxT beta;
int sy = ctrl.ytab[t].si;
CV_CheckEQ(dy, ctrl.ytab[t].di, "something wrong");
ctrl.ytab[t].as(beta);
memset(L, 0, dwidth * sizeof(IdxT));
hori_antialias_accumulate(src.template ptr<T>(sy), L);
for (int w = 0; w < dwidth; ++w)
A[w] += L[w] * beta;
}
T* D = dst.template ptr<T>(dy);
for (int w = 0; w < dwidth; ++w)
D[w] = saturate_cast<T>(A[w]);
}
void vert_antialias_hori_generic(int dy, WT* L, IdxT* A) const
{
// FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS> cast;
int dwidth = dst.channels() * dst.cols;
int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti;
memset(A, 0, dwidth * sizeof(IdxT));
for (int t = tstart; t < tend; ++t)
{
IdxT beta;
int sy = ctrl.ytab[t].si;
CV_CheckEQ(dy, ctrl.ytab[t].di, "something wrong");
ctrl.ytab[t].as(beta);
T const* S = src.template ptr<T>(sy);
hori_generic_lines(&S, &L, 1);
if (ctrl.is_fixpt)
beta /= INTER_RESIZE_COEF_SCALE;
for (int w = 0; w < dwidth; ++w)
A[w] += L[w] * beta;
}
T* D = dst.template ptr<T>(dy);
for (int w = 0; w < dwidth; ++w)
D[w] = saturate_cast<T>(A[w]);
}
void vert_antialias(Range const& range) const
{ {
int cn = dst.channels(); int cn = dst.channels();
int dwidth = dst.cols * cn; int dwidth = dst.cols * cn;
@ -3913,14 +3866,38 @@ public:
WT* Lw = reinterpret_cast<WT*>(L); WT* Lw = reinterpret_cast<WT*>(L);
for (int dy = range.start; dy < range.end; ++dy) for (int dy = range.start; dy < range.end; ++dy)
{ {
if (ctrl.xkanti) int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti;
vert_antialias_hori_antialias(dy, L, A); memset(A, 0, dwidth * sizeof(IdxT));
else for (int t = tstart; t < tend; ++t)
vert_antialias_hori_generic(dy, Lw, A); {
IdxT beta;
int sy = ctrl.ytab[t].si;
CV_CheckEQ(dy, ctrl.ytab[t].di, "something wrong");
ctrl.ytab[t].as(beta);
T const* S = src.template ptr<T>(sy);
if (ctrl.xkanti)
{
memset(L, 0, dwidth * sizeof(IdxT));
horiAntialiasAccumulate(S, L);
for (int w = 0; w < dwidth; ++w)
A[w] += L[w] * beta;
}
else
{
horiGenericLines(&S, &Lw, 1);
if (ctrl.is_fixpt)
beta /= INTER_RESIZE_COEF_SCALE;
for (int w = 0; w < dwidth; ++w)
A[w] += Lw[w] * beta;
}
}
T* D = dst.template ptr<T>(dy);
for (int w = 0; w < dwidth; ++w)
D[w] = saturate_cast<T>(A[w]);
} }
} }
void vert_generic(Range const& range) const void vertGeneric(Range const& range) const
{ {
int ksize = ctrl.ksize, ksize2 = ksize / 2; int ksize = ctrl.ksize, ksize2 = ksize / 2;
int cn = src.channels(); int cn = src.channels();
@ -3963,9 +3940,9 @@ public:
if (k0 < ksize) if (k0 < ksize)
{ {
if (ctrl.xkanti) if (ctrl.xkanti)
hori_antialias_lines(srows + k0, rows + k0, L, ksize - k0); horiAntialiasLines(srows + k0, rows + k0, L, ksize - k0);
else else
hori_generic_lines(srows + k0, rows + k0, ksize - k0); horiGenericLines(srows + k0, rows + k0, ksize - k0);
} }
vresize(const_cast<WT const**>(rows), dst.template ptr<T>(dy), beta, dwidth); vresize(const_cast<WT const**>(rows), dst.template ptr<T>(dy), beta, dwidth);
} }
@ -3974,9 +3951,9 @@ public:
virtual void operator() (Range const& range) const CV_OVERRIDE virtual void operator() (Range const& range) const CV_OVERRIDE
{ {
if (ctrl.ykanti) if (ctrl.ykanti)
vert_antialias(range); vertAntialias(range);
else else
vert_generic(range); vertGeneric(range);
} }
}; };
@ -4003,7 +3980,7 @@ typedef void (*ResizeAreaFunc)( const Mat& src, Mat& dst,
const DecimateAlpha* ytab, int ytab_size, const DecimateAlpha* ytab, int ytab_size,
const int* yofs); const int* yofs);
typedef void (*ResizeOnnxFunc)(Mat const& src, Mat& dst, ResizeOnnxCtrl const&); typedef void (*ResizeOnnxFunc)(Mat const& src, Mat& dst, ResizeOnnxCtrl const& ctrl);
static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, DecimateAlpha* tab ) static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, DecimateAlpha* tab )
@ -4517,7 +4494,7 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst,
float* ycoeff = reinterpret_cast<float*>(xcoeff + xstride); float* ycoeff = reinterpret_cast<float*>(xcoeff + xstride);
ocl_resizeOnnxTable(src.cols, dst.cols, pixel_size, ocl_resizeOnnxTable(src.cols, dst.cols, pixel_size,
sampler, M(0, 0), M(0, 1), cubicCoeff, scale.x, xoffset, xcoeff); sampler, M(0, 0), M(0, 1), cubicCoeff, scale.x, xoffset, xcoeff);
ocl_resizeOnnxTable(src.rows, dst.rows, 1, ocl_resizeOnnxTable(src.rows, dst.rows, static_cast<int>(src.step[0]),
sampler, M(1, 0), M(1, 1), cubicCoeff, scale.y, yoffset, ycoeff); sampler, M(1, 0), M(1, 1), cubicCoeff, scale.y, yoffset, ycoeff);
UMat utable; UMat utable;
Mat(1, tabsize, CV_32S, table.data()).copyTo(utable); Mat(1, tabsize, CV_32S, table.data()).copyTo(utable);
@ -5175,6 +5152,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
void cv::resizeOnnx(InputArray _src, OutputArray _dst, void cv::resizeOnnx(InputArray _src, OutputArray _dst,
Size dsize, Point2d scale, int interpolation, float cubicCoeff, Rect2d const& roi) Size dsize, Point2d scale, int interpolation, float cubicCoeff, Rect2d const& roi)
{ {
static_assert((1 << INTER_SAMPLER_BIT) >= INTER_MAX, "");
CV_INSTRUMENT_REGION(); CV_INSTRUMENT_REGION();
Size ssize = _src.size(); Size ssize = _src.size();
@ -5185,7 +5163,7 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst,
{ {
CV_CheckGT(scale.x, 0.0, "scale must > 0 if no dsize given"); CV_CheckGT(scale.x, 0.0, "scale must > 0 if no dsize given");
CV_CheckGT(scale.y, 0.0, "scale must > 0 if no dsize given"); CV_CheckGT(scale.y, 0.0, "scale must > 0 if no dsize given");
// https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py#L365 // https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py
// output_size = (scale_factors * np.array(data.shape)).astype(int) // output_size = (scale_factors * np.array(data.shape)).astype(int)
dsize.width = static_cast<int>(scale.x * ssize.width ); dsize.width = static_cast<int>(scale.x * ssize.width );
dsize.height = static_cast<int>(scale.y * ssize.height); dsize.height = static_cast<int>(scale.y * ssize.height);
@ -5196,8 +5174,8 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst,
scale.y = static_cast<double>(dsize.height) / ssize.height; scale.y = static_cast<double>(dsize.height) / ssize.height;
} }
CV_CheckFalse(dsize.empty(), "dst size must not empty"); CV_CheckFalse(dsize.empty(), "dst size must not empty");
CV_CheckGT(scale.x, 0.0, "computed scale <= 0 with given dsize"); CV_CheckGT(scale.x, 0.0, "require computed or given scale > 0");
CV_CheckGT(scale.y, 0.0, "computed scale <= 0 with given dsize"); CV_CheckGT(scale.y, 0.0, "require computed or given scale > 0");
int sampler = interpolation & INTER_SAMPLER_MASK; int sampler = interpolation & INTER_SAMPLER_MASK;
int nearest = interpolation & INTER_NEAREST_MODE_MASK; int nearest = interpolation & INTER_NEAREST_MODE_MASK;
@ -5237,6 +5215,9 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst,
_src.copyTo(_dst); _src.copyTo(_dst);
return; return;
} }
// Antialias is applied when downsampling
if (scale.x >= 1.0 && scale.y >= 1.0)
interpolation &= ~INTER_ANTIALIAS_MASK;
// Fake reference to source. Resolves issue 13577 in case of src == dst. // Fake reference to source. Resolves issue 13577 in case of src == dst.
UMat srcUMat; UMat srcUMat;

View File

@ -338,18 +338,18 @@ OCL_TEST(Resize, overflow_21198)
PARAM_TEST_CASE(ResizeOnnx, MatType, double, double, int, bool, int) PARAM_TEST_CASE(ResizeOnnx, MatType, double, double, int, bool, int)
{ {
int type, interpolation; int depth, interpolation;
int widthMultiple; int widthMultiple;
double fx, fy; double fx, fy;
bool useRoi; bool useRoi;
Mat middle;
TEST_DECLARE_INPUT_PARAMETER(src); Rect src_loc, dst_loc;
TEST_DECLARE_OUTPUT_PARAMETER(dst); Mat src, dst, src_roi, dst_roi;
UMat gsrc, gdst, gsrc_roi, gdst_roi;
virtual void SetUp() virtual void SetUp()
{ {
type = GET_PARAM(0); depth = GET_PARAM(0);
fx = GET_PARAM(1); fx = GET_PARAM(1);
fy = GET_PARAM(2); fy = GET_PARAM(2);
interpolation = GET_PARAM(3); interpolation = GET_PARAM(3);
@ -357,96 +357,70 @@ PARAM_TEST_CASE(ResizeOnnx, MatType, double, double, int, bool, int)
widthMultiple = GET_PARAM(5); widthMultiple = GET_PARAM(5);
} }
void random_roi() void random_submat(int type,
Size& size, Rect& roi, Mat& mat, Mat& sub, UMat& gmat, UMat& gsub)
{ {
CV_Assert(fx > 0 && fy > 0); int border = useRoi ? 65 : 0;
roi.x = randomInt(0, border);
roi.y = randomInt(0, border);
roi.width = size.width;
roi.height = size.height;
size.width += roi.x + randomInt(0, border);
size.height += roi.y + randomInt(0, border);
mat = randomMat(size, type, -127, 127);
mat.copyTo(gmat);
sub = mat(roi);
gsub = gmat(roi);
}
Size srcRoiSize = randomSize(10, MAX_VALUE), dstRoiSize; void random_roi(int type)
// Make sure the width is a multiple of the requested value, and no more {
srcRoiSize.width += widthMultiple - 1 - (srcRoiSize.width - 1) % widthMultiple; Size srcSize, dstSize;
dstRoiSize.width = cvRound(srcRoiSize.width * fx); int minSize = min(fx, fy) < 1.0 ? 10 : 1;
dstRoiSize.height = cvRound(srcRoiSize.height * fy); while (dstSize.empty())
if (dstRoiSize.empty())
{ {
random_roi(); srcSize = randomSize(minSize, 129);
return; srcSize.width += widthMultiple - 1 - (srcSize.width - 1) % widthMultiple;
dstSize.width = cvRound(srcSize.width * fx);
dstSize.height = cvRound(srcSize.height * fy);
} }
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); random_submat(type, srcSize, src_loc, src, src_roi, gsrc, gsrc_roi);
randomSubMat(src, src_roi, srcRoiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE); random_submat(type, dstSize, dst_loc, dst, dst_roi, gdst, gdst_roi);
#if 0
// if nearest test failed, maybe the fma issue, try open this #if
// set pixels' value to their coordinate
if ((interpolation & INTER_SAMPLER_MASK) == INTER_NEAREST)
{
int channel = CV_MAT_CN(type);
middle.create(src.rows, src.cols, CV_16SC(channel));
for (int h = 0; h < src.rows; ++h)
{
for (int c = 0; c < channel; c += 2)
{
// even x; odd y
short* S = middle.ptr<short>(h) + c;
for (int w = 0; w < src.cols; ++w, S += channel)
S[0] = static_cast<short>(w);
}
for (int c = 1; c < channel; c += 2)
{
// even x; odd y
short* S = middle.ptr<short>(h) + c;
for (int w = 0; w < src.cols; ++w, S += channel)
S[0] = static_cast<short>(h);
}
}
middle.convertTo(src, type);
src_roi = src(Rect(srcBorder.lef, srcBorder.top, srcRoiSize.width, srcRoiSize.height));
}
#endif
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, dstRoiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
UMAT_UPLOAD_INPUT_PARAMETER(src);
UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
} }
}; };
OCL_TEST_P(ResizeOnnx, Mat) OCL_TEST_P(ResizeOnnx, Mat)
{ {
Size whole;
Point offset;
Mat host, host_roi; Mat host, host_roi;
int cn = CV_MAT_CN(type);
int depth = CV_MAT_DEPTH(type);
double eps = depth <= CV_32S ? integerEps : 5e-2; double eps = depth <= CV_32S ? integerEps : 5e-2;
for (int j = 0; j < test_loop_times; j++) // loop on channel to reduce the number of test
for (int cn = 1; cn <= 6; ++cn)
{ {
random_roi(); int type = CV_MAKETYPE(depth, cn);
for (int j = 0; j < test_loop_times; ++j)
OCL_OFF(cv::resizeOnnx(src_roi, dst_roi,
dst_roi.size(), Point2d(fx, fy), interpolation));
OCL_ON(cv::resizeOnnx(usrc_roi, udst_roi,
dst_roi.size(), Point2d(fx, fy), interpolation));
dst_roi.locateROI(whole, offset);
udst.copyTo(host);
host_roi = host(Rect(offset, dst_roi.size()));
if (cn <= 4 && depth != CV_8S && depth != CV_32S)
OCL_EXPECT_MAT_N_DIFF(dst, eps);
else
{ {
// more strict than OCL_EXPECT_MAT_N_DIFF random_roi(type);
double dif = cv::norm(dst_roi, host_roi, NORM_INF);
EXPECT_LE(dif, eps) OCL_OFF(cv::resizeOnnx(src_roi, dst_roi,
<< "Size: " << src_roi.size() dst_roi.size(), Point2d(fx, fy), interpolation));
<< ", NormInf: " << dif << std::endl; OCL_ON(cv::resizeOnnx(gsrc_roi, gdst_roi,
dst_roi.size(), Point2d(fx, fy), interpolation));
// copy whole gdst to make sure that
// we really use the given roi memory and not allocate a new one
gdst.copyTo(host);
host_roi = host(dst_loc);
string info = cv::format(
"fail on type %sC%d src %dx%d dst %dx%d src_roi %dx%d dst_roi %dx%d",
depthToString(depth), cn, src.cols, src.rows, dst.cols, dst.rows,
src_roi.cols, src_roi.rows, dst_roi.cols, dst_roi.rows);
EXPECT_LE(cv::norm(dst_roi, host_roi, NORM_INF), eps) << info;
} }
} }
} }
///////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////
// remap // remap
@ -689,23 +663,18 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpResizeArea, Resize, Combine(
Bool(), Bool(),
Values(1, 16))); Values(1, 16)));
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAlias, ResizeOnnx, Combine( OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, ResizeOnnx, Combine(
Values( Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32F),
CV_8UC1, CV_8SC2, CV_8UC(5), CV_8SC(7), Values(0.4, 0.27, 1.6),
CV_16UC1, CV_16SC3, CV_16UC(9), CV_16SC(10), Values(0.5, 0.71, 2.7),
CV_32FC1, CV_32FC4, CV_32FC(11)),
Values(0.5, 0.31, 1.4),
Values(0.5, 0.73, 3.7),
Values((int)(INTER_LINEAR), (int)(INTER_CUBIC)), Values((int)(INTER_LINEAR), (int)(INTER_CUBIC)),
Bool(), Bool(),
Values(1, 16))); Values(1, 16)));
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAntiAlias, ResizeOnnx, Combine( OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAntiAlias, ResizeOnnx, Combine(
Values( Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32F),
CV_8UC1, CV_8SC2, CV_8UC(5), CV_8SC(7), Values(0.4, 0.27, 1.6),
CV_16UC1, CV_16SC3, CV_16UC(9), CV_16SC(10), Values(0.5, 0.71, 2.7),
CV_32FC1, CV_32FC4, CV_32FC(11)),
Values(0.5, 0.27, 2.6),
Values(0.5, 0.71, 4.1),
Values( Values(
(int)(INTER_ANTIALIAS | INTER_LINEAR), (int)(INTER_ANTIALIAS | INTER_LINEAR),
(int)(INTER_ANTIALIAS | INTER_CUBIC )), (int)(INTER_ANTIALIAS | INTER_CUBIC )),
@ -713,12 +682,9 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAntiAlias, ResizeOnnx, Combine(
Values(1, 16))); Values(1, 16)));
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpNearest, ResizeOnnx, Combine( OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpNearest, ResizeOnnx, Combine(
Values( Values(CV_8S, CV_16S, CV_32F, CV_64F),
CV_8UC1, CV_8SC2, CV_8UC4, CV_8SC(7), Values(0.4, 0.27, 1.6),
CV_16UC1, CV_16SC3, CV_16UC(9), CV_32SC(10), Values(0.5, 0.71, 2.7),
CV_32FC1, CV_32FC4, CV_32FC(11)),
Values(0.5, 0.27, 2.6),
Values(0.5, 0.71, 4.1),
Values( Values(
(int)(INTER_NEAREST | INTER_NEAREST_PREFER_FLOOR), (int)(INTER_NEAREST | INTER_NEAREST_PREFER_FLOOR),
(int)(INTER_NEAREST | INTER_NEAREST_PREFER_CEIL), (int)(INTER_NEAREST | INTER_NEAREST_PREFER_CEIL),

View File

@ -43,10 +43,10 @@ struct ResizeOnnx
Mat iS(szsrc, CV_64F, insrc.data()); Mat iS(szsrc, CV_64F, insrc.data());
Mat iR(szref, CV_64F, inref.data()); Mat iR(szref, CV_64F, inref.data());
Mat S = iS, R = iR, nS, nR; Mat S = iS, R = iR, nS, nR;
double alpha[6] = {1, 1, 5, 5, -1, -3}; double alpha[8] = {1, -1, 5, 5, 0, -3, -2, +4};
double beta[6] = {0, 7, 0, 7, +0, -7}; double beta[8] = {0, -0, 0, 7, 7, -7, -6, +6};
RNG rng; RNG rng;
for (int cn = 1; cn <= 6; ++cn) for (int cn = 1; cn <= 8; ++cn)
{ {
if (cn > 1) if (cn > 1)
{ {
@ -59,7 +59,7 @@ struct ResizeOnnx
{ {
double eps = (depth <= CV_32S) ? 1.0 : 1e-3; double eps = (depth <= CV_32S) ? 1.0 : 1e-3;
int type = CV_MAKETYPE(depth, cn); int type = CV_MAKETYPE(depth, cn);
string errinfo = "failed on type " + typeToString(type); string errinfo = "fail on type " + typeToString(type);
Mat src, ref, dst; Mat src, ref, dst;
rand_roi(rng, src, szsrc, type); rand_roi(rng, src, szsrc, type);
if (szdst.area()) if (szdst.area())