cuda resize onnx done

This commit is contained in:
Ginkgo 2024-06-10 13:50:54 +08:00
parent 379c16e106
commit 358b64ad0e
6 changed files with 168 additions and 226 deletions

View File

@ -458,12 +458,12 @@ CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)

View File

@ -281,14 +281,11 @@ enum InterpolationFlags {
//! ONNX Resize Flags
enum ResizeONNXFlags
{
// static_assert((1 << INTER_COORDINATE_SHIFT) > INTER_MAX, "");
// https://github.com/onnx/onnx/blob/main/docs/Operators.md#Resize
INTER_SAMPLER_SHIFT = 0,
INTER_SAMPLER_BIT = 4,
INTER_SAMPLER_BIT = 3,
INTER_SAMPLER_MASK = ((1 << INTER_SAMPLER_BIT) - 1) << INTER_SAMPLER_SHIFT,
INTER_COORDINATE_SHIFT = 4,
INTER_COORDINATE_SHIFT = INTER_SAMPLER_SHIFT + INTER_SAMPLER_BIT,
INTER_COORDINATE_BIT = 3,
INTER_COORDINATE_MASK = ((1 << INTER_COORDINATE_BIT) - 1) << INTER_COORDINATE_SHIFT,
/** x_original = (x_resized + 0.5) / scale - 0.5 */
@ -305,8 +302,8 @@ enum ResizeONNXFlags
/** x_original = x_resized / scale */
INTER_ASYMMETRIC = 4 << INTER_COORDINATE_SHIFT,
/** x_original = length_resized > 1
* ? start_x * (length_original - 1) + x_resized * (end_x - start_x) * (length_original - 1) / (length_resized - 1)
* : 0.5 * (start_x + end_x) * (length_original - 1) */
? start_x * (length_original - 1) + x_resized * (end_x - start_x) * (length_original - 1) / (length_resized - 1)
: 0.5 * (start_x + end_x) * (length_original - 1) */
INTER_TF_CROP_RESIZE = 5 << INTER_COORDINATE_SHIFT,
INTER_NEAREST_MODE_SHIFT = INTER_COORDINATE_SHIFT + INTER_COORDINATE_BIT,
@ -2468,12 +2465,12 @@ CV_EXPORTS_W void resize( InputArray src, OutputArray dst,
int interpolation = INTER_LINEAR );
/** @brief onnx resize op
https://github.com/onnx/onnx/blob/main/docs/Operators.md#Resize
https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py
`exclude_outside` and `extrapolation_value` are not supported yet.
To get a similar result to resize, give dsize and:
To get a similar result to `cv::resize`, give dsize and:
INTER_NEAREST : ASYMMETRIC + NEAREST_FLOOR
INTER_LINEAR : HALF_PIXEL
INTER_CUBIC : HALF_PIXEL + cubicCoeff(-0.75)
@ -2490,9 +2487,8 @@ To get a similar result to resize, give dsize and:
@sa resize
*/
CV_EXPORTS_W void resizeOnnx(
InputArray src, OutputArray dst, Size dsize, Point2d scale = Point2d(),
int interpolation = INTER_LINEAR | INTER_HALF_PIXEL,
CV_EXPORTS_W void resizeOnnx(InputArray src, OutputArray dst, Size dsize,
Point2d scale = Point2d(), int interpolation = INTER_LINEAR | INTER_HALF_PIXEL,
float cubicCoeff = -0.75f, Rect2d const& roi = Rect2d());
/** @brief Applies an affine transformation to an image.

View File

@ -67,22 +67,20 @@ __kernel void resizeOnnx_nearest(
#if PIXEL_SIZE == 1
*D = *S;
#elif PIXEL_SIZE == 2 || PIXEL_SIZE == 4 || PIXEL_SIZE == 8 || PIXEL_SIZE == 16
*(__global VT*)(D) = *(__global const VT*)(S);
#elif PIXEL_SIZE == 2
*(__global ushort*)(D) = *(__global const ushort*)(S);
#elif PIXEL_SIZE == 3
vstore3(vload3(0, S), 0, D);
#elif PIXEL_SIZE == 4
*(__global uint*)(D) = *(__global const uint*)(S);
#elif PIXEL_SIZE == 6
vstore3(vload3(0, (__global ushort const*)(S)), 0, (__global ushort*)(D));
#elif PIXEL_SIZE == 8
*(__global uint2*)(D) = *(__global const uint2*)(S);
#elif PIXEL_SIZE == 12
vstore3(vload3(0, (__global const uint*)(S)), 0, (__global uint*)(D));
#elif PIXEL_SIZE == 24
vstore3(vload3(0, (__global ulong const*)(S)), 0, (__global ulong*)(D));
#elif PIXEL_SIZE == 32
*(__global uint8*)(D) = *(__global uint8 const*)(S);
#elif PIXEL_SIZE == 64
*(__global uint16*)(D) = *(__global uint16 const*)(S);
#elif PIXEL_SIZE == 128
*(__global ulong16*)(D) = *(__global ulong16 const*)(S);
#elif PIXEL_SIZE == 16
*(__global uint4*)(D) = *(__global const uint4*)(S);
#else
for (int i = 0; i < pixel_size; ++i)
D[i] = S[i];
@ -114,7 +112,7 @@ __kernel void resizeOnnx_linear(
__global uchar const* S2 = srcptr + (y1 * src_step + mad24(x0, pixel_size, src_offset));
__global uchar const* S3 = srcptr + (y1 * src_step + mad24(x1, pixel_size, src_offset));
__global uchar * D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset));
#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16
#if CN == 1 || CN == 2 || CN == 3 || CN == 4
VW s0 = TO_VEC_WORK(loadpix(S0)); VW s1 = TO_VEC_WORK(loadpix(S1));
VW s2 = TO_VEC_WORK(loadpix(S2)); VW s3 = TO_VEC_WORK(loadpix(S3));
VT d0 = TO_VEC_TYPE((u0 * v0) * s0 + (u1 * v0) * s1 + (u0 * v1) * s2 + (u1 * v1) * s3);
@ -154,7 +152,7 @@ __kernel void resizeOnnx_linear_antialias(
int ix = convert_int_rtn(fx), iy = convert_int_rtn(fy);
float rx = fx - ix, ry = fy - iy;
__global uchar* D = dstptr + dy * dst_step + mad24(dx, pixel_size, dst_offset);
#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16
#if CN == 1 || CN == 2 || CN == 3 || CN == 4
VW sumval = (VW)(0);
float weight = 0;
for (int h = ystart; h < yend; ++h)
@ -266,7 +264,7 @@ __kernel void resizeOnnx_cubic(
xcoeff [x - xstart] = cubicCoeff(A, A2, A3, x - fx);
}
__global uchar* D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset));
#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16
#if CN == 1 || CN == 2 || CN == 3 || CN == 4
VW sum = (VW)(0);
for (int y = ystart; y <= ylimit; ++y)
{
@ -322,13 +320,14 @@ __kernel void resizeOnnx_table(
__global int const* yoffset = xoffset + xstride;
__global float const* xcoeff = (__global float const*)(yoffset + ystride);
__global float const* ycoeff = (__global float const*)(xcoeff + xstride);
#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16
#if CN == 1 || CN == 2 || CN == 3 || CN == 4
VW sum = (VW)(0);
// exact ykanti / xkanti loops
for (int y = dy; y < ystride; y += dst_rows)
{
// offset is already clamped. xoffset is given by uchar
__global const uchar* S = (srcptr + yoffset[y] * src_step + src_offset);
// offset is already clamped
// xoffset is given in uchar (byte) units; yoffset is already multiplied by src_step
__global const uchar* S = srcptr + yoffset[y] + src_offset;
VW sline = (VW)(0);
for (int x = dx; x < xstride; x += dst_cols)
sline += xcoeff[x] * TO_VEC_WORK(loadpix(S + xoffset[x]));
@ -341,7 +340,7 @@ __kernel void resizeOnnx_table(
W sum = 0;
for (int y = dy; y < ystride; y += dst_rows)
{
__global const uchar* S = (srcptr + yoffset[y] * src_step + src_offset);
__global const uchar* S = (srcptr + yoffset[y] + src_offset);
W sline = 0;
for (int x = dx; x < xstride; x += dst_cols)
sline += xcoeff[x] * TO_WORK(((__global T const*)(S + xoffset[x]))[i]);

View File

@ -1356,36 +1356,36 @@ public:
break;
case 2:
for (; x < width; ++x)
reinterpret_cast<short*>(D)[x] = *(reinterpret_cast<short const*>(S + x_ofs[x]));
reinterpret_cast<ushort*>(D)[x] = *(reinterpret_cast<ushort const*>(S + x_ofs[x]));
break;
case 3:
for (; x < width; ++x, D += 3)
{
const uchar* _tS = S + x_ofs[x];
uchar const* _tS = S + x_ofs[x];
D[0] = _tS[0]; D[1] = _tS[1]; D[2] = _tS[2];
}
break;
case 4:
for (; x < width; ++x)
reinterpret_cast<int*>(D)[x] = *(reinterpret_cast<int const*>(S + x_ofs[x]));
reinterpret_cast<uint*>(D)[x] = *(reinterpret_cast<uint const*>(S + x_ofs[x]));
break;
case 6:
for (; x < width; ++x, D += 6)
{
short const* _tS = reinterpret_cast<short const*>(S + x_ofs[x]);
short* _tD = reinterpret_cast<short*>(D);
ushort const* _tS = reinterpret_cast<ushort const*>(S + x_ofs[x]);
ushort* _tD = reinterpret_cast<ushort*>(D);
_tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
}
break;
case 8:
for (; x < width; ++x)
reinterpret_cast<int64*>(D)[x] = *(reinterpret_cast<int64 const*>(S + x_ofs[x]));
reinterpret_cast<uint64*>(D)[x] = *(reinterpret_cast<uint64 const*>(S + x_ofs[x]));
break;
case 12:
for (; x < width; ++x, D += 12)
{
int const* _tS = reinterpret_cast<int const*>(S + x_ofs[x]);
int* _tD = reinterpret_cast<int*>(D);
uint const* _tS = reinterpret_cast<uint const*>(S + x_ofs[x]);
uint* _tD = reinterpret_cast<uint*>(D);
_tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
}
break;
@ -3465,7 +3465,8 @@ public:
int xmin, xmax;
private:
void cubic_coeffs_antialias(int dstlen, int cn, float srcpos, float scale, int srclen, float A, TabIdx* elem)
void cubicCoeffsAntiAlias(
int dstlen, int cn, float srcpos, float scale, int srclen, float A, TabIdx* elem)
{
scale = min(scale, 1.f);
int index = cvFloor(srcpos);
@ -3497,7 +3498,7 @@ private:
}
}
void cubic_coeffs(float x, float A, float* coeffs)
void cubicCoeffs(float x, float A, float* coeffs)
{
coeffs[0] = A * ((((x + 1) - 5) * (x + 1) + 8) * (x + 1) - 4);
coeffs[1] = ((A + 2) * x - (A + 3)) * x * x + 1;
@ -3505,7 +3506,8 @@ private:
coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
}
void linear_coeffs_antialias(int dstlen, int cn, float srcpos, float scale, int srclen, TabIdx* elem)
void linearCoeffsAntialias(
int dstlen, int cn, float srcpos, float scale, int srclen, TabIdx* elem)
{
scale = min(scale, 1.f);
int index = cvFloor(srcpos);
@ -3532,7 +3534,7 @@ private:
}
}
void linear_coeffs(float x, float* coeffs)
void linearCoeffs(float x, float* coeffs)
{
coeffs[0] = 1.f - x;
coeffs[1] = x;
@ -3570,16 +3572,17 @@ private:
area.commit();
CV_CheckLE(ksize, MAX_ESIZE, "resampler kernel's size is too larger");
if (antialias)
// when upsampling, `antialias` gives the same result as `generic`, so use `generic` to speed up
if (antialias && scaled.x < 1.0)
{
float a = M(0, 0), b = M(0, 1);
for (int d = 0; d < dsize.width; ++d)
{
float f = fmaf(static_cast<float>(d), a, b);
if (sampler == INTER_LINEAR)
linear_coeffs_antialias(d, cn, f, scale.x, ssize.width, xtab + d * xkanti);
linearCoeffsAntialias(d, cn, f, scale.x, ssize.width, xtab + d * xkanti);
else // if (sampler == INTER_CUBIC)
cubic_coeffs_antialias(d, cn, f, scale.x, ssize.width, cubicCoeff, xtab + d * xkanti);
cubicCoeffsAntiAlias(d, cn, f, scale.x, ssize.width, cubicCoeff, xtab + d * xkanti);
}
}
else
@ -3608,9 +3611,9 @@ private:
for (int k = 0; k < cn; ++k)
xofs[cn * d + k] = cn * s + k;
if (sampler == INTER_LINEAR)
linear_coeffs(f, cbuf);
linearCoeffs(f, cbuf);
else // if (sampler == INTER_CUBIC)
cubic_coeffs(f, cubicCoeff, cbuf);
cubicCoeffs(f, cubicCoeff, cbuf);
if (is_fixpt)
{
short* coeffs = reinterpret_cast<short*>(xcoeffs) + cn * ksize * d;
@ -3638,16 +3641,16 @@ private:
}
}
if (antialias)
if (antialias && scaled.y < 1.0)
{
float a = M(1, 0), b = M(1, 1);
for (int d = 0; d < dsize.height; ++d)
{
float f = fmaf(static_cast<float>(d), a, b);
if (sampler == INTER_LINEAR)
linear_coeffs_antialias(d, 1, f, scale.y, ssize.height, ytab + d * ykanti);
linearCoeffsAntialias(d, 1, f, scale.y, ssize.height, ytab + d * ykanti);
else // if (sampler == INTER_CUBIC)
cubic_coeffs_antialias(d, 1, f, scale.y, ssize.height, cubicCoeff, ytab + d * ykanti);
cubicCoeffsAntiAlias(d, 1, f, scale.y, ssize.height, cubicCoeff, ytab + d * ykanti);
}
}
else
@ -3662,9 +3665,9 @@ private:
f -= s;
yofs[d] = s;
if (sampler == INTER_LINEAR)
linear_coeffs(f, cbuf);
linearCoeffs(f, cbuf);
else // if (sampler == INTER_CUBIC)
cubic_coeffs(f, cubicCoeff, cbuf);
cubicCoeffs(f, cubicCoeff, cbuf);
if (is_fixpt)
{
short* coeffs = reinterpret_cast<short*>(ycoeffs) + 1 * ksize * d;
@ -3755,7 +3758,7 @@ public:
"something wrong");
}
void hori_antialias_accumulate(T const* S, IdxT* L) const
void horiAntialiasAccumulate(T const* S, IdxT* L) const
{
IdxT alpha;
int const cn = dst.channels();
@ -3809,15 +3812,11 @@ public:
}
}
void hori_antialias_lines(T const** srcptr, WT** dstptr, IdxT* L, int count) const
void horiAntialiasLines(T const** srcptr, WT** dstptr, IdxT* L, int count) const
{
int cn = dst.channels();
int dwidth = dst.cols * cn;
#ifdef CV_CXX11
constexpr bool same_wt_idxt = std::is_same<WT, IdxT>::value;
#else
bool const same_wt_idxt = false;
#endif
bool const same_wt_idxt = std::is_same<WT, IdxT>::value;
for (int i = 0; i < count; ++i)
{
T const* S = srcptr[i];
@ -3825,7 +3824,7 @@ public:
if (same_wt_idxt)
L = reinterpret_cast<IdxT*>(dstptr[i]);
memset(L, 0, sizeof(IdxT) * dwidth);
hori_antialias_accumulate(S, L);
horiAntialiasAccumulate(S, L);
if (!same_wt_idxt)
{
WT* D = dstptr[i];
@ -3844,7 +3843,7 @@ public:
}
}
void hori_generic_lines(T const** srcptr, WT** dstptr, int count) const
void horiGenericLines(T const** srcptr, WT** dstptr, int count) const
{
int cn = src.channels();
int ssize = src.cols * cn;
@ -3857,53 +3856,7 @@ public:
ssize, dsize, cn, xmin, xmax);
}
void vert_antialias_hori_antialias(int dy, IdxT* L, IdxT* A) const
{
// the start and end of ytab
int dwidth = dst.channels() * dst.cols;
int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti;
memset(A, 0, dwidth * sizeof(IdxT));
for (int t = tstart; t < tend; ++t)
{
IdxT beta;
int sy = ctrl.ytab[t].si;
CV_CheckEQ(dy, ctrl.ytab[t].di, "something wrong");
ctrl.ytab[t].as(beta);
memset(L, 0, dwidth * sizeof(IdxT));
hori_antialias_accumulate(src.template ptr<T>(sy), L);
for (int w = 0; w < dwidth; ++w)
A[w] += L[w] * beta;
}
T* D = dst.template ptr<T>(dy);
for (int w = 0; w < dwidth; ++w)
D[w] = saturate_cast<T>(A[w]);
}
void vert_antialias_hori_generic(int dy, WT* L, IdxT* A) const
{
// FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS> cast;
int dwidth = dst.channels() * dst.cols;
int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti;
memset(A, 0, dwidth * sizeof(IdxT));
for (int t = tstart; t < tend; ++t)
{
IdxT beta;
int sy = ctrl.ytab[t].si;
CV_CheckEQ(dy, ctrl.ytab[t].di, "something wrong");
ctrl.ytab[t].as(beta);
T const* S = src.template ptr<T>(sy);
hori_generic_lines(&S, &L, 1);
if (ctrl.is_fixpt)
beta /= INTER_RESIZE_COEF_SCALE;
for (int w = 0; w < dwidth; ++w)
A[w] += L[w] * beta;
}
T* D = dst.template ptr<T>(dy);
for (int w = 0; w < dwidth; ++w)
D[w] = saturate_cast<T>(A[w]);
}
void vert_antialias(Range const& range) const
void vertAntialias(Range const& range) const
{
int cn = dst.channels();
int dwidth = dst.cols * cn;
@ -3913,14 +3866,38 @@ public:
WT* Lw = reinterpret_cast<WT*>(L);
for (int dy = range.start; dy < range.end; ++dy)
{
if (ctrl.xkanti)
vert_antialias_hori_antialias(dy, L, A);
else
vert_antialias_hori_generic(dy, Lw, A);
int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti;
memset(A, 0, dwidth * sizeof(IdxT));
for (int t = tstart; t < tend; ++t)
{
IdxT beta;
int sy = ctrl.ytab[t].si;
CV_CheckEQ(dy, ctrl.ytab[t].di, "something wrong");
ctrl.ytab[t].as(beta);
T const* S = src.template ptr<T>(sy);
if (ctrl.xkanti)
{
memset(L, 0, dwidth * sizeof(IdxT));
horiAntialiasAccumulate(S, L);
for (int w = 0; w < dwidth; ++w)
A[w] += L[w] * beta;
}
else
{
horiGenericLines(&S, &Lw, 1);
if (ctrl.is_fixpt)
beta /= INTER_RESIZE_COEF_SCALE;
for (int w = 0; w < dwidth; ++w)
A[w] += Lw[w] * beta;
}
}
T* D = dst.template ptr<T>(dy);
for (int w = 0; w < dwidth; ++w)
D[w] = saturate_cast<T>(A[w]);
}
}
void vert_generic(Range const& range) const
void vertGeneric(Range const& range) const
{
int ksize = ctrl.ksize, ksize2 = ksize / 2;
int cn = src.channels();
@ -3963,9 +3940,9 @@ public:
if (k0 < ksize)
{
if (ctrl.xkanti)
hori_antialias_lines(srows + k0, rows + k0, L, ksize - k0);
horiAntialiasLines(srows + k0, rows + k0, L, ksize - k0);
else
hori_generic_lines(srows + k0, rows + k0, ksize - k0);
horiGenericLines(srows + k0, rows + k0, ksize - k0);
}
vresize(const_cast<WT const**>(rows), dst.template ptr<T>(dy), beta, dwidth);
}
@ -3974,9 +3951,9 @@ public:
virtual void operator() (Range const& range) const CV_OVERRIDE
{
if (ctrl.ykanti)
vert_antialias(range);
vertAntialias(range);
else
vert_generic(range);
vertGeneric(range);
}
};
@ -4003,7 +3980,7 @@ typedef void (*ResizeAreaFunc)( const Mat& src, Mat& dst,
const DecimateAlpha* ytab, int ytab_size,
const int* yofs);
typedef void (*ResizeOnnxFunc)(Mat const& src, Mat& dst, ResizeOnnxCtrl const&);
typedef void (*ResizeOnnxFunc)(Mat const& src, Mat& dst, ResizeOnnxCtrl const& ctrl);
static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, DecimateAlpha* tab )
@ -4517,7 +4494,7 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst,
float* ycoeff = reinterpret_cast<float*>(xcoeff + xstride);
ocl_resizeOnnxTable(src.cols, dst.cols, pixel_size,
sampler, M(0, 0), M(0, 1), cubicCoeff, scale.x, xoffset, xcoeff);
ocl_resizeOnnxTable(src.rows, dst.rows, 1,
ocl_resizeOnnxTable(src.rows, dst.rows, static_cast<int>(src.step[0]),
sampler, M(1, 0), M(1, 1), cubicCoeff, scale.y, yoffset, ycoeff);
UMat utable;
Mat(1, tabsize, CV_32S, table.data()).copyTo(utable);
@ -5175,6 +5152,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
void cv::resizeOnnx(InputArray _src, OutputArray _dst,
Size dsize, Point2d scale, int interpolation, float cubicCoeff, Rect2d const& roi)
{
static_assert((1 << INTER_SAMPLER_BIT) >= INTER_MAX, "");
CV_INSTRUMENT_REGION();
Size ssize = _src.size();
@ -5185,7 +5163,7 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst,
{
CV_CheckGT(scale.x, 0.0, "scale must > 0 if no dsize given");
CV_CheckGT(scale.y, 0.0, "scale must > 0 if no dsize given");
// https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py#L365
// https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py
// output_size = (scale_factors * np.array(data.shape)).astype(int)
dsize.width = static_cast<int>(scale.x * ssize.width );
dsize.height = static_cast<int>(scale.y * ssize.height);
@ -5196,8 +5174,8 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst,
scale.y = static_cast<double>(dsize.height) / ssize.height;
}
CV_CheckFalse(dsize.empty(), "dst size must not empty");
CV_CheckGT(scale.x, 0.0, "computed scale <= 0 with given dsize");
CV_CheckGT(scale.y, 0.0, "computed scale <= 0 with given dsize");
CV_CheckGT(scale.x, 0.0, "require computed or given scale > 0");
CV_CheckGT(scale.y, 0.0, "require computed or given scale > 0");
int sampler = interpolation & INTER_SAMPLER_MASK;
int nearest = interpolation & INTER_NEAREST_MODE_MASK;
@ -5237,6 +5215,9 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst,
_src.copyTo(_dst);
return;
}
// Antialias is applied when downsampling
if (scale.x >= 1.0 && scale.y >= 1.0)
interpolation &= ~INTER_ANTIALIAS_MASK;
// Fake reference to source. Resolves issue 13577 in case of src == dst.
UMat srcUMat;

View File

@ -338,18 +338,18 @@ OCL_TEST(Resize, overflow_21198)
PARAM_TEST_CASE(ResizeOnnx, MatType, double, double, int, bool, int)
{
int type, interpolation;
int depth, interpolation;
int widthMultiple;
double fx, fy;
bool useRoi;
Mat middle;
TEST_DECLARE_INPUT_PARAMETER(src);
TEST_DECLARE_OUTPUT_PARAMETER(dst);
Rect src_loc, dst_loc;
Mat src, dst, src_roi, dst_roi;
UMat gsrc, gdst, gsrc_roi, gdst_roi;
virtual void SetUp()
{
type = GET_PARAM(0);
depth = GET_PARAM(0);
fx = GET_PARAM(1);
fy = GET_PARAM(2);
interpolation = GET_PARAM(3);
@ -357,96 +357,70 @@ PARAM_TEST_CASE(ResizeOnnx, MatType, double, double, int, bool, int)
widthMultiple = GET_PARAM(5);
}
void random_roi()
void random_submat(int type,
Size& size, Rect& roi, Mat& mat, Mat& sub, UMat& gmat, UMat& gsub)
{
CV_Assert(fx > 0 && fy > 0);
int border = useRoi ? 65 : 0;
roi.x = randomInt(0, border);
roi.y = randomInt(0, border);
roi.width = size.width;
roi.height = size.height;
size.width += roi.x + randomInt(0, border);
size.height += roi.y + randomInt(0, border);
mat = randomMat(size, type, -127, 127);
mat.copyTo(gmat);
sub = mat(roi);
gsub = gmat(roi);
}
Size srcRoiSize = randomSize(10, MAX_VALUE), dstRoiSize;
// Make sure the width is a multiple of the requested value, and no more
srcRoiSize.width += widthMultiple - 1 - (srcRoiSize.width - 1) % widthMultiple;
dstRoiSize.width = cvRound(srcRoiSize.width * fx);
dstRoiSize.height = cvRound(srcRoiSize.height * fy);
if (dstRoiSize.empty())
void random_roi(int type)
{
Size srcSize, dstSize;
int minSize = min(fx, fy) < 1.0 ? 10 : 1;
while (dstSize.empty())
{
random_roi();
return;
srcSize = randomSize(minSize, 129);
srcSize.width += widthMultiple - 1 - (srcSize.width - 1) % widthMultiple;
dstSize.width = cvRound(srcSize.width * fx);
dstSize.height = cvRound(srcSize.height * fy);
}
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, srcRoiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
#if 0
// if the nearest test fails, it may be an fma issue; try enabling this #if
// set pixels' value to their coordinate
if ((interpolation & INTER_SAMPLER_MASK) == INTER_NEAREST)
{
int channel = CV_MAT_CN(type);
middle.create(src.rows, src.cols, CV_16SC(channel));
for (int h = 0; h < src.rows; ++h)
{
for (int c = 0; c < channel; c += 2)
{
// even x; odd y
short* S = middle.ptr<short>(h) + c;
for (int w = 0; w < src.cols; ++w, S += channel)
S[0] = static_cast<short>(w);
}
for (int c = 1; c < channel; c += 2)
{
// even x; odd y
short* S = middle.ptr<short>(h) + c;
for (int w = 0; w < src.cols; ++w, S += channel)
S[0] = static_cast<short>(h);
}
}
middle.convertTo(src, type);
src_roi = src(Rect(srcBorder.lef, srcBorder.top, srcRoiSize.width, srcRoiSize.height));
}
#endif
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, dstRoiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
UMAT_UPLOAD_INPUT_PARAMETER(src);
UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
random_submat(type, srcSize, src_loc, src, src_roi, gsrc, gsrc_roi);
random_submat(type, dstSize, dst_loc, dst, dst_roi, gdst, gdst_roi);
}
};
OCL_TEST_P(ResizeOnnx, Mat)
{
Size whole;
Point offset;
Mat host, host_roi;
int cn = CV_MAT_CN(type);
int depth = CV_MAT_DEPTH(type);
double eps = depth <= CV_32S ? integerEps : 5e-2;
for (int j = 0; j < test_loop_times; j++)
// loop on channel to reduce the number of test
for (int cn = 1; cn <= 6; ++cn)
{
random_roi();
OCL_OFF(cv::resizeOnnx(src_roi, dst_roi,
dst_roi.size(), Point2d(fx, fy), interpolation));
OCL_ON(cv::resizeOnnx(usrc_roi, udst_roi,
dst_roi.size(), Point2d(fx, fy), interpolation));
dst_roi.locateROI(whole, offset);
udst.copyTo(host);
host_roi = host(Rect(offset, dst_roi.size()));
if (cn <= 4 && depth != CV_8S && depth != CV_32S)
OCL_EXPECT_MAT_N_DIFF(dst, eps);
else
int type = CV_MAKETYPE(depth, cn);
for (int j = 0; j < test_loop_times; ++j)
{
// more strict than OCL_EXPECT_MAT_N_DIFF
double dif = cv::norm(dst_roi, host_roi, NORM_INF);
EXPECT_LE(dif, eps)
<< "Size: " << src_roi.size()
<< ", NormInf: " << dif << std::endl;
random_roi(type);
OCL_OFF(cv::resizeOnnx(src_roi, dst_roi,
dst_roi.size(), Point2d(fx, fy), interpolation));
OCL_ON(cv::resizeOnnx(gsrc_roi, gdst_roi,
dst_roi.size(), Point2d(fx, fy), interpolation));
// copy whole gdst to make sure that
// we really use the given roi memory and not allocate a new one
gdst.copyTo(host);
host_roi = host(dst_loc);
string info = cv::format(
"fail on type %sC%d src %dx%d dst %dx%d src_roi %dx%d dst_roi %dx%d",
depthToString(depth), cn, src.cols, src.rows, dst.cols, dst.rows,
src_roi.cols, src_roi.rows, dst_roi.cols, dst_roi.rows);
EXPECT_LE(cv::norm(dst_roi, host_roi, NORM_INF), eps) << info;
}
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// remap
@ -689,23 +663,18 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpResizeArea, Resize, Combine(
Bool(),
Values(1, 16)));
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAlias, ResizeOnnx, Combine(
Values(
CV_8UC1, CV_8SC2, CV_8UC(5), CV_8SC(7),
CV_16UC1, CV_16SC3, CV_16UC(9), CV_16SC(10),
CV_32FC1, CV_32FC4, CV_32FC(11)),
Values(0.5, 0.31, 1.4),
Values(0.5, 0.73, 3.7),
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, ResizeOnnx, Combine(
Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32F),
Values(0.4, 0.27, 1.6),
Values(0.5, 0.71, 2.7),
Values((int)(INTER_LINEAR), (int)(INTER_CUBIC)),
Bool(),
Values(1, 16)));
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAntiAlias, ResizeOnnx, Combine(
Values(
CV_8UC1, CV_8SC2, CV_8UC(5), CV_8SC(7),
CV_16UC1, CV_16SC3, CV_16UC(9), CV_16SC(10),
CV_32FC1, CV_32FC4, CV_32FC(11)),
Values(0.5, 0.27, 2.6),
Values(0.5, 0.71, 4.1),
Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32F),
Values(0.4, 0.27, 1.6),
Values(0.5, 0.71, 2.7),
Values(
(int)(INTER_ANTIALIAS | INTER_LINEAR),
(int)(INTER_ANTIALIAS | INTER_CUBIC )),
@ -713,12 +682,9 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAntiAlias, ResizeOnnx, Combine(
Values(1, 16)));
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpNearest, ResizeOnnx, Combine(
Values(
CV_8UC1, CV_8SC2, CV_8UC4, CV_8SC(7),
CV_16UC1, CV_16SC3, CV_16UC(9), CV_32SC(10),
CV_32FC1, CV_32FC4, CV_32FC(11)),
Values(0.5, 0.27, 2.6),
Values(0.5, 0.71, 4.1),
Values(CV_8S, CV_16S, CV_32F, CV_64F),
Values(0.4, 0.27, 1.6),
Values(0.5, 0.71, 2.7),
Values(
(int)(INTER_NEAREST | INTER_NEAREST_PREFER_FLOOR),
(int)(INTER_NEAREST | INTER_NEAREST_PREFER_CEIL),

View File

@ -43,10 +43,10 @@ struct ResizeOnnx
Mat iS(szsrc, CV_64F, insrc.data());
Mat iR(szref, CV_64F, inref.data());
Mat S = iS, R = iR, nS, nR;
double alpha[6] = {1, 1, 5, 5, -1, -3};
double beta[6] = {0, 7, 0, 7, +0, -7};
double alpha[8] = {1, -1, 5, 5, 0, -3, -2, +4};
double beta[8] = {0, -0, 0, 7, 7, -7, -6, +6};
RNG rng;
for (int cn = 1; cn <= 6; ++cn)
for (int cn = 1; cn <= 8; ++cn)
{
if (cn > 1)
{
@ -59,7 +59,7 @@ struct ResizeOnnx
{
double eps = (depth <= CV_32S) ? 1.0 : 1e-3;
int type = CV_MAKETYPE(depth, cn);
string errinfo = "failed on type " + typeToString(type);
string errinfo = "fail on type " + typeToString(type);
Mat src, ref, dst;
rand_roi(rng, src, szsrc, type);
if (szdst.area())