From 6dd93a82edec0870d53020d961c2a5e5ce56e1af Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Sat, 1 Jun 2024 16:04:36 +0800 Subject: [PATCH 01/12] cpu + ocl resize onnx done - rebase to 4.x - squash commit history due to so many conflicts --- modules/imgproc/include/opencv2/imgproc.hpp | 89 +- modules/imgproc/src/opencl/resize_onnx.cl | 360 ++++++ modules/imgproc/src/resize.cpp | 1217 ++++++++++++++++++- modules/imgproc/test/ocl/test_warp.cpp | 154 ++- modules/imgproc/test/test_resize_onnx.cpp | 498 ++++++++ modules/ts/include/opencv2/ts/ocl_test.hpp | 4 +- 6 files changed, 2302 insertions(+), 20 deletions(-) create mode 100644 modules/imgproc/src/opencl/resize_onnx.cl create mode 100644 modules/imgproc/test/test_resize_onnx.cpp diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 471a857f63..8e6eecb667 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -245,7 +245,7 @@ enum MorphShapes { //! @{ //! interpolation algorithm -enum InterpolationFlags{ +enum InterpolationFlags { /** nearest neighbor interpolation */ INTER_NEAREST = 0, /** bilinear interpolation */ @@ -278,6 +278,55 @@ enum InterpolationFlags{ WARP_RELATIVE_MAP = 32 }; +//! 
ONNX Resize Flags +enum ResizeONNXFlags +{ + // static_assert((1 << INTER_COORDINATE_SHIFT) > INTER_MAX, ""); + // https://github.com/onnx/onnx/blob/main/docs/Operators.md#Resize + + INTER_SAMPLER_SHIFT = 0, + INTER_SAMPLER_BIT = 4, + INTER_SAMPLER_MASK = ((1 << INTER_SAMPLER_BIT) - 1) << INTER_SAMPLER_SHIFT, + + INTER_COORDINATE_SHIFT = 4, + INTER_COORDINATE_BIT = 3, + INTER_COORDINATE_MASK = ((1 << INTER_COORDINATE_BIT) - 1) << INTER_COORDINATE_SHIFT, + /** x_original = (x_resized + 0.5) / scale - 0.5 */ + INTER_HALF_PIXEL = 0 << INTER_COORDINATE_SHIFT, + /** adjustment = output_width_int / output_width + center = input_width / 2 + offset = center * (1 - adjustment) + x_ori = offset + (x + 0.5) / scale - 0.5 */ + INTER_HALF_PIXEL_SYMMETRIC = 1 << INTER_COORDINATE_SHIFT, + /** x_original = length_resized > 1 ? (x_resized + 0.5) / scale - 0.5 : 0 */ + INTER_HALF_PIXEL_PYTORCH = 2 << INTER_COORDINATE_SHIFT, + /** x_original = x_resized * (length_original - 1) / (length_resized - 1) */ + INTER_ALIGN_CORNERS = 3 << INTER_COORDINATE_SHIFT, + /** x_original = x_resized / scale */ + INTER_ASYMMETRIC = 4 << INTER_COORDINATE_SHIFT, + /** x_original = length_resized > 1 + * ? 
start_x * (length_original - 1) + x_resized * (end_x - start_x) * (length_original - 1) / (length_resized - 1) + * : 0.5 * (start_x + end_x) * (length_original - 1) */ + INTER_TF_CROP_RESIZE = 5 << INTER_COORDINATE_SHIFT, + + INTER_NEAREST_MODE_SHIFT = INTER_COORDINATE_SHIFT + INTER_COORDINATE_BIT, + INTER_NEAREST_MODE_BIT = 2, + INTER_NEAREST_MODE_MASK = ((1 << INTER_NEAREST_MODE_BIT) - 1) << INTER_NEAREST_MODE_SHIFT, + /** round half down: x = ceil(x - 0.5) */ + INTER_NEAREST_PREFER_FLOOR = 0 << INTER_NEAREST_MODE_SHIFT, + /** round half up : x = floor(x + 0.5) */ + INTER_NEAREST_PREFER_CEIL = 1 << INTER_NEAREST_MODE_SHIFT, + /** x = floor(x) */ + INTER_NEAREST_FLOOR = 2 << INTER_NEAREST_MODE_SHIFT, + /** x = ceil(x) */ + INTER_NEAREST_CEIL = 3 << INTER_NEAREST_MODE_SHIFT, + + INTER_ANTIALIAS_SHIFT = INTER_NEAREST_MODE_SHIFT + INTER_NEAREST_MODE_BIT, + INTER_ANTIALIAS_BIT = 1, + INTER_ANTIALIAS_MASK = ((1 << INTER_ANTIALIAS_BIT) - 1) << INTER_ANTIALIAS_SHIFT, + INTER_ANTIALIAS = 1 << INTER_ANTIALIAS_SHIFT, +}; + /** \brief Specify the polar mapping mode @sa warpPolar */ @@ -288,11 +337,11 @@ enum WarpPolarMode }; enum InterpolationMasks { - INTER_BITS = 5, - INTER_BITS2 = INTER_BITS * 2, - INTER_TAB_SIZE = 1 << INTER_BITS, - INTER_TAB_SIZE2 = INTER_TAB_SIZE * INTER_TAB_SIZE - }; + INTER_BITS = 5, + INTER_BITS2 = INTER_BITS * 2, + INTER_TAB_SIZE = 1 << INTER_BITS, + INTER_TAB_SIZE2 = INTER_TAB_SIZE * INTER_TAB_SIZE +}; //! @} imgproc_transform @@ -2418,6 +2467,34 @@ CV_EXPORTS_W void resize( InputArray src, OutputArray dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR ); +/** @brief onnx resize op +https://github.com/onnx/onnx/blob/main/docs/Operators.md#Resize +https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py + +Not support `exclude_outside` and `extrapolation_value` yet. 
+ +To get a similar result to resize, give dsize and: + INTER_NEAREST : ASYMMETRIC + NEAREST_FLOOR + INTER_LINEAR : HALF_PIXEL + INTER_CUBIC : HALF_PIXEL + cubicCoeff(-0.75) + +@param src input image. +@param dst output image; it has the size dsize (when it is non-zero) or the size computed from src.size(), scale; the type of dst is the same as of src. +@param dsize output image size; if it equals zero, it is computed as: + \f[\texttt{dsize = Size(int(scale.x * src.cols), int(scale.y * src.rows))}\f] + Either dsize or scale must be non-zero. +@param scale scale factor; uses the same definition as ONNX: scale > 1 means upsampling. +@param interpolation interpolation / coordinate transformation mode, see #InterpolationFlags and #ResizeONNXFlags +@param cubicCoeff cubic sampling coeff; range \f[[-1.0, 0)\f] +@param roi crop region; if provided, the roi's coordinates are normalized in the coordinate system of the input image; it only takes effect with INTER_TF_CROP_RESIZE (ONNX tf_crop_and_resize) + +@sa resize + */ +CV_EXPORTS_W void resizeOnnx( + InputArray src, OutputArray dst, Size dsize, Point2d scale = Point2d(), + int interpolation = INTER_LINEAR | INTER_HALF_PIXEL, + float cubicCoeff = -0.75f, Rect2d const& roi = Rect2d()); + /** @brief Applies an affine transformation to an image. The function warpAffine transforms the source image using the specified matrix: diff --git a/modules/imgproc/src/opencl/resize_onnx.cl b/modules/imgproc/src/opencl/resize_onnx.cl new file mode 100644 index 0000000000..03a64e2bc1 --- /dev/null +++ b/modules/imgproc/src/opencl/resize_onnx.cl @@ -0,0 +1,360 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifdef DOUBLE_SUPPORT +# ifdef cl_amd_fp64 +# pragma OPENCL EXTENSION cl_amd_fp64:enable +# elif defined (cl_khr_fp64) +# pragma OPENCL EXTENSION cl_khr_fp64:enable +# endif +#endif + +#define noconvert(x) (x) + +#ifndef T +# define INTER_NEAREST1 +# define INTER_LINEAR1 +# define INTER_CUBIC +# define INTER_ANTIALIAS1 +# define T int +# define W double +# define CN 3 +# define PIXEL_SIZE 12 +# define VT int3 +# define VW double3 +# define TO_WORK convert_double +# define TO_VEC_WORK convert_double3 +# define TO_TYPE convert_int_sat_rte +# define TO_VEC_TYPE convert_int3_sat_rte +#endif + +// use parameter `channel' to reduce the number of kernels +#if CN != 3 +# define loadpix(addr) *(__global const VT*)(addr) +# define storepix(val, addr) *(__global VT*)(addr) = val +#else +# define loadpix(addr) vload3(0, (__global const T*)(addr)) +# define storepix(val, addr) vstore3(val, 0, (__global T*)(addr)) +#endif + +#if defined(INTER_NEAREST) + +__kernel void resizeOnnx_nearest( + __global uchar const* srcptr, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, + int pixel_size, float offset, float m00, float m01, float m10, float m11) +{ + int dx = get_global_id(0); + int dy = get_global_id(1); + if (dx < dst_cols && dy < dst_rows) + { + float fx = fma(dx, m00 , m01), fy = fma(dy, m10, m11); + +#if defined(INTER_NEAREST_PREFER_FLOOR) || defined(INTER_NEAREST_CEIL) + // x, y will >= 0, so `round toward positive infinity' is equivalent to ceil + int sx = convert_int_rtp(fx + offset); + int sy = convert_int_rtp(fy + offset); +#else + // x, y will >= 0, so `round toward negative infinity' is equivalent to floor + int sx = convert_int_rtn(fx + offset); + int sy = convert_int_rtn(fy + offset); +#endif + sx = clamp(sx, 0, src_cols - 1); + sy = clamp(sy, 0, src_rows - 1); + // maybe step >= 8M, so do not use `mad24' for y + __global uchar const* S = srcptr + (sy * src_step + mad24(sx, 
pixel_size, src_offset)); + __global uchar * D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset)); + +#if PIXEL_SIZE == 1 + *D = *S; +#elif PIXEL_SIZE == 2 || PIXEL_SIZE == 4 || PIXEL_SIZE == 8 || PIXEL_SIZE == 16 + *(__global VT*)(D) = *(__global const VT*)(S); +#elif PIXEL_SIZE == 3 + vstore3(vload3(0, S), 0, D); +#elif PIXEL_SIZE == 6 + vstore3(vload3(0, (__global ushort const*)(S)), 0, (__global ushort*)(D)); +#elif PIXEL_SIZE == 12 + vstore3(vload3(0, (__global const uint*)(S)), 0, (__global uint*)(D)); +#elif PIXEL_SIZE == 24 + vstore3(vload3(0, (__global ulong const*)(S)), 0, (__global ulong*)(D)); +#elif PIXEL_SIZE == 32 + *(__global uint8*)(D) = *(__global uint8 const*)(S); +#elif PIXEL_SIZE == 64 + *(__global uint16*)(D) = *(__global uint16 const*)(S); +#elif PIXEL_SIZE == 128 + *(__global ulong16*)(D) = *(__global ulong16 const*)(S); +#else + for (int i = 0; i < pixel_size; ++i) + D[i] = S[i]; +#endif + } +} + +#elif defined(INTER_LINEAR) && !defined(INTER_ANTIALIAS) + +__kernel void resizeOnnx_linear( + __global uchar const* srcptr, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, + int pixel_size, int channel, float m00, float m01, float m10, float m11) +{ + int dx = get_global_id(0); + int dy = get_global_id(1); + if (dx < dst_cols && dy < dst_rows) + { + float fx = fma(dx, m00, m01), fy = fma(dy, m10, m11); + int ix = convert_int_rtn(fx), iy = convert_int_rtn(fy); + float u1 = fx - ix, v1 = fy - iy; + float u0 = 1.f - u1, v0 = 1.f - v1; + int x0 = max(ix, 0); + int y0 = max(iy, 0); + int x1 = min(ix + 1, src_cols - 1); + int y1 = min(iy + 1, src_rows - 1); + __global uchar const* S0 = srcptr + (y0 * src_step + mad24(x0, pixel_size, src_offset)); + __global uchar const* S1 = srcptr + (y0 * src_step + mad24(x1, pixel_size, src_offset)); + __global uchar const* S2 = srcptr + (y1 * src_step + mad24(x0, pixel_size, src_offset)); + __global uchar 
const* S3 = srcptr + (y1 * src_step + mad24(x1, pixel_size, src_offset)); + __global uchar * D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset)); +#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16 + VW s0 = TO_VEC_WORK(loadpix(S0)); VW s1 = TO_VEC_WORK(loadpix(S1)); + VW s2 = TO_VEC_WORK(loadpix(S2)); VW s3 = TO_VEC_WORK(loadpix(S3)); + VT d0 = TO_VEC_TYPE((u0 * v0) * s0 + (u1 * v0) * s1 + (u0 * v1) * s2 + (u1 * v1) * s3); + storepix(d0, D); +#else + for (int i = 0; i < channel; ++i) + { + W s0 = TO_WORK(((__global T const*)(S0))[i]); + W s1 = TO_WORK(((__global T const*)(S1))[i]); + W s2 = TO_WORK(((__global T const*)(S2))[i]); + W s3 = TO_WORK(((__global T const*)(S3))[i]); + W d0 = (u0 * v0) * s0 + (u1 * v0) * s1 + (u0 * v1) * s2 + (u1 * v1) * s3; + ((__global T*)(D))[i] = TO_TYPE(d0); + } +#endif + } +} + +#elif defined(INTER_LINEAR) && defined(INTER_ANTIALIAS) + +__kernel void resizeOnnx_linear_antialias( + __global uchar const* srcptr, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, + int pixel_size, int channel, float m00, float m01, float m10, float m11, + float xscale, float yscale) +{ + int dx = get_global_id(0); + int dy = get_global_id(1); + if (dx < dst_cols && dy < dst_rows) + { + int xstart = convert_int_rtn(-1.f / xscale) + 1; + int xend = 2 - xstart; + int ystart = convert_int_rtn(-1.f / yscale) + 1; + int yend = 2 - ystart; + float fx = fma(dx, m00, m01), fy = fma(dy, m10, m11); + int ix = convert_int_rtn(fx), iy = convert_int_rtn(fy); + float rx = fx - ix, ry = fy - iy; + __global uchar* D = dstptr + dy * dst_step + mad24(dx, pixel_size, dst_offset); +#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16 + VW sumval = (VW)(0); + float weight = 0; + for (int h = ystart; h < yend; ++h) + { + VW sline = (VW)(0); + float wline = 0; + int sy = clamp(iy + h, 0, src_rows - 1); + __global uchar const* S = srcptr + sy 
* src_step + src_offset; + for (int w = xstart; w < xend; ++w) + { + // the computation of linear's weights is trival, so do it in kernel + int sx = clamp(ix + w, 0, src_cols - 1); + float t = fabs(w - rx) * xscale; + t = clamp(1.f - t, 0.f, 1.f); + wline += t; + sline += t * TO_VEC_WORK(loadpix(S + sx * pixel_size)); + } + float u = fabs(h - ry) * yscale; + u = clamp(1.f - u, 0.f, 1.f); + weight += u * wline; + sumval += u * sline; + } + VT d0 = TO_VEC_TYPE(sumval / weight); + storepix(d0, D); +#else + W sumval = 0; + float weight = 0; + for (int h = ystart; h < yend; ++h) + { + W sline = 0; + float wline = 0; + int sy = clamp(iy + h, 0, src_rows - 1); + __global uchar const* S = srcptr + sy * src_step + src_offset; + for (int w = xstart; w < xend; ++w) + { + int sx = clamp(ix + w, 0, src_cols - 1); + float t = fabs(w - rx) * xscale; + t = clamp(1.f - t, 0.f, 1.f); + wline += t; + sline += t * TO_WORK(((__global T const*)(S + sx * pixel_size))[0]); + } + float u = fabs(h - ry) * yscale; + u = clamp(1.f - u, 0.f, 1.f); + weight += u * wline; + sumval += u * sline; + } + ((__global T*)(D))[0] = TO_TYPE(sumval / weight); + + for (int i = 1; i < channel; ++i) + { + sumval = 0; + for (int h = ystart; h < yend; ++h) + { + W sline = 0; + int sy = clamp(iy + h, 0, src_rows - 1); + __global uchar const* S = srcptr + sy * src_step + src_offset; + for (int w = xstart; w < xend; ++w) + { + int sx = clamp(ix + w, 0, src_cols - 1); + float t = fabs(w - rx) * xscale; + t = clamp(1.f - t, 0.f, 1.f); + sline += t * TO_WORK(((__global T const*)(S + sx * pixel_size))[i]); + } + float u = fabs(h - ry) * yscale; + u = clamp(1.f - u, 0.f, 1.f); + sumval += u * sline; + } + ((__global T*)(D))[i] = TO_TYPE(sumval / weight); + } +#endif + } +} + + +#elif defined(INTER_CUBIC) && !defined(INTER_ANTIALIAS) + +float cubicCoeff(float A, float A2, float A3, float x) +{ + x = fabs(x); + if (x <= 1) + x = (A2 * x - A3) * x * x + 1; + else if (x <= 2) + x = A * (((x - 5) * x + 8) * x - 4); + else 
+        x = 0;
+    return x;
+}
+/* resizeOnnx_cubic: bicubic resize without antialiasing, one work-item per destination pixel.
+ *   (dx, dy)    destination pixel, read from the global id and bounds-checked against dst_cols / dst_rows.
+ *   m00, m01    the source x position is fx = dx * m00 + m01; m10, m11 do the same for y.
+ *   pixel_size  multiplier that turns a pixel index into an offset on the uchar pointers
+ *               (presumably bytes per pixel - confirm against the host code).
+ *   channel     channel count; only read by the scalar fallback (CN not in {1, 2, 3, 4, 8, 16}).
+ *   A           cubic coefficient handed to cubicCoeff(); A + 2 and A + 3 are computed once up front.
+ * The 4x4 taps start at (floor(fx) - 1, floor(fy) - 1); every tap index is clamped into the source image.
+ */
+__kernel void resizeOnnx_cubic(
+    __global uchar const* srcptr, int src_step, int src_offset, int src_rows, int src_cols,
+    __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
+    int pixel_size, int channel, float m00, float m01, float m10, float m11, float A)
+{
+    int dx = get_global_id(0);
+    int dy = get_global_id(1);
+    float A2 = A + 2, A3 = A + 3; // constants reused by every cubicCoeff() call below
+    if (dx < dst_cols && dy < dst_rows)
+    {
+        float fx = fma(dx, m00, m01), fy = fma(dy, m10, m11); // position of this dst pixel in the source
+        int xstart = convert_int_rtn(fx) - 1; // rtn rounds toward -inf, i.e. floor(fx) - 1
+        int ystart = convert_int_rtn(fy) - 1;
+        int xlimit = xstart + 3; // inclusive last tap, so 4 taps per axis
+        int ylimit = ystart + 3;
+        int xoffset[4];
+        float xcoeff[4];
+        for (int x = xstart; x <= xlimit; ++x)
+        {
+            xoffset[x - xstart] = clamp(x, 0, src_cols - 1) * pixel_size; // clamped column as an offset within a row
+            xcoeff [x - xstart] = cubicCoeff(A, A2, A3, x - fx); // weight from the signed distance to fx
+        }
+        __global uchar* D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset));
+#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16
+        VW sum = (VW)(0); // vector path: all channels of a pixel are accumulated together
+        for (int y = ystart; y <= ylimit; ++y)
+        {
+            int yoffset = clamp(y, 0, src_rows - 1) * src_step + src_offset; // clamped row start
+            VW line = (VW)(0);
+            for (int x = 0; x < 4; ++x)
+                line += (VW)(xcoeff[x]) * TO_VEC_WORK(loadpix(srcptr + yoffset + xoffset[x]));
+            sum += line * (VW)(cubicCoeff(A, A2, A3, y - fy)); // vertical weight applied once per row
+        }
+        storepix(TO_VEC_TYPE(sum), D);
+#else
+        int yoffset[4]; // scalar fallback: cache row offsets / weights, they are reused for every channel
+        float ycoeff[4];
+        for (int y = ystart; y <= ylimit; ++y)
+        {
+            yoffset[y - ystart] = clamp(y, 0, src_rows - 1) * src_step + src_offset;
+            ycoeff [y - ystart] = cubicCoeff(A, A2, A3, y - fy);
+        }
+        for (int i = 0; i < channel; ++i)
+        {
+            W sum = 0;
+            for (int y = 0; y < 4; ++y)
+            {
+                W line = 0;
+                for (int x = 0; x < 4; ++x)
+                    line += xcoeff[x] * TO_WORK(((__global T const*)
+                        (srcptr + yoffset[y] + xoffset[x]))[i]);
+                sum += line * ycoeff[y];
+            }
+            ((__global T*)(D))[i] = TO_TYPE(sum);
+        }
+#endif
+    }
+}
+
+#elif defined(INTER_CUBIC) && defined(INTER_ANTIALIAS)
+
+// the computation of cubic's weight is 
heavy(?), so do it outside +// maybe it is also ok for linear antialias resize? +__kernel void resizeOnnx_table( + __global uchar const* srcptr, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, + int pixel_size, int channel, int xkanti, int ykanti, __global int const* table) +{ + int dx = get_global_id(0); + int dy = get_global_id(1); + if (dx < dst_cols && dy < dst_rows) + { + int xstride = xkanti * dst_cols; + int ystride = ykanti * dst_rows; + __global uchar* D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset)); + __global int const* xoffset = table; + __global int const* yoffset = xoffset + xstride; + __global float const* xcoeff = (__global float const*)(yoffset + ystride); + __global float const* ycoeff = (__global float const*)(xcoeff + xstride); +#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16 + VW sum = (VW)(0); + // exact ykanti / xkanti loops + for (int y = dy; y < ystride; y += dst_rows) + { + // offset is already clamped. 
xoffset is given by uchar + __global const uchar* S = (srcptr + yoffset[y] * src_step + src_offset); + VW line = (VW)(0); + for (int x = dx; x < xstride; x += dst_cols) + line += xcoeff[x] * TO_VEC_WORK(loadpix(S + xoffset[x])); + sum += line * ycoeff[y]; + } + storepix(TO_VEC_TYPE(sum), D); +#else + for (int i = 0; i < channel; ++i) + { + W sum = 0; + for (int y = dy; y < ystride; y += dst_rows) + { + __global const uchar* S = (srcptr + yoffset[y] * src_step + src_offset); + W line = 0; + for (int x = dx; x < xstride; x += dst_cols) + line += xcoeff[x] * TO_WORK(((__global T const*)(S + xoffset[x]))[i]); + sum += line * ycoeff[y]; + } + ((__global T*)(D))[i] = TO_TYPE(sum); + } +#endif + } +} + +#else + +#error "empty kernel" + +#endif diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index 7e45f1e0f4..9fa892de98 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -946,6 +946,60 @@ static inline void interpolateLanczos4( float x, float* coeffs ) coeffs[i] *= sum; } +/** + * the coordiante transformation from dst to src is linear + * and can be written as: x_org = f(x) = a * x + b. + * note: scale may be user input and not equal to (src / dst). + * ref to onnx, length_resized is src * scale (float), not dst (int). 
+ */ +static Vec2f interCoordinate(int coordinate, int dst, int src, double scale, double start, double end) +{ + float a, b; + if (coordinate == INTER_HALF_PIXEL + || coordinate == INTER_HALF_PIXEL_SYMMETRIC + || coordinate == INTER_HALF_PIXEL_PYTORCH) + { + a = static_cast(1.0 / scale); + b = static_cast(0.5 / scale - 0.5); + if (coordinate == INTER_HALF_PIXEL_SYMMETRIC) + b += static_cast(0.5 * (src - dst / scale)); + if (coordinate == INTER_HALF_PIXEL_PYTORCH && dst <= 1) + { + a = 0.f; + b = -0.5f; + } + } + else if (coordinate == INTER_ALIGN_CORNERS) + { + a = static_cast((src - 1.0) / (src * scale - 1.0)); + b = 0.f; + } + else if (coordinate == INTER_ASYMMETRIC) + { + a = static_cast(1.0 / scale); + b = 0.f; + } + else if (coordinate == INTER_TF_CROP_RESIZE) + { + CV_CheckGE(start, 0.0, "roi's start is out of image"); + CV_CheckLE(end , 1.0, "roi's end is out of image"); + CV_CheckLT(start, end, "roi's start must be less than its end"); + if (dst <= 1) + { + a = 0.f; + b = static_cast(0.5 * (start + end) * (src - 1.0)); + } + else + { + a = static_cast((end - start) * (src - 1.0) / (src * scale - 1.0)); + b = static_cast(start * (src - 1.0)); + } + } + else + CV_Error(Error::StsBadArg, format("Unknown coordinate transformation mode %d", coordinate)); + return Vec2f(a, b); +} + template struct Cast { typedef ST type1; @@ -1231,6 +1285,128 @@ static void resizeNN_bitexact( const Mat& src, Mat& dst, double /*fx*/, double / parallel_for_(range, invoker, dst.total()/(double)(1<<16)); } +class ResizeOnnxNNInvoker : public ParallelLoopBody +{ + Mat src; + Mat& dst; + Matx22f M; + int mode; + float offset; + AutoBuffer x_ofs; + ResizeOnnxNNInvoker(const ResizeOnnxNNInvoker&); + ResizeOnnxNNInvoker& operator=(const ResizeOnnxNNInvoker&); + + int srcIndex(int x, float a, float b) const + { + // offset can not add to M(0, 1) and M(1, 1) directly + // due to the small float error near integer + float f = fmaf(static_cast(x), a, b); + if (mode == 
INTER_NEAREST_PREFER_FLOOR || + mode == INTER_NEAREST_CEIL) + x = cvCeil(f + offset); + else + x = cvFloor(f + offset); + return x; + } + +public: + ResizeOnnxNNInvoker(Mat const& _src, Mat& _dst, const Matx22f& _M, int _mode) + : src(_src), dst(_dst), M(_M), mode(_mode) + { + offset = 0.f; + if (mode == INTER_NEAREST_PREFER_FLOOR) + offset = -0.5f; + if (mode == INTER_NEAREST_PREFER_CEIL) + offset = +0.5f; + + x_ofs.allocate(dst.cols); + size_t pix_size = src.elemSize(); + for (int x = 0; x < dst.cols; ++x) + { + int sx = srcIndex(x, M(0, 0), M(0, 1)); + sx = min(max(sx, 0), src.cols - 1); + x_ofs[x] = sx * pix_size; + } + } + + virtual void operator() (const Range& range) const CV_OVERRIDE + { + int width = dst.cols; + size_t pix_size = src.elemSize(); + for (int y = range.start; y < range.end; ++y) + { + uchar* D = dst.ptr(y); + int sy = srcIndex(y, M(1, 0), M(1, 1)); + sy = min(max(sy, 0), src.rows - 1); + uchar const* S = src.ptr(sy); + int x = 0; + + switch (pix_size) + { + case 1: + for (; x <= width - 2; x += 2) + { + uchar t0 = S[x_ofs[x ]]; + uchar t1 = S[x_ofs[x + 1]]; + D[x ] = t0; + D[x + 1] = t1; + } + for (; x < width; ++x) + D[x] = S[x_ofs[x]]; + break; + case 2: + for (; x < width; ++x) + reinterpret_cast(D)[x] = *(reinterpret_cast(S + x_ofs[x])); + break; + case 3: + for (; x < width; ++x, D += 3) + { + const uchar* _tS = S + x_ofs[x]; + D[0] = _tS[0]; D[1] = _tS[1]; D[2] = _tS[2]; + } + break; + case 4: + for (; x < width; ++x) + reinterpret_cast(D)[x] = *(reinterpret_cast(S + x_ofs[x])); + break; + case 6: + for (; x < width; ++x, D += 6) + { + short const* _tS = reinterpret_cast(S + x_ofs[x]); + short* _tD = reinterpret_cast(D); + _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2]; + } + break; + case 8: + for (; x < width; ++x) + reinterpret_cast(D)[x] = *(reinterpret_cast(S + x_ofs[x])); + break; + case 12: + for (; x < width; ++x, D += 12) + { + int const* _tS = reinterpret_cast(S + x_ofs[x]); + int* _tD = reinterpret_cast(D); + _tD[0] = 
_tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2]; + } + break; +#if CV_SIMD128 + case 16: + for (; x < width; ++x, D += 16) + v_store(D, v_load(S + x_ofs[x])); + break; +#endif + default: + for (; x < width; ++x, D += pix_size) + { + uchar const* _tS = S + x_ofs[x]; + for (size_t k = 0; k < pix_size; ++k) + D[k] = _tS[k]; + } + } + } + } +}; + struct VResizeNoVec { template @@ -2924,16 +3100,15 @@ public: int _scale_x, int _scale_y, const int* _ofs, const int* _xofs) : ParallelLoopBody(), src(_src), dst(_dst), scale_x(_scale_x), scale_y(_scale_y), ofs(_ofs), xofs(_xofs) - { - } + {} virtual void operator() (const Range& range) const CV_OVERRIDE { Size ssize = src.size(), dsize = dst.size(); int cn = src.channels(); - int area = scale_x*scale_y; - float scale = 1.f/(area); - int dwidth1 = (ssize.width/scale_x)*cn; + int area = scale_x * scale_y; + float scale = 1.f / area; + int dwidth1 = ssize.width / scale_x * cn; dsize.width *= cn; ssize.width *= cn; int dy, dx, k = 0; @@ -2989,8 +3164,9 @@ public: count++; } } - - D[dx] = saturate_cast((float)sum/count); + // sum maybe double, converting it to float will decrease precision + // when count < 2^23, converting it to float is fine + D[dx] = saturate_cast(sum / static_cast(count)); } } } @@ -3260,6 +3436,561 @@ static void resizeArea_( const Mat& src, Mat& dst, } +class ResizeOnnxCtrl +{ + utils::BufferArea area; + +public: + struct TabIdx + { + int si, di; // index on src / dst by elem1 + union { float f; double d; }; // coefficient / weight + + void as(float& v) { v = f; } + void as(double& v) { v = d; } + }; + + /* resize parameter */ + bool is_fixpt, is_double; + int ksize, xkanti, ykanti; + + /* for antialias resize */ + TabIdx* xtab; + TabIdx* ytab; + /* for generic resize */ + int* xofs; + int* yofs; + double* xcoeffs; + double* ycoeffs; + int xmin, xmax; + +private: + void cubic_coeffs_antialias(int dstlen, int cn, float srcpos, float scale, int srclen, float A, TabIdx* elem) + { + scale = min(scale, 1.f); + int 
index = cvFloor(srcpos); + float ratio = srcpos - index; + int start = cvFloor(-2.f / scale) + 1; + int end = 2 - start; + int len = end - start; + float sum = 0; + for (int i = start; i < end; ++i) + { + float x = fabsf(i - ratio) * scale; + if (x <= 1) + x = ((A + 2) * x - (A + 3)) * x * x + 1; + else if (x <= 2) + x = A * (((x - 5) * x + 8) * x - 4); + else + x = 0; + elem[i - start].di = cn * dstlen; + elem[i - start].si = cn * min(max(index + i, 0), srclen - 1); + elem[i - start].f = x; + sum += x; + } + for (int i = 0; i < len; ++i) + { + if (is_double) + elem[i].d = elem[i].f / sum; + else + elem[i].f = elem[i].f / sum; + } + } + + void cubic_coeffs(float x, float A, float* coeffs) + { + coeffs[0] = A * ((((x + 1) - 5) * (x + 1) + 8) * (x + 1) - 4); + coeffs[1] = ((A + 2) * x - (A + 3)) * x * x + 1; + coeffs[2] = ((A + 2) * (1 - x) - (A + 3)) * (1 - x) * (1 - x) + 1; + coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2]; + } + + void linear_coeffs_antialias(int dstlen, int cn, float srcpos, float scale, int srclen, TabIdx* elem) + { + scale = min(scale, 1.f); + int index = cvFloor(srcpos); + float ratio = srcpos - index; + int start = cvFloor(-1.f / scale) + 1; + int end = 2 - start; + int len = end - start; + float sum = 0.f; + for (int i = start; i < end; ++i) + { + float x = fabsf(i - ratio) * scale; + x = min(max(1.f - x, 0.f), 1.f); + elem[i - start].di = cn * dstlen; + elem[i - start].si = cn * min(max(index + i, 0), srclen - 1); + elem[i - start].f = x; + sum += x; + } + for (int i = 0; i < len; ++i) + { + if (is_double) + elem[i].d = elem[i].f / sum; + else + elem[i].f = elem[i].f / sum; + } + } + + void linear_coeffs(float x, float* coeffs) + { + coeffs[0] = 1.f - x; + coeffs[1] = x; + } + + public: + ResizeOnnxCtrl(int interpolation, int type, float cubicCoeff, + Size ssize, Size dsize, Point2d const& scaled, Matx22f const& M) + { + int sampler = interpolation & INTER_SAMPLER_MASK; + int antialias = interpolation & INTER_ANTIALIAS_MASK; + Point2f 
scale = static_cast(scaled); + CV_CheckGE(cubicCoeff, -1.f, "cubic coefficient should range [-1, 0)"); + CV_CheckLT(cubicCoeff, +0.f, "cubic coefficient should range [-1, 0)"); + CV_Check(sampler, sampler == INTER_LINEAR || sampler == INTER_CUBIC, + "should not error"); + + int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type); + ksize = (sampler == INTER_LINEAR ? 2 : 4); + is_double = (depth == CV_64F); + is_fixpt = (depth == CV_8U || depth == CV_8S); + is_double = (depth == CV_32S || depth == CV_64F); + xtab = ytab = nullptr; + xofs = yofs = nullptr; + xcoeffs = ycoeffs = nullptr; + int khalf = ksize / 2; + xkanti = 2 * cvCeil(khalf / min(scale.x, 1.f)); + ykanti = 2 * cvCeil(khalf / min(scale.y, 1.f)); + area.allocate(xtab, xkanti * dsize.width ); + area.allocate(ytab, ykanti * dsize.height); + area.allocate(xofs, dsize.width * cn + 1); + area.allocate(yofs, dsize.height * 1 + 1); + area.allocate(xcoeffs, ksize * dsize.width * cn); + area.allocate(ycoeffs, ksize * dsize.height * 1); + area.commit(); + CV_CheckLE(ksize, MAX_ESIZE, "resampler kernel's size is too larger"); + + if (antialias) + { + float a = M(0, 0), b = M(0, 1); + for (int d = 0; d < dsize.width; ++d) + { + float f = fmaf(static_cast(d), a, b); + if (sampler == INTER_LINEAR) + linear_coeffs_antialias(d, cn, f, scale.x, ssize.width, xtab + d * xkanti); + else // if (sampler == INTER_CUBIC) + cubic_coeffs_antialias(d, cn, f, scale.x, ssize.width, cubicCoeff, xtab + d * xkanti); + } + } + else + { + xkanti = 0; + xmin = 0; + xmax = dsize.width; + float cbuf[MAX_ESIZE]; + float a = M(0, 0), b = M(0, 1); + for (int d = 0; d < dsize.width; ++d) + { + float f = fmaf(static_cast(d), a, b); + int s = cvFloor(f); + f -= s; + if (s < khalf - 1) { + xmin = d + 1; + if (s < 0 && sampler == INTER_LINEAR) + f = 0, s = 0; + } + if (s + khalf >= ssize.width) + { + xmax = min(xmax, d); + if (s >= ssize.width - 1 && sampler == INTER_LINEAR) + f = 0, s = ssize.width - 1; + } + for (int k = 0; k < cn; ++k) + xofs[cn * 
d + k] = cn * s + k; + if (sampler == INTER_LINEAR) + linear_coeffs(f, cbuf); + else // if (sampler == INTER_CUBIC) + cubic_coeffs(f, cubicCoeff, cbuf); + if (is_fixpt) + { + short* coeffs = reinterpret_cast(xcoeffs) + cn * ksize * d; + for (int k = 0; k < ksize; ++k) + coeffs[k] = saturate_cast(cbuf[k] * INTER_RESIZE_COEF_SCALE); + for (int k = ksize; k < cn * ksize; ++k) + coeffs[k] = coeffs[k - ksize]; + } + else if (is_double) + { + double* coeffs = xcoeffs + cn * ksize * d; + for (int k = 0; k < ksize; ++k) + coeffs[k] = cbuf[k]; + for (int k = ksize; k < cn * ksize; ++k) + coeffs[k] = coeffs[k - ksize]; + } + else + { + float* coeffs = reinterpret_cast(xcoeffs) + cn * ksize * d; + for (int k = 0; k < ksize; ++k) + coeffs[k] = cbuf[k]; + for (int k = ksize; k < cn * ksize; ++k) + coeffs[k] = coeffs[k - ksize]; + } + } + } + + if (antialias) + { + float a = M(1, 0), b = M(1, 1); + for (int d = 0; d < dsize.height; ++d) + { + float f = fmaf(static_cast(d), a, b); + if (sampler == INTER_LINEAR) + linear_coeffs_antialias(d, 1, f, scale.y, ssize.height, ytab + d * ykanti); + else // if (sampler == INTER_CUBIC) + cubic_coeffs_antialias(d, 1, f, scale.y, ssize.height, cubicCoeff, ytab + d * ykanti); + } + } + else + { + ykanti = 0; + float cbuf[MAX_ESIZE]; + float a = M(1, 0), b = M(1, 1); + for (int d = 0; d < dsize.height; ++d) + { + float f = fmaf(static_cast(d), a, b); + int s = cvFloor(f); + f -= s; + yofs[d] = s; + if (sampler == INTER_LINEAR) + linear_coeffs(f, cbuf); + else // if (sampler == INTER_CUBIC) + cubic_coeffs(f, cubicCoeff, cbuf); + if (is_fixpt) + { + short* coeffs = reinterpret_cast(ycoeffs) + 1 * ksize * d; + for (int k = 0; k < ksize; ++k) + coeffs[k] = saturate_cast(cbuf[k] * INTER_RESIZE_COEF_SCALE); + } + else if (is_double) + { + double* coeffs = ycoeffs + 1 * ksize * d; + for (int k = 0; k < ksize; ++k) + coeffs[k] = cbuf[k]; + } + else + { + float* coeffs = reinterpret_cast(ycoeffs) + 1 * ksize * d; + for (int k = 0; k < ksize; ++k) + 
coeffs[k] = cbuf[k]; + } + } + } + } +}; + +template +class ResizeOnnxInvoker : public ParallelLoopBody +{ + Mat const& src; + Mat& dst; + ResizeOnnxCtrl const& ctrl; + HResize hresize; + VResize vresize; + + ResizeOnnxInvoker& operator =(ResizeOnnxInvoker const&); + +public: + typedef typename HResize::value_type T; + typedef typename HResize::buf_type WT; + typedef typename HResize::alpha_type AT; + + ResizeOnnxInvoker(const Mat& _src, Mat& _dst, ResizeOnnxCtrl const& _ctrl) : + src(_src), dst(_dst), ctrl(_ctrl) + { + CV_CheckLE(ctrl.ksize, MAX_ESIZE, "resampler kernel's size is too larger"); + CV_Check(ctrl.is_fixpt, !(ctrl.is_fixpt && ctrl.is_double), "can not be both types"); + // prefer static_assert, but how ? +#ifdef CV_CXX11 + // check generic resize + if (ctrl.is_fixpt) + { + CV_Check(ctrl.is_fixpt, (std::is_same::value), + "when use fixpt / short coeffs, AT is expected to be short"); + CV_Check(sizeof(T) * 10 + sizeof(WT), + (std::is_same::value + && (std::is_same::value || std::is_same::value)), + "something wrong"); + } + else if (ctrl.is_double) + { + CV_Check(ctrl.is_double, (std::is_same::value), + "when use double coeffs, AT is expected to be double"); + CV_Check(sizeof(T) * 10 + sizeof(WT), + (std::is_same::value && + (std::is_same::value || std::is_same::value)), + "something wrong"); + } + else + { + CV_Check(sizeof(AT), (std::is_same::value), + "when use float coeffs, AT is expected to be short"); + CV_Check(sizeof(T) * 10 + sizeof(WT), + (std::is_same::value + && (std::is_same::value || std::is_same::value + || std::is_same::value)), + "something wrong"); + } + // check antialias resize + if (ctrl.is_double) + { + CV_Check(ctrl.is_double, (std::is_same::value), + "when use double coeffs, AT is expected to be double"); + } + else + { + CV_Check(ctrl.is_double, (std::is_same::value), + "when use double coeffs, AT is expected to be double"); + } + CV_Check(sizeof(IdxT) * 10 + sizeof(WT), + (std::is_same::type>::value), + "something wrong"); 
+#endif + } + + void hori_antialias_accumulate(T const* S, IdxT* L) const + { + IdxT alpha; + int const cn = dst.channels(); + int const len = ctrl.xkanti * dst.cols; + if (cn == 1) + for (int k = 0; k < len; ++k) + { + int di = ctrl.xtab[k].di; + int si = ctrl.xtab[k].si; + ctrl.xtab[k].as(alpha); + L[di] += S[si] * alpha; + } + else if (cn == 2) + for (int k = 0; k < len; ++k) + { + int di = ctrl.xtab[k].di; + int si = ctrl.xtab[k].si; + ctrl.xtab[k].as(alpha); + L[di ] += S[si ] * alpha; + L[di + 1] += S[si + 1] * alpha; + } + else if (cn == 3) + for (int k = 0; k < len; ++k) + { + int di = ctrl.xtab[k].di; + int si = ctrl.xtab[k].si; + ctrl.xtab[k].as(alpha); + L[di ] += S[si ] * alpha; + L[di + 1] += S[si + 1] * alpha; + L[di + 2] += S[si + 2] * alpha; + } + else if (cn == 4) + for (int k = 0; k < len; ++k) + { + int di = ctrl.xtab[k].di; + int si = ctrl.xtab[k].si; + ctrl.xtab[k].as(alpha); + L[di ] += S[si ] * alpha; + L[di + 1] += S[si + 1] * alpha; + L[di + 2] += S[si + 2] * alpha; + L[di + 3] += S[si + 3] * alpha; + } + else + for (int k = 0; k < len; ++k) + { + int di = ctrl.xtab[k].di; + int si = ctrl.xtab[k].si; + ctrl.xtab[k].as(alpha); + for (int c = 0; c < cn; ++c) + L[di + c] += S[si + c] * alpha; + } + } + + void hori_antialias_lines(T const** srcptr, WT** dstptr, IdxT* L, int count) const + { + int cn = dst.channels(); + int dwidth = dst.cols * cn; +#ifdef CV_CXX11 + constexpr bool same_wt_idxt = std::is_same::value; +#else + bool const same_wt_idxt = false; +#endif + for (int i = 0; i < count; ++i) + { + T const* S = srcptr[i]; + // reinterpret_cast makes compiler happy + if (same_wt_idxt) + L = reinterpret_cast(dstptr[i]); + memset(L, 0, sizeof(IdxT) * dwidth); + hori_antialias_accumulate(S, L); + if (!same_wt_idxt) + { + WT* D = dstptr[i]; + if (ctrl.is_fixpt) + { + float const alpha = INTER_RESIZE_COEF_SCALE; + for (int k = 0; k < dwidth; ++k) + D[k] = saturate_cast(L[k] * alpha); + } + else + { + for (int k = 0; k < dwidth; ++k) + D[k] = 
saturate_cast(L[k]); + } + } + } + } + + void hori_generic_lines(T const** srcptr, WT** dstptr, int count) const + { + int cn = src.channels(); + int ssize = src.cols * cn; + int dsize = dst.cols * cn; + int xmin = ctrl.xmin * cn; + int xmax = ctrl.xmax * cn; + // just call hresize + hresize(srcptr, dstptr, count, + ctrl.xofs, reinterpret_cast(ctrl.xcoeffs), + ssize, dsize, cn, xmin, xmax); + } + + void vert_antialias_hori_antialias(int dy, IdxT* L, IdxT* A) const + { + // the start and end of ytab + int dwidth = dst.channels() * dst.cols; + int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti; + memset(A, 0, dwidth * sizeof(IdxT)); + for (int t = tstart; t < tend; ++t) + { + IdxT beta; + int sy = ctrl.ytab[t].si; + CV_CheckEQ(dy, ctrl.ytab[t].di, "something wrong"); + ctrl.ytab[t].as(beta); + memset(L, 0, dwidth * sizeof(IdxT)); + hori_antialias_accumulate(src.template ptr(sy), L); + for (int w = 0; w < dwidth; ++w) + A[w] += L[w] * beta; + } + T* D = dst.template ptr(dy); + for (int w = 0; w < dwidth; ++w) + D[w] = saturate_cast(A[w]); + } + + void vert_antialias_hori_generic(int dy, WT* L, IdxT* A) const + { + // FixedPtCast cast; + int dwidth = dst.channels() * dst.cols; + int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti; + memset(A, 0, dwidth * sizeof(IdxT)); + for (int t = tstart; t < tend; ++t) + { + IdxT beta; + int sy = ctrl.ytab[t].si; + CV_CheckEQ(dy, ctrl.ytab[t].di, "something wrong"); + ctrl.ytab[t].as(beta); + T const* S = src.template ptr(sy); + hori_generic_lines(&S, &L, 1); + if (ctrl.is_fixpt) + beta /= INTER_RESIZE_COEF_SCALE; + for (int w = 0; w < dwidth; ++w) + A[w] += L[w] * beta; + } + T* D = dst.template ptr(dy); + for (int w = 0; w < dwidth; ++w) + D[w] = saturate_cast(A[w]); + } + + void vert_antialias(Range const& range) const + { + int cn = dst.channels(); + int dwidth = dst.cols * cn; + AutoBuffer line(dwidth * 2); + IdxT* L = line.data(); + IdxT* A = line.data() + dwidth; + WT* Lw = reinterpret_cast(L); + for (int dy = 
range.start; dy < range.end; ++dy) + { + if (ctrl.xkanti) + vert_antialias_hori_antialias(dy, L, A); + else + vert_antialias_hori_generic(dy, Lw, A); + } + } + + void vert_generic(Range const& range) const + { + int ksize = ctrl.ksize, ksize2 = ksize / 2; + int cn = src.channels(); + int dwidth = dst.cols * cn; + size_t bufstep = alignSize(dwidth, CV_SIMD_WIDTH / sizeof(IdxT)); + AutoBuffer _buffer(bufstep * (ksize + 1)); + T const* srows[MAX_ESIZE] = {0}; + WT* rows[MAX_ESIZE] = {0}; + int prev_sy[MAX_ESIZE]; + IdxT* L = _buffer.data() + bufstep * ksize; + for (int k = 0; k < ksize; ++k) + { + prev_sy[k] = -1; + rows[k] = reinterpret_cast(_buffer.data() + bufstep * k); + } + AT const* beta = reinterpret_cast(ctrl.ycoeffs) + ksize * range.start; + for (int dy = range.start; dy < range.end; ++dy, beta += ksize) + { + int sy0 = ctrl.yofs[dy], k0 = ksize, k1 = 0; + for(int k = 0; k < ksize; k++ ) + { + int sy = min(max(sy0 - ksize2 + 1 + k, 0), src.rows - 1); + for (k1 = max(k1, k); k1 < ksize; ++k1) + { + // if the sy-th row has been computed already, reuse it. 
+ if (sy == prev_sy[k1]) + { + if (k1 > k) + memcpy(rows[k], rows[k1], bufstep * sizeof(WT)); + break; + } + } + // remember the first row that needs to be computed + if( k1 == ksize ) + k0 = min(k0, k); + srows[k] = src.template ptr(sy); + prev_sy[k] = sy; + } + + if (k0 < ksize) + { + if (ctrl.xkanti) + hori_antialias_lines(srows + k0, rows + k0, L, ksize - k0); + else + hori_generic_lines(srows + k0, rows + k0, ksize - k0); + } + vresize(const_cast(rows), dst.template ptr(dy), beta, dwidth); + } + } + + virtual void operator() (Range const& range) const CV_OVERRIDE + { + if (ctrl.ykanti) + vert_antialias(range); + else + vert_generic(range); + } +}; + +template +static void resizeOnnx_(Mat const& src, Mat& dst, ResizeOnnxCtrl const& ctrl) +{ + parallel_for_(Range(0, dst.rows), + ResizeOnnxInvoker(src, dst, ctrl), + static_cast(dst.rows) * dst.cols / (1 << 16)); +} + + typedef void (*ResizeFunc)( const Mat& src, Mat& dst, const int* xofs, const void* alpha, const int* yofs, const void* beta, @@ -3274,6 +4005,8 @@ typedef void (*ResizeAreaFunc)( const Mat& src, Mat& dst, const DecimateAlpha* ytab, int ytab_size, const int* yofs); +typedef void (*ResizeOnnxFunc)(Mat const& src, Mat& dst, ResizeOnnxCtrl const&); + static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, DecimateAlpha* tab ) { @@ -3316,7 +4049,9 @@ static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, Dec return k; } + #ifdef HAVE_OPENCL + static void ocl_computeResizeAreaTabs(int ssize, int dsize, double scale, int * const map_tab, float * const alpha_tab, int * const ofs_tab) { @@ -3569,6 +4304,255 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, return k.run(2, globalsize, 0, false); } +static void ocl_resizeOnnxTable(int srclen, int dstlen, int esz, + int sampler, float a, float b, float A, float scale, int* offset, float* coeff) +{ + // maybe want do linear resize in this way? 
+ CV_Assert(sampler == INTER_LINEAR || sampler == INTER_CUBIC); + scale = min(scale, 1.f); + int start = cvFloor((sampler == INTER_LINEAR ? -1.f : -2.f) / scale) + 1; + int end = 2 - start; + int kanti = end - start; + for (int d = 0; d < dstlen; ++d) + { + float spos = fmaf(static_cast(d), a, b); + int index = cvFloor(spos); + float ratio = spos - index; + float sum = 0.f; + for (int i = start; i < end; ++i) + { + float x = fabs(i - ratio) * scale; + if (sampler == INTER_LINEAR) + x = min(max(x, 0.f), 1.f); + else + { + if (x <= 1) + x = ((A + 2) * x - (A + 3)) * x * x + 1; + else if (x <= 2) + x = A * (((x - 5) * x + 8) * x - 4); + else + x = 0; + } + // make work-item(s) in a work-group load offset / coeff in one / fewer memory transaction + // offsets & coeffs are arranged like + // 00 10 20 ... n0 + // 01 11 21 ... n1 ... + // 0(k-1) 1(k-1) 2(k-1) ... n(k-1) + int to = d + (i - start) * dstlen; + offset[to] = min(max(index + i, 0), srclen - 1) * esz; + coeff [to] = x; + sum += x; + } + for (int i = 0; i < kanti; ++i) + coeff[d + i * dstlen] /= sum; + } +} + +static char const* ocl_resizeOnnx_typeToString(int type, char* buf, size_t size) +{ + // typeToStr CV_Assert will failed + static char const* tab[CV_64F + 1] = + { "uchar", "char", "ushort", "short", "int", "float", "double" }; + int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + if (cn == 1) + return tab[depth]; + CV_Assert(size >= 18); + snprintf(buf, size, "%s%d", tab[depth], cn); + return buf; +} + +static char const* ocl_resizeOnnx_convertTypeString(int sdepth, int ddepth, int cn, char* buf, size_t size) +{ + if( sdepth == ddepth ) + return "noconvert"; + char dtype[32]; + const char *typestr = ocl_resizeOnnx_typeToString(CV_MAKETYPE(ddepth, cn), dtype, sizeof(dtype)); + if ((ddepth >= CV_32F) || + (ddepth == CV_32S && sdepth < CV_32S) || + (ddepth == CV_16S && sdepth <= CV_8S) || + (ddepth == CV_16U && sdepth == CV_8U)) + snprintf(buf, size, "convert_%s", typestr); + else if (sdepth >= CV_32F) + 
snprintf(buf, size, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : "")); + else + snprintf(buf, size, "convert_%s_sat", typestr); + return buf; +} + + +static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, + Matx22f const& M, Point2d const& scaled, int interpolation, float cubicCoeff) +{ + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + int sampler = interpolation & INTER_SAMPLER_MASK; + int nearest = interpolation & INTER_NEAREST_MODE_MASK; + int antialias = interpolation & INTER_ANTIALIAS_MASK; + Point2f scale = static_cast(scaled); + if (depth > CV_64F) + return false; + + ocl::Kernel k; + UMat src = _src.getUMat(), dst = _dst.getUMat(); + size_t globalsize[] = {static_cast(dst.cols), static_cast(dst.rows)}; + char buf[6][64]; + int pixel_size = static_cast(src.elemSize()); + int T = depth, VT = type; + String buildopts, errmsg; + + if (sampler == INTER_NEAREST) + { + int W = depth, VW = type; + float offset = (nearest == INTER_NEAREST_PREFER_FLOOR) ? -0.5f : + (nearest == INTER_NEAREST_PREFER_CEIL) ? 
0.5f : 0.f; + static char const *nearest_name[4] = { + "INTER_NEAREST_PREFER_FLOOR", "INTER_NEAREST_PREFER_CEIL", + "INTER_NEAREST_FLOOR", "INTER_NEAREST_CEIL" + }; + buildopts = format( + "-D INTER_NEAREST -D %s " + "-D T=%s -D W=%s -D CN=%d -D PIXEL_SIZE=%d -D VT=%s -D VW=%s " + "-D TO_WORK=%s -D TO_VEC_WORK=%s -D TO_TYPE=%s -D TO_VEC_TYPE=%s ", + nearest_name[nearest >> INTER_NEAREST_MODE_SHIFT], + ocl_resizeOnnx_typeToString(T, nullptr, 0), + ocl_resizeOnnx_typeToString(W, nullptr, 0), + cn, pixel_size, + ocl_resizeOnnx_typeToString(VT, buf[0], sizeof(buf[0])), + ocl_resizeOnnx_typeToString(VW, buf[1], sizeof(buf[1])), + ocl_resizeOnnx_convertTypeString(T, W, 1, buf[2], sizeof(buf[2])), + ocl_resizeOnnx_convertTypeString(T, W, cn, buf[3], sizeof(buf[3])), + ocl_resizeOnnx_convertTypeString(W, T, 1, buf[4], sizeof(buf[4])), + ocl_resizeOnnx_convertTypeString(W, T, cn, buf[5], sizeof(buf[5])) + ); + k.create("resizeOnnx_nearest", ocl::imgproc::resize_onnx_oclsrc, + buildopts, &errmsg); + if (k.empty()) + return false; + k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), + pixel_size, offset, M(0, 0), M(0, 1), M(1, 0), M(1, 1)); + } + else if (sampler == INTER_LINEAR && !antialias) + { + int W = (T < CV_32S || T == CV_32F) ? 
CV_32F : CV_64F, VW = CV_MAKETYPE(W, cn); + buildopts = format( + "-D INTER_LINEAR " + "-D T=%s -D W=%s -D CN=%d -D PIXEL_SIZE=%d -D VT=%s -D VW=%s " + "-D TO_WORK=%s -D TO_VEC_WORK=%s -D TO_TYPE=%s -D TO_VEC_TYPE=%s ", + ocl_resizeOnnx_typeToString(T, nullptr, 0), + ocl_resizeOnnx_typeToString(W, nullptr, 0), + cn, pixel_size, + ocl_resizeOnnx_typeToString(VT, buf[0], sizeof(buf[0])), + ocl_resizeOnnx_typeToString(VW, buf[1], sizeof(buf[1])), + ocl_resizeOnnx_convertTypeString(T, W, 1, buf[2], sizeof(buf[2])), + ocl_resizeOnnx_convertTypeString(T, W, cn, buf[3], sizeof(buf[3])), + ocl_resizeOnnx_convertTypeString(W, T, 1, buf[4], sizeof(buf[4])), + ocl_resizeOnnx_convertTypeString(W, T, cn, buf[5], sizeof(buf[5])) + ); + k.create("resizeOnnx_linear", ocl::imgproc::resize_onnx_oclsrc, + buildopts, &errmsg); + if (k.empty()) + return false; + k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), + pixel_size, cn, M(0, 0), M(0, 1), M(1, 0), M(1, 1)); + } + else if (sampler == INTER_LINEAR && antialias) + { + int W = (T < CV_32S || T == CV_32F) ? 
CV_32F : CV_64F, VW = CV_MAKETYPE(W, cn); + buildopts = format( + "-D INTER_LINEAR -D INTER_ANTIALIAS " + "-D T=%s -D W=%s -D CN=%d -D PIXEL_SIZE=%d -D VT=%s -D VW=%s " + "-D TO_WORK=%s -D TO_VEC_WORK=%s -D TO_TYPE=%s -D TO_VEC_TYPE=%s ", + ocl_resizeOnnx_typeToString(T, nullptr, 0), + ocl_resizeOnnx_typeToString(W, nullptr, 0), + cn, pixel_size, + ocl_resizeOnnx_typeToString(VT, buf[0], sizeof(buf[0])), + ocl_resizeOnnx_typeToString(VW, buf[1], sizeof(buf[1])), + ocl_resizeOnnx_convertTypeString(T, W, 1, buf[2], sizeof(buf[2])), + ocl_resizeOnnx_convertTypeString(T, W, cn, buf[3], sizeof(buf[3])), + ocl_resizeOnnx_convertTypeString(W, T, 1, buf[4], sizeof(buf[4])), + ocl_resizeOnnx_convertTypeString(W, T, cn, buf[5], sizeof(buf[5])) + ); + k.create("resizeOnnx_linear_antialias", ocl::imgproc::resize_onnx_oclsrc, + buildopts, &errmsg); + if (k.empty()) + return false; + k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), + pixel_size, cn, M(0, 0), M(0, 1), M(1, 0), M(1, 1), + min(scale.x, 1.f), min(scale.y, 1.f)); + } + else if (sampler == INTER_CUBIC && !antialias) + { + int W = (T < CV_32S || T == CV_32F) ? 
CV_32F : CV_64F, VW = CV_MAKETYPE(W, cn); + buildopts = format( + "-D INTER_CUBIC " + "-D T=%s -D W=%s -D CN=%d -D PIXEL_SIZE=%d -D VT=%s -D VW=%s " + "-D TO_WORK=%s -D TO_VEC_WORK=%s -D TO_TYPE=%s -D TO_VEC_TYPE=%s ", + ocl_resizeOnnx_typeToString(T, nullptr, 0), + ocl_resizeOnnx_typeToString(W, nullptr, 0), + cn, pixel_size, + ocl_resizeOnnx_typeToString(VT, buf[0], sizeof(buf[0])), + ocl_resizeOnnx_typeToString(VW, buf[1], sizeof(buf[1])), + ocl_resizeOnnx_convertTypeString(T, W, 1, buf[2], sizeof(buf[2])), + ocl_resizeOnnx_convertTypeString(T, W, cn, buf[3], sizeof(buf[3])), + ocl_resizeOnnx_convertTypeString(W, T, 1, buf[4], sizeof(buf[4])), + ocl_resizeOnnx_convertTypeString(W, T, cn, buf[5], sizeof(buf[5])) + ); + k.create("resizeOnnx_cubic", ocl::imgproc::resize_onnx_oclsrc, + buildopts, &errmsg); + if (k.empty()) + return false; + k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), + pixel_size, cn, M(0, 0), M(0, 1), M(1, 0), M(1, 1), cubicCoeff); + } + else if (sampler == INTER_CUBIC && antialias) + { + int ksize = 4; + int khalf = ksize / 2; + int xkanti = 2 * cvCeil(khalf / min(scale.x, 1.f)); + int ykanti = 2 * cvCeil(khalf / min(scale.y, 1.f)); + int xstride = xkanti * dst.cols; + int ystride = ykanti * dst.rows; + int tabsize = (xstride + ystride) * 2; + AutoBuffer table(tabsize); + int* xoffset = table.data(); + int* yoffset = xoffset + xstride; + float* xcoeff = reinterpret_cast(yoffset + ystride); + float* ycoeff = reinterpret_cast(xcoeff + xstride); + ocl_resizeOnnxTable(src.cols, dst.cols, pixel_size, + sampler, M(0, 0), M(0, 1), cubicCoeff, scale.x, xoffset, xcoeff); + ocl_resizeOnnxTable(src.rows, dst.rows, 1, + sampler, M(1, 0), M(1, 1), cubicCoeff, scale.y, yoffset, ycoeff); + UMat utable; + Mat(1, tabsize, CV_32S, table.data()).copyTo(utable); + int W = (T < CV_32S || T == CV_32F) ? 
CV_32F : CV_64F, VW = CV_MAKETYPE(W, cn); + buildopts = format( + "-D INTER_CUBIC -D INTER_ANTIALIAS " + "-D T=%s -D W=%s -D CN=%d -D PIXEL_SIZE=%d -D VT=%s -D VW=%s " + "-D TO_WORK=%s -D TO_VEC_WORK=%s -D TO_TYPE=%s -D TO_VEC_TYPE=%s ", + ocl_resizeOnnx_typeToString(T, nullptr, 0), + ocl_resizeOnnx_typeToString(W, nullptr, 0), + cn, pixel_size, + ocl_resizeOnnx_typeToString(VT, buf[0], sizeof(buf[0])), + ocl_resizeOnnx_typeToString(VW, buf[1], sizeof(buf[1])), + ocl_resizeOnnx_convertTypeString(T, W, 1, buf[2], sizeof(buf[2])), + ocl_resizeOnnx_convertTypeString(T, W, cn, buf[3], sizeof(buf[3])), + ocl_resizeOnnx_convertTypeString(W, T, 1, buf[4], sizeof(buf[4])), + ocl_resizeOnnx_convertTypeString(W, T, cn, buf[5], sizeof(buf[5])) + ); + k.create("resizeOnnx_table", ocl::imgproc::resize_onnx_oclsrc, + buildopts, &errmsg); + if (k.empty()) + return false; + k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), + pixel_size, cn, xkanti, ykanti, ocl::KernelArg::PtrReadOnly(utable)); + } + else + CV_Error(cv::Error::StsError, "should not got here"); + + if (errmsg.size()) + fputs(errmsg.data(), stderr); + return k.run(2, globalsize, 0, false); +} + #endif #ifdef HAVE_IPP @@ -4190,6 +5174,225 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, } +void cv::resizeOnnx(InputArray _src, OutputArray _dst, + Size dsize, Point2d scale, int interpolation, float cubicCoeff, Rect2d const& roi) +{ + CV_INSTRUMENT_REGION(); + + Size ssize = _src.size(); + CV_CheckEQ(_src.dims(), 2, "only 2 dim image is support now"); + CV_CheckFalse(ssize.empty(), "src size must not be empty"); + // allow user input both dsize and scale + if (dsize.empty()) + { + CV_CheckGT(scale.x, 0.0, "scale must > 0 if no dsize given"); + CV_CheckGT(scale.y, 0.0, "scale must > 0 if no dsize given"); + // https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py#L365 + // output_size = (scale_factors * np.array(data.shape)).astype(int) + dsize.width = 
static_cast(scale.x * ssize.width ); + dsize.height = static_cast(scale.y * ssize.height); + } + if (scale.x == 0 || scale.y == 0) + { + scale.x = static_cast(dsize.width ) / ssize.width; + scale.y = static_cast(dsize.height) / ssize.height; + } + CV_CheckFalse(dsize.empty(), "dst size must not empty"); + CV_CheckGT(scale.x, 0.0, "computed scale <= 0 with given dsize"); + CV_CheckGT(scale.y, 0.0, "computed scale <= 0 with given dsize"); + + int sampler = interpolation & INTER_SAMPLER_MASK; + int nearest = interpolation & INTER_NEAREST_MODE_MASK; + int coordinate = interpolation & INTER_COORDINATE_MASK; + CV_Assert( + sampler == INTER_NEAREST || + sampler == INTER_LINEAR || + sampler == INTER_CUBIC); + CV_Assert( + nearest == INTER_NEAREST_PREFER_FLOOR || + nearest == INTER_NEAREST_PREFER_CEIL || + nearest == INTER_NEAREST_FLOOR || + nearest == INTER_NEAREST_CEIL); + CV_Assert( + coordinate == INTER_HALF_PIXEL || + coordinate == INTER_HALF_PIXEL_PYTORCH || + coordinate == INTER_HALF_PIXEL_SYMMETRIC || + coordinate == INTER_ALIGN_CORNERS || + coordinate == INTER_ASYMMETRIC || + coordinate == INTER_TF_CROP_RESIZE); + + // affine transformation matrix: x' = ax + b + Matx22f M; + Vec2f xcoef = interCoordinate( + coordinate, dsize.width, ssize.width, scale.x, roi.x, roi.x + roi.width); + Vec2f ycoef = interCoordinate( + coordinate, dsize.height, ssize.height, scale.y, roi.y, roi.y + roi.height); + M(0, 0) = xcoef[0]; + M(0, 1) = xcoef[1]; + M(1, 0) = ycoef[0]; + M(1, 1) = ycoef[1]; + + _dst.create(dsize, _src.type()); + if (dsize == ssize && coordinate != INTER_TF_CROP_RESIZE) + { + // Source and destination are of same size. Use simple copy. + _src.copyTo(_dst); + return; + } + + // Fake reference to source. Resolves issue 13577 in case of src == dst. 
+ UMat srcUMat; + if (_src.isUMat()) + srcUMat = _src.getUMat(); + + CV_OCL_RUN(_src.isUMat() && _dst.isUMat(), + ocl_resizeOnnx(_src, _dst, M, scale, interpolation, cubicCoeff)) + + Mat src = _src.getMat(), dst = _dst.getMat(); + + if (sampler == INTER_NEAREST) + { + parallel_for_(Range(0, dsize.height), + ResizeOnnxNNInvoker(src, dst, M, nearest), + static_cast(dsize.height) * dsize.width / (1 << 16)); + return; + } + + static ResizeOnnxFunc linear_tab[] = + { + resizeOnnx_< + HResizeLinear, + VResizeLinear, + VResizeLinearVec_32s8u>, + float>, + resizeOnnx_< + HResizeLinear, + VResizeLinear, + VResizeNoVec>, + float>, + resizeOnnx_< + HResizeLinear, + VResizeLinear, VResizeLinearVec_32f16u>, + float>, + resizeOnnx_< + HResizeLinear, + VResizeLinear, VResizeLinearVec_32f16s>, + float>, + resizeOnnx_< + HResizeLinear, + VResizeLinear, VResizeNoVec>, + double>, + resizeOnnx_< + HResizeLinear, + VResizeLinear, VResizeLinearVec_32f>, + float>, + resizeOnnx_< + HResizeLinear, + VResizeLinear, VResizeNoVec>, + double>, + nullptr + }; + + static ResizeOnnxFunc cubic_tab[] = + { + resizeOnnx_< + HResizeCubic, + VResizeCubic, + VResizeCubicVec_32s8u>, + float>, + resizeOnnx_< + HResizeCubic, + VResizeCubic, + VResizeNoVec>, + float>, + resizeOnnx_< + HResizeCubic, + VResizeCubic, VResizeCubicVec_32f16u>, + float>, + resizeOnnx_< + HResizeCubic, + VResizeCubic, VResizeCubicVec_32f16s>, + float>, + resizeOnnx_< + HResizeCubic, + VResizeCubic, VResizeNoVec>, + double>, + resizeOnnx_< + HResizeCubic, + VResizeCubic, VResizeCubicVec_32f>, + float>, + resizeOnnx_< + HResizeCubic, + VResizeCubic, VResizeNoVec>, + double>, + nullptr + }; + +#if 0 + static ResizeAreaFastFunc areafast_tab[] = + { + resizeAreaFast_ >, + resizeAreaFast_ >, + resizeAreaFast_ >, + resizeAreaFast_ >, + resizeAreaFast_ >, + resizeAreaFast_, + resizeAreaFast_ >, + nullptr + }; + + // check if can use area fast + Point2d inv_scale(1.0 / scale.x, 1.0 / scale.y); + bool areafast_scale = fabs(inv_scale.y - 
2.0) + fabs(inv_scale.x - 2.0) <= DBL_EPSILON; + bool areafast_size = (fabs(ssize.height - dsize.height * inv_scale.y) <= DBL_EPSILON) + && (fabs(ssize.width - dsize.width * inv_scale.x) <= DBL_EPSILON); + bool areafast_coordiante = (coordinate == INTER_HALF_PIXEL) + || (coordinate == INTER_HALF_PIXEL_SYMMETRIC) + || (coordinate == INTER_HALF_PIXEL_PYTORCH && min(dsize.height, dsize.width) > 1); + bool areafast_sampler = (sampler == INTER_LINEAR) && !(interpolation & INTER_ANTIALIAS_MASK); + if (areafast_scale && areafast_size && areafast_coordiante && areafast_sampler) + { + int iiy = static_cast(inv_scale.y); + int iix = static_cast(inv_scale.x); + int area = iiy * iix; + int srcstep = static_cast(src.step1()); + AutoBuffer _ofs(area + dsize.width * cn); + int* ofs = _ofs.data(); + int* xofs = ofs + area; + ResizeAreaFastFunc func = areafast_tab[depth]; + CV_Check(0, func, "empty implementation in area fast"); + // offsets of a pixel's sources to its left-top + for (int sy = 0, k = 0; sy < iiy; ++sy) + for (int sx = 0; sx < iix; ++sx) + ofs[k++] = sy * srcstep + sx * cn; + // left-top offsets of all pixels on a row + for (int dx = 0; dx < dsize.width; ++dx) + { + int j = dx * cn; + int sx = iix * j; + for(int k = 0; k < cn; k++ ) + xofs[j + k] = sx + k; + } + func(src, dst, ofs, xofs, iix, iiy); + return; + } +#endif + + int depth = src.depth(); + ResizeOnnxCtrl ctrl(interpolation, src.type(), cubicCoeff, ssize, dsize, scale, M); + ResizeOnnxFunc func = linear_tab[depth]; + if (sampler == INTER_LINEAR) + func = linear_tab[depth]; + else if (sampler == INTER_CUBIC) + func = cubic_tab[depth]; + else + CV_Error(CV_StsBadArg, format("Unknown sampler %d", sampler)); + CV_Check(0, func, "empty implementation in area fast"); + + func(src, dst, ctrl); +} + + CV_IMPL void cvResize( const CvArr* srcarr, CvArr* dstarr, int method ) { diff --git a/modules/imgproc/test/ocl/test_warp.cpp b/modules/imgproc/test/ocl/test_warp.cpp index 852dc465ab..963fd8bc0c 100644 --- 
a/modules/imgproc/test/ocl/test_warp.cpp +++ b/modules/imgproc/test/ocl/test_warp.cpp @@ -160,11 +160,7 @@ PARAM_TEST_CASE(WarpTest_cols4_Base, MatType, Interpolation, bool, bool) } }; -/////warpAffine - -typedef WarpTestBase WarpAffine; - -/////warpAffine +//// warpAffine typedef WarpTestBase WarpAffine; @@ -340,6 +336,116 @@ OCL_TEST(Resize, overflow_21198) EXPECT_LE(cv::norm(dst_u, dst, NORM_INF), 1.0f); } +PARAM_TEST_CASE(ResizeOnnx, MatType, double, double, int, bool, int) +{ + int type, interpolation; + int widthMultiple; + double fx, fy; + bool useRoi; + Mat middle; + + TEST_DECLARE_INPUT_PARAMETER(src); + TEST_DECLARE_OUTPUT_PARAMETER(dst); + + virtual void SetUp() + { + type = GET_PARAM(0); + fx = GET_PARAM(1); + fy = GET_PARAM(2); + interpolation = GET_PARAM(3); + useRoi = GET_PARAM(4); + widthMultiple = GET_PARAM(5); + } + + void random_roi() + { + CV_Assert(fx > 0 && fy > 0); + + Size srcRoiSize = randomSize(10, MAX_VALUE), dstRoiSize; + // Make sure the width is a multiple of the requested value, and no more + srcRoiSize.width += widthMultiple - 1 - (srcRoiSize.width - 1) % widthMultiple; + dstRoiSize.width = cvRound(srcRoiSize.width * fx); + dstRoiSize.height = cvRound(srcRoiSize.height * fy); + + if (dstRoiSize.empty()) + { + random_roi(); + return; + } + + Border srcBorder = randomBorder(0, useRoi ? 
MAX_VALUE : 0); + randomSubMat(src, src_roi, srcRoiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE); + +#if 0 + // if nearest test failed, maybe the fma issue, try open this #if + // set pixels' value to their coordinate + if ((interpolation & INTER_SAMPLER_MASK) == INTER_NEAREST) + { + int channel = CV_MAT_CN(type); + middle.create(src.rows, src.cols, CV_16SC(channel)); + for (int h = 0; h < src.rows; ++h) + { + for (int c = 0; c < channel; c += 2) + { + // even x; odd y + short* S = middle.ptr(h) + c; + for (int w = 0; w < src.cols; ++w, S += channel) + S[0] = static_cast(w); + } + for (int c = 1; c < channel; c += 2) + { + // even x; odd y + short* S = middle.ptr(h) + c; + for (int w = 0; w < src.cols; ++w, S += channel) + S[0] = static_cast(h); + } + } + middle.convertTo(src, type); + src_roi = src(Rect(srcBorder.lef, srcBorder.top, srcRoiSize.width, srcRoiSize.height)); + } +#endif + Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(dst, dst_roi, dstRoiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE); + + UMAT_UPLOAD_INPUT_PARAMETER(src); + UMAT_UPLOAD_OUTPUT_PARAMETER(dst); + } +}; + +OCL_TEST_P(ResizeOnnx, Mat) +{ + Size whole; + Point offset; + Mat host, host_roi; + int cn = CV_MAT_CN(type); + int depth = CV_MAT_DEPTH(type); + double eps = depth <= CV_32S ? 
integerEps : 5e-2; + + for (int j = 0; j < test_loop_times; j++) + { + random_roi(); + + OCL_OFF(cv::resizeOnnx(src_roi, dst_roi, + dst_roi.size(), Point2d(fx, fy), interpolation)); + OCL_ON(cv::resizeOnnx(usrc_roi, udst_roi, + dst_roi.size(), Point2d(fx, fy), interpolation)); + + dst_roi.locateROI(whole, offset); + udst.copyTo(host); + host_roi = host(Rect(offset, dst_roi.size())); + if (cn <= 4 && depth != CV_8S && depth != CV_32S) + OCL_EXPECT_MAT_N_DIFF(dst, eps); + else + { + // more strict than OCL_EXPECT_MAT_N_DIFF + double dif = cv::norm(dst_roi, host_roi, NORM_INF); + EXPECT_LE(dif, eps) + << "Size: " << src_roi.size() + << ", NormInf: " << dif << std::endl; + } + } +} + ///////////////////////////////////////////////////////////////////////////////////////////////// // remap @@ -583,6 +689,44 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpResizeArea, Resize, Combine( Bool(), Values(1, 16))); +OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAlias, ResizeOnnx, Combine( + Values( + CV_8UC1, CV_8SC2, CV_8UC(5), CV_8SC(7), + CV_16UC1, CV_16SC3, CV_16UC(9), CV_16SC(10), + CV_32FC1, CV_32FC4, CV_32FC(11)), + Values(0.5, 0.31, 1.4), + Values(0.5, 0.73, 3.7), + Values((int)(INTER_LINEAR), (int)(INTER_CUBIC)), + Bool(), + Values(1, 16))); +OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAntiAlias, ResizeOnnx, Combine( + Values( + CV_8UC1, CV_8SC2, CV_8UC(5), CV_8SC(7), + CV_16UC1, CV_16SC3, CV_16UC(9), CV_16SC(10), + CV_32FC1, CV_32FC4, CV_32FC(11)), + Values(0.5, 0.27, 2.6), + Values(0.5, 0.71, 4.1), + Values( + (int)(INTER_ANTIALIAS | INTER_LINEAR), + (int)(INTER_ANTIALIAS | INTER_CUBIC )), + Bool(), + Values(1, 16))); + +OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpNearest, ResizeOnnx, Combine( + Values( + CV_8UC1, CV_8SC2, CV_8UC4, CV_8SC(7), + CV_16UC1, CV_16SC3, CV_16UC(9), CV_32SC(10), + CV_32FC1, CV_32FC4, CV_32FC(11)), + Values(0.5, 0.27, 2.6), + Values(0.5, 0.71, 4.1), + Values( + (int)(INTER_NEAREST | INTER_NEAREST_PREFER_FLOOR), + (int)(INTER_NEAREST | INTER_NEAREST_PREFER_CEIL), + 
(int)(INTER_NEAREST | INTER_NEAREST_CEIL), + (int)(INTER_NEAREST | INTER_NEAREST_FLOOR)), + Bool(), + Values(1, 16))); + OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_LINEAR, Combine( Values(CV_8U, CV_16U, CV_32F), Values(1, 3, 4), diff --git a/modules/imgproc/test/test_resize_onnx.cpp b/modules/imgproc/test/test_resize_onnx.cpp new file mode 100644 index 0000000000..a9a4271e72 --- /dev/null +++ b/modules/imgproc/test/test_resize_onnx.cpp @@ -0,0 +1,498 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +struct ResizeOnnx +{ + int interpolate; + Size szsrc, szref, szdst; + Point2d scale; + float cubic; + Rect2d roi; + /* make sure insrc is: + * (1) integer + * (2) range [-127, 127] + * (3) all non-positive or non-negative */ + vector insrc, inref; + + void rand_roi(RNG& rng, Mat& src, Size size, int type) + { + int const border = 16; + int t = rng.next() % border; + int b = rng.next() % border; + int l = rng.next() % border; + int r = rng.next() % border; + if (rng.next() & 1) + { + src.create(size.height + t + b, size.width + l + r, type); + src.setTo(127); + src = src(Rect(l, t, size.width, size.height)); + } + else + src.create(size, type); + } + + void run() + { + CV_CheckGE(static_cast(insrc.size()), szsrc.area(), "unexpected src size"); + CV_CheckEQ(static_cast(inref.size()), szref.area(), "unexpected ref size"); + Mat iS(szsrc, CV_64F, insrc.data()); + Mat iR(szref, CV_64F, inref.data()); + Mat S = iS, R = iR, nS, nR; + double alpha[6] = {1, 1, 5, 5, -1, -3}; + double beta[6] = {0, 7, 0, 7, +0, -7}; + RNG rng; + for (int cn = 1; cn <= 6; ++cn) + { + if (cn > 1) + { + iS.convertTo(nS, -1, alpha[cn - 1], beta[cn - 1]); + iR.convertTo(nR, -1, alpha[cn - 1], beta[cn - 1]); + merge(vector{S, nS}, S); + merge(vector{R, nR}, R); + } + for 
(int depth = CV_8U; depth <= CV_64F; ++depth) + { + double eps = (depth <= CV_32S) ? 1.0 : 1e-3; + int type = CV_MAKETYPE(depth, cn); + string errinfo = "failed on type " + typeToString(type); + Mat src, ref, dst; + rand_roi(rng, src, szsrc, type); + if (szdst.area()) + rand_roi(rng, dst, szdst, type); + S.convertTo(src, type); + R.convertTo(ref, type); + resizeOnnx(src, dst, szdst, scale, interpolate, cubic, roi); + EXPECT_EQ(ref.size(), dst.size()) << errinfo; + // nearest must give bit-same result + if ((interpolate & INTER_SAMPLER_MASK) == INTER_NEAREST) + EXPECT_EQ(cv::norm(ref, dst, NORM_INF), 0.0) << errinfo; + // cvRound(4.5) = 4, but when doing resize with int, we may get 5 + else + EXPECT_LE(cv::norm(ref, dst, NORM_INF), eps) << errinfo; + } + } + } +}; + +// https://github.com/onnx/onnx/blob/main/docs/Operators.md#examples-128 + +TEST(ResizeOnnx, downsample_scales_cubic) +{ + ResizeOnnx{ + INTER_CUBIC, + Size(4, 4), Size(3, 3), Size(), Point2d(0.8, 0.8), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 1.47119141, 2.78125 , 4.08251953, + 6.71142578, 8.02148438, 9.32275391, + 11.91650391, 13.2265625, 14.52783203, + } + }.run(); +} + +TEST(ResizeOnnx, downsample_scales_cubic_align_corners) +{ + ResizeOnnx{ + INTER_CUBIC | INTER_ALIGN_CORNERS, + Size(4, 4), Size(3, 3), Size(), Point2d(0.8, 0.8), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 1.0 , 2.39519159, 3.79038317, + 6.58076634, 7.97595793, 9.37114951, + 12.16153268, 13.55672427, 14.95191585, + } + }.run(); +} + +TEST(ResizeOnnx, downsample_scales_cubic_antialias) +{ + ResizeOnnx{ + INTER_CUBIC | INTER_ANTIALIAS, + Size(4, 4), Size(2, 2), Size(), Point2d(0.6, 0.6), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 2.5180721, 4.2858863, + 9.589329 , 11.357142 , + } + }.run(); +} + +TEST(ResizeOnnx, downsample_scales_linear) +{ + ResizeOnnx{ + INTER_LINEAR, + Size(4, 2), Size(2, 1), Size(), 
Point2d(0.6, 0.6), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8}, + {2.6666665, 4.3333331} + }.run(); +} + +TEST(ResizeOnnx, downsample_scales_linear_align_corners) +{ + ResizeOnnx{ + INTER_LINEAR | INTER_ALIGN_CORNERS, + Size(4, 2), Size(2, 1), Size(), Point2d(0.6, 0.6), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8}, + {1.0, 3.142857} + }.run(); +} + +TEST(ResizeOnnx, downsample_scales_linear_antialias) +{ + ResizeOnnx{ + INTER_LINEAR | INTER_ANTIALIAS, + Size(4, 4), Size(2, 2), Size(), Point2d(0.6, 0.6), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 2.875, 4.5, + 9.375, 11.0, + } + }.run(); +} + +TEST(ResizeOnnx, downsample_scales_linear_half_pixel_symmetric) +{ + ResizeOnnx{ + INTER_LINEAR | INTER_HALF_PIXEL_SYMMETRIC, + Size(4, 1), Size(2, 1), Size(), Point2d(0.6, 1.0), + -0.75f, Rect2d(), + {1, 2, 3, 4}, + {1.6666667, 3.3333333} + }.run(); +} + +TEST(ResizeOnnx, downsample_scales_nearest) +{ + ResizeOnnx{ + INTER_NEAREST, + Size(4, 2), Size(2, 1), Size(), Point2d(0.6, 0.6), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8}, + {1, 3} + }.run(); +} + +TEST(ResizeOnnx, downsample_sizes_cubic) +{ + ResizeOnnx{ + INTER_CUBIC, + Size(4, 4), Size(3, 3), Size(3, 3), Point2d(), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 1.63078704, 3.00462963, 4.37847222, + 7.12615741, 8.5 , 9.87384259, + 12.62152778, 13.99537037, 15.36921296, + } + }.run(); +} + +TEST(ResizeOnnx, downsample_sizes_cubic_antialias) +{ + ResizeOnnx{ + INTER_CUBIC | INTER_ANTIALIAS, + Size(4, 4), Size(3, 3), Size(3, 3), Point2d(), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 1.7750092, 3.1200073, 4.4650054, + 7.1550016, 8.5 , 9.844998 , + 12.534994, 13.8799925, 15.224991 , + } + }.run(); +} + +TEST(ResizeOnnx, downsample_sizes_linear_antialias) +{ + ResizeOnnx{ + INTER_LINEAR | INTER_ANTIALIAS, + Size(4, 4), Size(3, 3), Size(3, 3), Point2d(), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 2.3636363, 3.590909, 4.818182, + 7.2727275, 8.5 , 9.727273, + 12.181818 , 13.409091, 14.636364, + } + }.run(); +} + +TEST(ResizeOnnx, downsample_sizes_linear_pytorch_half_pixel) +{ + ResizeOnnx{ + INTER_LINEAR | INTER_HALF_PIXEL_PYTORCH, + Size(4, 4), Size(1, 3), Size(1, 3), Point2d(), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 1.6666666, + 7.0 , + 12.333333, + } + }.run(); +} + +TEST(ResizeOnnx, downsample_sizes_nearest) +{ + ResizeOnnx{ + INTER_NEAREST, + Size(4, 2), Size(3, 1), Size(3, 1), Point2d(), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8}, + {1, 2, 4} + }.run(); +} + +TEST(ResizeOnnx, tf_crop_and_resize) +{ + // Note: for some rois, the result may be different with that of TF for inaccurate floating point + ResizeOnnx{ + INTER_LINEAR | INTER_TF_CROP_RESIZE, + Size(4, 4), Size(3, 3), Size(3, 3), Point2d(), + -0.75f, Rect2d(0.6, 0.4, 0.2, 0.2), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 7.6000004, 7.9, 8.2 , + 8.8 , 9.1, 9.400001, + 10.0 , 10.3, 10.6 , + } + }.run(); +} + +TEST(ResizeOnnx, upsample_scales_cubic) +{ + ResizeOnnx{ + INTER_CUBIC, + Size(4, 4), Size(8, 8), Size(), Point2d(2.0, 2.0), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 0.47265625, 0.76953125, 1.24609375, 1.875, 2.28125, 2.91015625, 3.38671875, 3.68359375, + 1.66015625, 1.95703125, 2.43359375, 3.0625, 3.46875, 4.09765625, 4.57421875, 4.87109375, + 3.56640625, 3.86328125, 4.33984375, 4.96875, 5.375, 6.00390625, 6.48046875, 6.77734375, + 6.08203125, 6.37890625, 6.85546875, 7.484375, 7.890625, 8.51953125, 8.99609375, 9.29296875, + 7.70703125, 8.00390625, 8.48046875, 9.109375, 9.515625, 10.14453125, 10.62109375, 10.91796875, + 10.22265625, 10.51953125, 10.99609375, 11.625, 12.03125, 12.66015625, 13.13671875, 13.43359375, + 12.12890625, 12.42578125, 12.90234375, 13.53125, 13.9375, 14.56640625, 15.04296875, 15.33984375, + 13.31640625, 13.61328125, 
14.08984375, 14.71875, 15.125, 15.75390625, 16.23046875, 16.52734375, + } + }.run(); +} + +TEST(ResizeOnnx, upsample_scales_cubic_align_corners) +{ + ResizeOnnx{ + INTER_CUBIC | INTER_ALIGN_CORNERS, + Size(4, 4), Size(8, 8), Size(), Point2d(2.0, 2.0), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 1.0, 1.34110787, 1.80029155, 2.32944606, 2.67055394, 3.19970845, 3.65889213, 4.0, + 2.36443149, 2.70553936, 3.16472303, 3.69387755, 4.03498542, 4.56413994, 5.02332362, 5.36443149, + 4.20116618, 4.54227405, 5.00145773, 5.53061224, 5.87172012, 6.40087464, 6.86005831, 7.20116618, + 6.31778426, 6.65889213, 7.1180758, 7.64723032, 7.98833819, 8.51749271, 8.97667638, 9.31778426, + 7.68221574, 8.02332362, 8.48250729, 9.01166181, 9.35276968, 9.8819242, 10.34110787, 10.68221574, + 9.79883382, 10.13994169, 10.59912536, 11.12827988, 11.46938776, 11.99854227, 12.45772595, 12.79883382, + 11.63556851, 11.97667638, 12.43586006, 12.96501458, 13.30612245, 13.83527697, 14.29446064, 14.63556851, + 13.0, 13.34110787, 13.80029155, 14.32944606, 14.67055394, 15.19970845, 15.65889213, 16.0, + } + }.run(); +} + +TEST(ResizeOnnx, upsample_scales_cubic_asymmetric) +{ + ResizeOnnx{ + INTER_CUBIC | INTER_ASYMMETRIC, + Size(4, 4), Size(8, 8), Size(), Point2d(2.0, 2.0), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 1.0, 1.40625, 2.0, 2.5, 3.0, 3.59375, 4.0, 4.09375, + 2.625, 3.03125, 3.625, 4.125, 4.625, 5.21875, 5.625, 5.71875, + 5.0, 5.40625, 6.0, 6.5, 7.0, 7.59375, 8.0, 8.09375, + 7.0, 7.40625, 8.0, 8.5, 9.0, 9.59375, 10.0, 10.09375, + 9.0, 9.40625, 10.0, 10.5, 11.0, 11.59375, 12.0, 12.09375, + 11.375, 11.78125, 12.375, 12.875, 13.375, 13.96875, 14.375, 14.46875, + 13.0, 13.40625, 14.0, 14.5, 15.0, 15.59375, 16.0, 16.09375, + 13.375, 13.78125, 14.375, 14.875, 15.375, 15.96875, 16.375, 16.46875, + } + }.run(); +} + +TEST(ResizeOnnx, upsample_scales_linear) +{ + ResizeOnnx{ + INTER_LINEAR, + Size(2, 2), Size(4, 4), Size(), 
Point2d(2.0, 2.0), + -0.75f, Rect2d(), + {1, 2, 3, 4}, + { + 1.0, 1.25, 1.75, 2.0, + 1.5, 1.75, 2.25, 2.5, + 2.5, 2.75, 3.25, 3.5, + 3.0, 3.25, 3.75, 4.0, + } + }.run(); +} + +TEST(ResizeOnnx, upsample_scales_linear_align_corners) +{ + ResizeOnnx{ + INTER_LINEAR | INTER_ALIGN_CORNERS, + Size(2, 2), Size(4, 4), Size(), Point2d(2.0, 2.0), + -0.75f, Rect2d(), + {1, 2, 3, 4}, + { + 1.0, 1.33333333, 1.66666667, 2.0, + 1.66666667, 2.0, 2.33333333, 2.66666667, + 2.33333333, 2.66666667, 3.0, 3.33333333, + 3.0, 3.33333333, 3.66666667, 4.0, + } + }.run(); +} + +TEST(ResizeOnnx, upsample_scales_linear_half_pixel_symmetric) +{ + ResizeOnnx{ + INTER_LINEAR | INTER_HALF_PIXEL_SYMMETRIC, + Size(2, 2), Size(5, 4), Size(), Point2d(2.94, 2.3), + -0.75f, Rect2d(), + {1, 2, 3, 4}, + { + 1.0 , 1.15986395, 1.5 , 1.84013605, 2.0 , + 1.56521738, 1.72508133, 2.06521738, 2.40535343, 2.56521738, + 2.43478262, 2.59464657, 2.93478262, 3.27491867, 3.43478262, + 3.0 , 3.15986395, 3.5 , 3.84013605, 4.0 , + } + }.run(); +} + +TEST(ResizeOnnx, upsample_scales_nearest) +{ + ResizeOnnx{ + INTER_NEAREST, + Size(2, 2), Size(6, 4), Size(), Point2d(3.0, 2.0), + -0.75f, Rect2d(), + {1, 2, 3, 4}, + { + 1, 1, 1, 2, 2, 2, + 1, 1, 1, 2, 2, 2, + 3, 3, 3, 4, 4, 4, + 3, 3, 3, 4, 4, 4, + } + }.run(); +} + +TEST(ResizeOnnx, upsample_sizes_cubic) +{ + ResizeOnnx{ + INTER_CUBIC, + Size(4, 4), Size(10, 9), Size(10, 9), Point2d(), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 0.45507922, 0.64057922, 0.97157922, 1.42257922, 1.90732922, 2.22332922, 2.70807922, 3.15907922, 3.49007922, 3.67557922, + 1.39437963, 1.57987963, 1.91087963, 2.36187963, 2.84662963, 3.16262963, 3.64737963, 4.09837963, 4.42937963, 4.61487963, + 2.95130693, 3.13680693, 3.46780693, 3.91880693, 4.40355693, 4.71955693, 5.20430693, 5.65530693, 5.98630693, 6.17180693, + 5.20525069, 5.39075069, 5.72175069, 6.17275069, 6.65750069, 6.97350069, 7.45825069, 7.90925069, 8.24025069, 8.42575069, + 6.88975, 7.07525, 
7.40625, 7.85725, 8.342, 8.658, 9.14275, 9.59375, 9.92475, 10.11025, + 8.57424931, 8.75974931, 9.09074931, 9.54174931, 10.02649931, 10.34249931, 10.82724931, 11.27824931, 11.60924931, 11.79474931, + 10.82819307, 11.01369307, 11.34469307, 11.79569307, 12.28044307, 12.59644307, 13.08119307, 13.53219307, 13.86319307, 14.04869307, + 12.38512037, 12.57062037, 12.90162037, 13.35262037, 13.83737037, 14.15337037, 14.63812037, 15.08912037, 15.42012037, 15.60562037, + 13.32442078, 13.50992078, 13.84092078, 14.29192078, 14.77667078, 15.09267078, 15.57742078, 16.02842078, 16.35942078, 16.54492078, + } + }.run(); +} + +TEST(ResizeOnnx, upsample_sizes_nearest) +{ + ResizeOnnx{ + INTER_NEAREST, + Size(2, 2), Size(8, 7), Size(8, 7), Point2d(), + -0.75f, Rect2d(), + {1, 2, 3, 4}, + { + 1, 1, 1, 1, 2, 2, 2, 2, + 1, 1, 1, 1, 2, 2, 2, 2, + 1, 1, 1, 1, 2, 2, 2, 2, + 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, + 3, 3, 3, 3, 4, 4, 4, 4, + 3, 3, 3, 3, 4, 4, 4, 4, + } + }.run(); +} + +TEST(ResizeOnnx, upsample_sizes_nearest_ceil_half_pixel) +{ + ResizeOnnx{ + INTER_NEAREST | INTER_NEAREST_CEIL, + Size(4, 4), Size(8, 8), Size(8, 8), Point2d(), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 1, 2, 2, 3, 3, 4, 4, 4, + 5, 6, 6, 7, 7, 8, 8, 8, + 5, 6, 6, 7, 7, 8, 8, 8, + 9, 10, 10, 11, 11, 12, 12, 12, + 9, 10, 10, 11, 11, 12, 12, 12, + 13, 14, 14, 15, 15, 16, 16, 16, + 13, 14, 14, 15, 15, 16, 16, 16, + 13, 14, 14, 15, 15, 16, 16, 16, + } + }.run(); +} + +TEST(ResizeOnnx, upsample_sizes_nearest_floor_align_corners) +{ + ResizeOnnx{ + INTER_NEAREST | INTER_NEAREST_FLOOR | INTER_ALIGN_CORNERS, + Size(4, 4), Size(8, 8), Size(8, 8), Point2d(), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 1, 1, 1, 2, 2, 3, 3, 4, + 1, 1, 1, 2, 2, 3, 3, 4, + 1, 1, 1, 2, 2, 3, 3, 4, + 5, 5, 5, 6, 6, 7, 7, 8, + 5, 5, 5, 6, 6, 7, 7, 8, + 9, 9, 9, 10, 10, 11, 11, 12, + 9, 9, 9, 10, 10, 11, 11, 12, + 13, 13, 13, 14, 14, 15, 15, 16, + } + 
}.run(); +} + +TEST(ResizeOnnx, upsample_sizes_nearest_round_prefer_ceil_asymmetric) +{ + ResizeOnnx{ + INTER_NEAREST | INTER_NEAREST_PREFER_CEIL | INTER_ASYMMETRIC, + Size(4, 4), Size(8, 8), Size(8, 8), Point2d(), + -0.75f, Rect2d(), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 1, 2, 2, 3, 3, 4, 4, 4, + 5, 6, 6, 7, 7, 8, 8, 8, + 5, 6, 6, 7, 7, 8, 8, 8, + 9, 10, 10, 11, 11, 12, 12, 12, + 9, 10, 10, 11, 11, 12, 12, 12, + 13, 14, 14, 15, 15, 16, 16, 16, + 13, 14, 14, 15, 15, 16, 16, 16, + 13, 14, 14, 15, 15, 16, 16, 16, + } + }.run(); +} + +}} + diff --git a/modules/ts/include/opencv2/ts/ocl_test.hpp b/modules/ts/include/opencv2/ts/ocl_test.hpp index 717eb7b14c..ed456385b9 100644 --- a/modules/ts/include/opencv2/ts/ocl_test.hpp +++ b/modules/ts/include/opencv2/ts/ocl_test.hpp @@ -134,11 +134,11 @@ do \ mask(cv::Rect(1, 1, mask.cols - 2, mask.rows - 2)).setTo(0); \ cv::threshold(diff, binary, (double)eps, 255, cv::THRESH_BINARY); \ EXPECT_LE(countNonZero(binary.reshape(1)), (int)(binary.cols*binary.rows*5/1000)) \ - << "Size: " << name ## _roi.size() << std::endl; \ + << "Size: " << name ## _roi.size() << ", NormInf: " << cv::norm(diff, NORM_INF) << std::endl; \ binary.convertTo(binary_8, mask.type()); \ binary_8 = binary_8 & mask; \ EXPECT_LE(countNonZero(binary_8.reshape(1)), (int)((binary_8.cols+binary_8.rows)/100)) \ - << "Size: " << name ## _roi.size() << std::endl; \ + << "Size: " << name ## _roi.size() << ", NormInf: " << cv::norm(diff, NORM_INF) << std::endl; \ } while ((void)0, 0) #define OCL_EXPECT_MATS_NEAR(name, eps) \ From 379c16e10613902160cd4a6ca03d00e71ff2f1f2 Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Sat, 8 Jun 2024 19:03:43 +0800 Subject: [PATCH 02/12] delete trailing whitespace --- .../include/opencv2/core/cuda/vec_math.hpp | 69 +++++++++++++------ modules/imgproc/src/opencl/resize_onnx.cl | 30 ++++---- modules/imgproc/src/resize.cpp | 24 +++---- modules/imgproc/test/test_resize_onnx.cpp | 3 +- 4 files changed, 74 
insertions(+), 52 deletions(-) diff --git a/modules/core/include/opencv2/core/cuda/vec_math.hpp b/modules/core/include/opencv2/core/cuda/vec_math.hpp index 80b1303681..f22205fcc0 100644 --- a/modules/core/include/opencv2/core/cuda/vec_math.hpp +++ b/modules/core/include/opencv2/core/cuda/vec_math.hpp @@ -374,6 +374,26 @@ CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double) #undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC +// a += b + +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_ASSIGN(op, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 & operator op ## = (output_type ## 1 & a, const input_type ## 1 & b) \ + { \ + return a = VecTraits::make(a.x op b.x); \ + } \ + __device__ __forceinline__ output_type ## 2 & operator op ## = (output_type ## 2 & a, const input_type ## 2 & b) \ + { \ + return a = VecTraits::make(a.x op b.x, a.y op b.y); \ + } \ + __device__ __forceinline__ output_type ## 3 & operator op ## = (output_type ## 3 & a, const input_type ## 3 & b) \ + { \ + return a = VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z); \ + } \ + __device__ __forceinline__ output_type ## 4 & operator op ## = (output_type ## 4 & a, const input_type ## 4 & b) \ + { \ + return a = VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \ + } + // binary operators (vec & vec) #define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \ @@ -392,7 +412,8 @@ CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double) __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \ { \ return VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \ - } + } \ + CV_CUDEV_IMPLEMENT_VEC_BINARY_ASSIGN(op, input_type, output_type) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int) @@ -430,6 +451,30 @@ CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, 
double) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , uint, uint) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_ASSIGN +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_ASSIGN(op, input_type, output_type) + CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar) @@ -502,28 +547,8 @@ CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint) - -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint) - 
-CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint) - #undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP +#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_ASSIGN // binary operators (vec & scalar) diff --git a/modules/imgproc/src/opencl/resize_onnx.cl b/modules/imgproc/src/opencl/resize_onnx.cl index 03a64e2bc1..19c6c69cb4 100644 --- a/modules/imgproc/src/opencl/resize_onnx.cl +++ b/modules/imgproc/src/opencl/resize_onnx.cl @@ -50,7 +50,6 @@ __kernel void resizeOnnx_nearest( if (dx < dst_cols && dy < dst_rows) { float fx = fma(dx, m00 , m01), fy = fma(dy, m10, m11); - #if defined(INTER_NEAREST_PREFER_FLOOR) || defined(INTER_NEAREST_CEIL) // x, y will >= 0, so `round toward positive infinity' is equivalent to ceil int sx = convert_int_rtp(fx + offset); @@ -121,13 +120,14 @@ __kernel void resizeOnnx_linear( VT d0 = TO_VEC_TYPE((u0 * v0) * s0 + (u1 * v0) * s1 + (u0 * v1) * s2 + (u1 * v1) * s3); storepix(d0, D); #else + W coeff[4] = { u0 * v0, u1 * v0, u0 * v1, u1 * v1 }; for (int i = 0; i < channel; ++i) { W s0 = TO_WORK(((__global T const*)(S0))[i]); W s1 = TO_WORK(((__global T const*)(S1))[i]); W s2 = TO_WORK(((__global T const*)(S2))[i]); W s3 = TO_WORK(((__global T const*)(S3))[i]); - W d0 = (u0 * v0) * s0 + (u1 * v0) * s1 + (u0 * v1) * s2 + (u1 * v1) * s3; + W d0 = coeff[0] * s0 + coeff[1] * s1 + coeff[2] * s2 + coeff[3] * s3; ((__global T*)(D))[i] = TO_TYPE(d0); } #endif @@ -271,10 +271,10 @@ __kernel void resizeOnnx_cubic( for (int y = ystart; y <= ylimit; ++y) { int yoffset = clamp(y, 0, src_rows - 1) * src_step + src_offset; - VW line = (VW)(0); + VW sline = (VW)(0); for (int x = 0; x < 4; ++x) - line += (VW)(xcoeff[x]) * TO_VEC_WORK(loadpix(srcptr + yoffset + xoffset[x])); - sum += line * (VW)(cubicCoeff(A, A2, A3, y - fy)); + sline += 
(VW)(xcoeff[x]) * TO_VEC_WORK(loadpix(srcptr + yoffset + xoffset[x])); + sum += sline * (VW)(cubicCoeff(A, A2, A3, y - fy)); } storepix(TO_VEC_TYPE(sum), D); #else @@ -290,11 +290,11 @@ __kernel void resizeOnnx_cubic( W sum = 0; for (int y = 0; y < 4; ++y) { - W line = 0; + W sline = 0; for (int x = 0; x < 4; ++x) - line += xcoeff[x] * TO_WORK(((__global T const*) + sline += xcoeff[x] * TO_WORK(((__global T const*) (srcptr + yoffset[y] + xoffset[x]))[i]); - sum += line * ycoeff[y]; + sum += sline * ycoeff[y]; } ((__global T*)(D))[i] = TO_TYPE(sum); } @@ -329,10 +329,10 @@ __kernel void resizeOnnx_table( { // offset is already clamped. xoffset is given by uchar __global const uchar* S = (srcptr + yoffset[y] * src_step + src_offset); - VW line = (VW)(0); + VW sline = (VW)(0); for (int x = dx; x < xstride; x += dst_cols) - line += xcoeff[x] * TO_VEC_WORK(loadpix(S + xoffset[x])); - sum += line * ycoeff[y]; + sline += xcoeff[x] * TO_VEC_WORK(loadpix(S + xoffset[x])); + sum += sline * ycoeff[y]; } storepix(TO_VEC_TYPE(sum), D); #else @@ -342,10 +342,10 @@ __kernel void resizeOnnx_table( for (int y = dy; y < ystride; y += dst_rows) { __global const uchar* S = (srcptr + yoffset[y] * src_step + src_offset); - W line = 0; + W sline = 0; for (int x = dx; x < xstride; x += dst_cols) - line += xcoeff[x] * TO_WORK(((__global T const*)(S + xoffset[x]))[i]); - sum += line * ycoeff[y]; + sline += xcoeff[x] * TO_WORK(((__global T const*)(S + xoffset[x]))[i]); + sum += sline * ycoeff[y]; } ((__global T*)(D))[i] = TO_TYPE(sum); } @@ -353,7 +353,7 @@ __kernel void resizeOnnx_table( } } -#else +#else #error "empty kernel" diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index 9fa892de98..324b4034f6 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -3698,7 +3698,7 @@ class ResizeOnnxInvoker : public ParallelLoopBody VResize vresize; ResizeOnnxInvoker& operator =(ResizeOnnxInvoker const&); - + public: typedef typename 
HResize::value_type T; typedef typename HResize::buf_type WT; @@ -3710,7 +3710,6 @@ public: CV_CheckLE(ctrl.ksize, MAX_ESIZE, "resampler kernel's size is too larger"); CV_Check(ctrl.is_fixpt, !(ctrl.is_fixpt && ctrl.is_double), "can not be both types"); // prefer static_assert, but how ? -#ifdef CV_CXX11 // check generic resize if (ctrl.is_fixpt) { @@ -3754,7 +3753,6 @@ public: CV_Check(sizeof(IdxT) * 10 + sizeof(WT), (std::is_same::type>::value), "something wrong"); -#endif } void hori_antialias_accumulate(T const* S, IdxT* L) const @@ -3800,7 +3798,7 @@ public: L[di + 2] += S[si + 2] * alpha; L[di + 3] += S[si + 3] * alpha; } - else + else for (int k = 0; k < len; ++k) { int di = ctrl.xtab[k].di; @@ -3817,7 +3815,7 @@ public: int dwidth = dst.cols * cn; #ifdef CV_CXX11 constexpr bool same_wt_idxt = std::is_same::value; -#else +#else bool const same_wt_idxt = false; #endif for (int i = 0; i < count; ++i) @@ -3917,7 +3915,7 @@ public: { if (ctrl.xkanti) vert_antialias_hori_antialias(dy, L, A); - else + else vert_antialias_hori_generic(dy, Lw, A); } } @@ -3966,7 +3964,7 @@ public: { if (ctrl.xkanti) hori_antialias_lines(srows + k0, rows + k0, L, ksize - k0); - else + else hori_generic_lines(srows + k0, rows + k0, ksize - k0); } vresize(const_cast(rows), dst.template ptr(dy), beta, dwidth); @@ -4321,10 +4319,10 @@ static void ocl_resizeOnnxTable(int srclen, int dstlen, int esz, float sum = 0.f; for (int i = start; i < end; ++i) { - float x = fabs(i - ratio) * scale; + float x = fabsf(i - ratio) * scale; if (sampler == INTER_LINEAR) x = min(max(x, 0.f), 1.f); - else + else { if (x <= 1) x = ((A + 2) * x - (A + 3)) * x * x + 1; @@ -4351,7 +4349,7 @@ static void ocl_resizeOnnxTable(int srclen, int dstlen, int esz, static char const* ocl_resizeOnnx_typeToString(int type, char* buf, size_t size) { // typeToStr CV_Assert will failed - static char const* tab[CV_64F + 1] = + static char const* tab[CV_64F + 1] = { "uchar", "char", "ushort", "short", "int", "float", "double" 
}; int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); if (cn == 1) @@ -4388,9 +4386,6 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, int nearest = interpolation & INTER_NEAREST_MODE_MASK; int antialias = interpolation & INTER_ANTIALIAS_MASK; Point2f scale = static_cast(scaled); - if (depth > CV_64F) - return false; - ocl::Kernel k; UMat src = _src.getUMat(), dst = _dst.getUMat(); size_t globalsize[] = {static_cast(dst.cols), static_cast(dst.rows)}; @@ -4398,6 +4393,9 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, int pixel_size = static_cast(src.elemSize()); int T = depth, VT = type; String buildopts, errmsg; + // opencv ocl kernel use int for step and offset + if (depth > CV_64F || src.size[0] * src.step[0] > INT_MAX) + return false; if (sampler == INTER_NEAREST) { diff --git a/modules/imgproc/test/test_resize_onnx.cpp b/modules/imgproc/test/test_resize_onnx.cpp index a9a4271e72..0de233c63c 100644 --- a/modules/imgproc/test/test_resize_onnx.cpp +++ b/modules/imgproc/test/test_resize_onnx.cpp @@ -235,7 +235,7 @@ TEST(ResizeOnnx, downsample_sizes_linear_pytorch_half_pixel) Size(4, 4), Size(1, 3), Size(1, 3), Point2d(), -0.75f, Rect2d(), {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - { + { 1.6666666, 7.0 , 12.333333, @@ -495,4 +495,3 @@ TEST(ResizeOnnx, upsample_sizes_nearest_round_prefer_ceil_asymmetric) } }} - From 358b64ad0e844f9621b7cde1c9d371da770d7a1d Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Mon, 10 Jun 2024 13:50:54 +0800 Subject: [PATCH 03/12] cuda resize onnx done --- .../include/opencv2/core/cuda/vec_math.hpp | 12 +- modules/imgproc/include/opencv2/imgproc.hpp | 20 +-- modules/imgproc/src/opencl/resize_onnx.cl | 33 ++-- modules/imgproc/src/resize.cpp | 165 ++++++++---------- modules/imgproc/test/ocl/test_warp.cpp | 156 +++++++---------- modules/imgproc/test/test_resize_onnx.cpp | 8 +- 6 files changed, 168 insertions(+), 226 deletions(-) diff --git a/modules/core/include/opencv2/core/cuda/vec_math.hpp 
b/modules/core/include/opencv2/core/cuda/vec_math.hpp index f22205fcc0..0a1205e25b 100644 --- a/modules/core/include/opencv2/core/cuda/vec_math.hpp +++ b/modules/core/include/opencv2/core/cuda/vec_math.hpp @@ -458,12 +458,12 @@ CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , uchar, uchar) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , char, char) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , ushort, ushort) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , short, short) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , int, int) -CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(| , uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 8e6eecb667..ba002bbc70 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -281,14 +281,11 @@ enum InterpolationFlags { //! 
ONNX Resize Flags enum ResizeONNXFlags { - // static_assert((1 << INTER_COORDINATE_SHIFT) > INTER_MAX, ""); - // https://github.com/onnx/onnx/blob/main/docs/Operators.md#Resize - INTER_SAMPLER_SHIFT = 0, - INTER_SAMPLER_BIT = 4, + INTER_SAMPLER_BIT = 3, INTER_SAMPLER_MASK = ((1 << INTER_SAMPLER_BIT) - 1) << INTER_SAMPLER_SHIFT, - INTER_COORDINATE_SHIFT = 4, + INTER_COORDINATE_SHIFT = INTER_SAMPLER_SHIFT + INTER_SAMPLER_BIT, INTER_COORDINATE_BIT = 3, INTER_COORDINATE_MASK = ((1 << INTER_COORDINATE_BIT) - 1) << INTER_COORDINATE_SHIFT, /** x_original = (x_resized + 0.5) / scale - 0.5 */ @@ -305,8 +302,8 @@ enum ResizeONNXFlags /** x_original = x_resized / scale */ INTER_ASYMMETRIC = 4 << INTER_COORDINATE_SHIFT, /** x_original = length_resized > 1 - * ? start_x * (length_original - 1) + x_resized * (end_x - start_x) * (length_original - 1) / (length_resized - 1) - * : 0.5 * (start_x + end_x) * (length_original - 1) */ + ? start_x * (length_original - 1) + x_resized * (end_x - start_x) * (length_original - 1) / (length_resized - 1) + : 0.5 * (start_x + end_x) * (length_original - 1) */ INTER_TF_CROP_RESIZE = 5 << INTER_COORDINATE_SHIFT, INTER_NEAREST_MODE_SHIFT = INTER_COORDINATE_SHIFT + INTER_COORDINATE_BIT, @@ -2468,12 +2465,12 @@ CV_EXPORTS_W void resize( InputArray src, OutputArray dst, int interpolation = INTER_LINEAR ); /** @brief onnx resize op + https://github.com/onnx/onnx/blob/main/docs/Operators.md#Resize https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py - Not support `exclude_outside` and `extrapolation_value` yet. 
-To get a similar result to resize, give dsize and: +To get a similar result to `cv::resize`, give dsize and: INTER_NEAREST : ASYMMETRIC + NEAREST_FLOOR INTER_LINEAR : HALF_PIXEL INTER_CUBIC : HALF_PIXEL + cubicCoeff(-0.75) @@ -2490,9 +2487,8 @@ To get a similar result to resize, give dsize and: @sa resize */ -CV_EXPORTS_W void resizeOnnx( - InputArray src, OutputArray dst, Size dsize, Point2d scale = Point2d(), - int interpolation = INTER_LINEAR | INTER_HALF_PIXEL, +CV_EXPORTS_W void resizeOnnx(InputArray src, OutputArray dst, Size dsize, + Point2d scale = Point2d(), int interpolation = INTER_LINEAR | INTER_HALF_PIXEL, float cubicCoeff = -0.75f, Rect2d const& roi = Rect2d()); /** @brief Applies an affine transformation to an image. diff --git a/modules/imgproc/src/opencl/resize_onnx.cl b/modules/imgproc/src/opencl/resize_onnx.cl index 19c6c69cb4..8b7c96cea0 100644 --- a/modules/imgproc/src/opencl/resize_onnx.cl +++ b/modules/imgproc/src/opencl/resize_onnx.cl @@ -67,22 +67,20 @@ __kernel void resizeOnnx_nearest( #if PIXEL_SIZE == 1 *D = *S; -#elif PIXEL_SIZE == 2 || PIXEL_SIZE == 4 || PIXEL_SIZE == 8 || PIXEL_SIZE == 16 - *(__global VT*)(D) = *(__global const VT*)(S); +#elif PIXEL_SIZE == 2 + *(__global ushort*)(D) = *(__global const ushort*)(S); #elif PIXEL_SIZE == 3 vstore3(vload3(0, S), 0, D); +#elif PIXEL_SIZE == 4 + *(__global uint*)(D) = *(__global const uint*)(S); #elif PIXEL_SIZE == 6 vstore3(vload3(0, (__global ushort const*)(S)), 0, (__global ushort*)(D)); +#elif PIXEL_SIZE == 8 + *(__global uint2*)(D) = *(__global const uint2*)(S); #elif PIXEL_SIZE == 12 vstore3(vload3(0, (__global const uint*)(S)), 0, (__global uint*)(D)); -#elif PIXEL_SIZE == 24 - vstore3(vload3(0, (__global ulong const*)(S)), 0, (__global ulong*)(D)); -#elif PIXEL_SIZE == 32 - *(__global uint8*)(D) = *(__global uint8 const*)(S); -#elif PIXEL_SIZE == 64 - *(__global uint16*)(D) = *(__global uint16 const*)(S); -#elif PIXEL_SIZE == 128 - *(__global ulong16*)(D) = *(__global ulong16 
const*)(S); +#elif PIXEL_SIZE == 16 + *(__global uint4*)(D) = *(__global const uint4*)(S); #else for (int i = 0; i < pixel_size; ++i) D[i] = S[i]; @@ -114,7 +112,7 @@ __kernel void resizeOnnx_linear( __global uchar const* S2 = srcptr + (y1 * src_step + mad24(x0, pixel_size, src_offset)); __global uchar const* S3 = srcptr + (y1 * src_step + mad24(x1, pixel_size, src_offset)); __global uchar * D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset)); -#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16 +#if CN == 1 || CN == 2 || CN == 3 || CN == 4 VW s0 = TO_VEC_WORK(loadpix(S0)); VW s1 = TO_VEC_WORK(loadpix(S1)); VW s2 = TO_VEC_WORK(loadpix(S2)); VW s3 = TO_VEC_WORK(loadpix(S3)); VT d0 = TO_VEC_TYPE((u0 * v0) * s0 + (u1 * v0) * s1 + (u0 * v1) * s2 + (u1 * v1) * s3); @@ -154,7 +152,7 @@ __kernel void resizeOnnx_linear_antialias( int ix = convert_int_rtn(fx), iy = convert_int_rtn(fy); float rx = fx - ix, ry = fy - iy; __global uchar* D = dstptr + dy * dst_step + mad24(dx, pixel_size, dst_offset); -#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16 +#if CN == 1 || CN == 2 || CN == 3 || CN == 4 VW sumval = (VW)(0); float weight = 0; for (int h = ystart; h < yend; ++h) @@ -266,7 +264,7 @@ __kernel void resizeOnnx_cubic( xcoeff [x - xstart] = cubicCoeff(A, A2, A3, x - fx); } __global uchar* D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset)); -#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16 +#if CN == 1 || CN == 2 || CN == 3 || CN == 4 VW sum = (VW)(0); for (int y = ystart; y <= ylimit; ++y) { @@ -322,13 +320,14 @@ __kernel void resizeOnnx_table( __global int const* yoffset = xoffset + xstride; __global float const* xcoeff = (__global float const*)(yoffset + ystride); __global float const* ycoeff = (__global float const*)(xcoeff + xstride); -#if CN == 1 || CN == 2 || CN == 3 || CN == 4 || CN == 8 || CN == 16 +#if CN == 1 || CN == 2 || CN == 3 || CN == 4 VW sum = (VW)(0); // exact ykanti / xkanti 
loops for (int y = dy; y < ystride; y += dst_rows) { - // offset is already clamped. xoffset is given by uchar - __global const uchar* S = (srcptr + yoffset[y] * src_step + src_offset); + // offset is already clamped + // xoffset is given by uchar, yoffset already multiply by src_step + __global const uchar* S = srcptr + yoffset[y] + src_offset; VW sline = (VW)(0); for (int x = dx; x < xstride; x += dst_cols) sline += xcoeff[x] * TO_VEC_WORK(loadpix(S + xoffset[x])); @@ -341,7 +340,7 @@ __kernel void resizeOnnx_table( W sum = 0; for (int y = dy; y < ystride; y += dst_rows) { - __global const uchar* S = (srcptr + yoffset[y] * src_step + src_offset); + __global const uchar* S = (srcptr + yoffset[y] + src_offset); W sline = 0; for (int x = dx; x < xstride; x += dst_cols) sline += xcoeff[x] * TO_WORK(((__global T const*)(S + xoffset[x]))[i]); diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index 324b4034f6..c763379bb9 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -1356,36 +1356,36 @@ public: break; case 2: for (; x < width; ++x) - reinterpret_cast(D)[x] = *(reinterpret_cast(S + x_ofs[x])); + reinterpret_cast(D)[x] = *(reinterpret_cast(S + x_ofs[x])); break; case 3: for (; x < width; ++x, D += 3) { - const uchar* _tS = S + x_ofs[x]; + uchar const* _tS = S + x_ofs[x]; D[0] = _tS[0]; D[1] = _tS[1]; D[2] = _tS[2]; } break; case 4: for (; x < width; ++x) - reinterpret_cast(D)[x] = *(reinterpret_cast(S + x_ofs[x])); + reinterpret_cast(D)[x] = *(reinterpret_cast(S + x_ofs[x])); break; case 6: for (; x < width; ++x, D += 6) { - short const* _tS = reinterpret_cast(S + x_ofs[x]); - short* _tD = reinterpret_cast(D); + ushort const* _tS = reinterpret_cast(S + x_ofs[x]); + ushort* _tD = reinterpret_cast(D); _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2]; } break; case 8: for (; x < width; ++x) - reinterpret_cast(D)[x] = *(reinterpret_cast(S + x_ofs[x])); + reinterpret_cast(D)[x] = *(reinterpret_cast(S + 
x_ofs[x])); break; case 12: for (; x < width; ++x, D += 12) { - int const* _tS = reinterpret_cast(S + x_ofs[x]); - int* _tD = reinterpret_cast(D); + uint const* _tS = reinterpret_cast(S + x_ofs[x]); + uint* _tD = reinterpret_cast(D); _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2]; } break; @@ -3465,7 +3465,8 @@ public: int xmin, xmax; private: - void cubic_coeffs_antialias(int dstlen, int cn, float srcpos, float scale, int srclen, float A, TabIdx* elem) + void cubicCoeffsAntiAlias( + int dstlen, int cn, float srcpos, float scale, int srclen, float A, TabIdx* elem) { scale = min(scale, 1.f); int index = cvFloor(srcpos); @@ -3497,7 +3498,7 @@ private: } } - void cubic_coeffs(float x, float A, float* coeffs) + void cubicCoeffs(float x, float A, float* coeffs) { coeffs[0] = A * ((((x + 1) - 5) * (x + 1) + 8) * (x + 1) - 4); coeffs[1] = ((A + 2) * x - (A + 3)) * x * x + 1; @@ -3505,7 +3506,8 @@ private: coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2]; } - void linear_coeffs_antialias(int dstlen, int cn, float srcpos, float scale, int srclen, TabIdx* elem) + void linearCoeffsAntialias( + int dstlen, int cn, float srcpos, float scale, int srclen, TabIdx* elem) { scale = min(scale, 1.f); int index = cvFloor(srcpos); @@ -3532,7 +3534,7 @@ private: } } - void linear_coeffs(float x, float* coeffs) + void linearCoeffs(float x, float* coeffs) { coeffs[0] = 1.f - x; coeffs[1] = x; @@ -3570,16 +3572,17 @@ private: area.commit(); CV_CheckLE(ksize, MAX_ESIZE, "resampler kernel's size is too larger"); - if (antialias) + // when upsampling, `antialias` is same to `generic`, so use `generic` to speed up + if (antialias && scaled.x < 1.0) { float a = M(0, 0), b = M(0, 1); for (int d = 0; d < dsize.width; ++d) { float f = fmaf(static_cast(d), a, b); if (sampler == INTER_LINEAR) - linear_coeffs_antialias(d, cn, f, scale.x, ssize.width, xtab + d * xkanti); + linearCoeffsAntialias(d, cn, f, scale.x, ssize.width, xtab + d * xkanti); else // if (sampler == INTER_CUBIC) - 
cubic_coeffs_antialias(d, cn, f, scale.x, ssize.width, cubicCoeff, xtab + d * xkanti); + cubicCoeffsAntiAlias(d, cn, f, scale.x, ssize.width, cubicCoeff, xtab + d * xkanti); } } else @@ -3608,9 +3611,9 @@ private: for (int k = 0; k < cn; ++k) xofs[cn * d + k] = cn * s + k; if (sampler == INTER_LINEAR) - linear_coeffs(f, cbuf); + linearCoeffs(f, cbuf); else // if (sampler == INTER_CUBIC) - cubic_coeffs(f, cubicCoeff, cbuf); + cubicCoeffs(f, cubicCoeff, cbuf); if (is_fixpt) { short* coeffs = reinterpret_cast(xcoeffs) + cn * ksize * d; @@ -3638,16 +3641,16 @@ private: } } - if (antialias) + if (antialias && scaled.y < 1.0) { float a = M(1, 0), b = M(1, 1); for (int d = 0; d < dsize.height; ++d) { float f = fmaf(static_cast(d), a, b); if (sampler == INTER_LINEAR) - linear_coeffs_antialias(d, 1, f, scale.y, ssize.height, ytab + d * ykanti); + linearCoeffsAntialias(d, 1, f, scale.y, ssize.height, ytab + d * ykanti); else // if (sampler == INTER_CUBIC) - cubic_coeffs_antialias(d, 1, f, scale.y, ssize.height, cubicCoeff, ytab + d * ykanti); + cubicCoeffsAntiAlias(d, 1, f, scale.y, ssize.height, cubicCoeff, ytab + d * ykanti); } } else @@ -3662,9 +3665,9 @@ private: f -= s; yofs[d] = s; if (sampler == INTER_LINEAR) - linear_coeffs(f, cbuf); + linearCoeffs(f, cbuf); else // if (sampler == INTER_CUBIC) - cubic_coeffs(f, cubicCoeff, cbuf); + cubicCoeffs(f, cubicCoeff, cbuf); if (is_fixpt) { short* coeffs = reinterpret_cast(ycoeffs) + 1 * ksize * d; @@ -3755,7 +3758,7 @@ public: "something wrong"); } - void hori_antialias_accumulate(T const* S, IdxT* L) const + void horiAntialiasAccumulate(T const* S, IdxT* L) const { IdxT alpha; int const cn = dst.channels(); @@ -3809,15 +3812,11 @@ public: } } - void hori_antialias_lines(T const** srcptr, WT** dstptr, IdxT* L, int count) const + void horiAntialiasLines(T const** srcptr, WT** dstptr, IdxT* L, int count) const { int cn = dst.channels(); int dwidth = dst.cols * cn; -#ifdef CV_CXX11 - constexpr bool same_wt_idxt = 
std::is_same::value; -#else - bool const same_wt_idxt = false; -#endif + bool const same_wt_idxt = std::is_same::value; for (int i = 0; i < count; ++i) { T const* S = srcptr[i]; @@ -3825,7 +3824,7 @@ public: if (same_wt_idxt) L = reinterpret_cast(dstptr[i]); memset(L, 0, sizeof(IdxT) * dwidth); - hori_antialias_accumulate(S, L); + horiAntialiasAccumulate(S, L); if (!same_wt_idxt) { WT* D = dstptr[i]; @@ -3844,7 +3843,7 @@ public: } } - void hori_generic_lines(T const** srcptr, WT** dstptr, int count) const + void horiGenericLines(T const** srcptr, WT** dstptr, int count) const { int cn = src.channels(); int ssize = src.cols * cn; @@ -3857,53 +3856,7 @@ public: ssize, dsize, cn, xmin, xmax); } - void vert_antialias_hori_antialias(int dy, IdxT* L, IdxT* A) const - { - // the start and end of ytab - int dwidth = dst.channels() * dst.cols; - int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti; - memset(A, 0, dwidth * sizeof(IdxT)); - for (int t = tstart; t < tend; ++t) - { - IdxT beta; - int sy = ctrl.ytab[t].si; - CV_CheckEQ(dy, ctrl.ytab[t].di, "something wrong"); - ctrl.ytab[t].as(beta); - memset(L, 0, dwidth * sizeof(IdxT)); - hori_antialias_accumulate(src.template ptr(sy), L); - for (int w = 0; w < dwidth; ++w) - A[w] += L[w] * beta; - } - T* D = dst.template ptr(dy); - for (int w = 0; w < dwidth; ++w) - D[w] = saturate_cast(A[w]); - } - - void vert_antialias_hori_generic(int dy, WT* L, IdxT* A) const - { - // FixedPtCast cast; - int dwidth = dst.channels() * dst.cols; - int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti; - memset(A, 0, dwidth * sizeof(IdxT)); - for (int t = tstart; t < tend; ++t) - { - IdxT beta; - int sy = ctrl.ytab[t].si; - CV_CheckEQ(dy, ctrl.ytab[t].di, "something wrong"); - ctrl.ytab[t].as(beta); - T const* S = src.template ptr(sy); - hori_generic_lines(&S, &L, 1); - if (ctrl.is_fixpt) - beta /= INTER_RESIZE_COEF_SCALE; - for (int w = 0; w < dwidth; ++w) - A[w] += L[w] * beta; - } - T* D = dst.template ptr(dy); - for (int w = 
0; w < dwidth; ++w) - D[w] = saturate_cast(A[w]); - } - - void vert_antialias(Range const& range) const + void vertAntialias(Range const& range) const { int cn = dst.channels(); int dwidth = dst.cols * cn; @@ -3913,14 +3866,38 @@ public: WT* Lw = reinterpret_cast(L); for (int dy = range.start; dy < range.end; ++dy) { - if (ctrl.xkanti) - vert_antialias_hori_antialias(dy, L, A); - else - vert_antialias_hori_generic(dy, Lw, A); + int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti; + memset(A, 0, dwidth * sizeof(IdxT)); + for (int t = tstart; t < tend; ++t) + { + IdxT beta; + int sy = ctrl.ytab[t].si; + CV_CheckEQ(dy, ctrl.ytab[t].di, "something wrong"); + ctrl.ytab[t].as(beta); + T const* S = src.template ptr(sy); + if (ctrl.xkanti) + { + memset(L, 0, dwidth * sizeof(IdxT)); + horiAntialiasAccumulate(S, L); + for (int w = 0; w < dwidth; ++w) + A[w] += L[w] * beta; + } + else + { + horiGenericLines(&S, &Lw, 1); + if (ctrl.is_fixpt) + beta /= INTER_RESIZE_COEF_SCALE; + for (int w = 0; w < dwidth; ++w) + A[w] += Lw[w] * beta; + } + } + T* D = dst.template ptr(dy); + for (int w = 0; w < dwidth; ++w) + D[w] = saturate_cast(A[w]); } } - void vert_generic(Range const& range) const + void vertGeneric(Range const& range) const { int ksize = ctrl.ksize, ksize2 = ksize / 2; int cn = src.channels(); @@ -3963,9 +3940,9 @@ public: if (k0 < ksize) { if (ctrl.xkanti) - hori_antialias_lines(srows + k0, rows + k0, L, ksize - k0); + horiAntialiasLines(srows + k0, rows + k0, L, ksize - k0); else - hori_generic_lines(srows + k0, rows + k0, ksize - k0); + horiGenericLines(srows + k0, rows + k0, ksize - k0); } vresize(const_cast(rows), dst.template ptr(dy), beta, dwidth); } @@ -3974,9 +3951,9 @@ public: virtual void operator() (Range const& range) const CV_OVERRIDE { if (ctrl.ykanti) - vert_antialias(range); + vertAntialias(range); else - vert_generic(range); + vertGeneric(range); } }; @@ -4003,7 +3980,7 @@ typedef void (*ResizeAreaFunc)( const Mat& src, Mat& dst, const 
DecimateAlpha* ytab, int ytab_size, const int* yofs); -typedef void (*ResizeOnnxFunc)(Mat const& src, Mat& dst, ResizeOnnxCtrl const&); +typedef void (*ResizeOnnxFunc)(Mat const& src, Mat& dst, ResizeOnnxCtrl const& ctrl); static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, DecimateAlpha* tab ) @@ -4517,7 +4494,7 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, float* ycoeff = reinterpret_cast(xcoeff + xstride); ocl_resizeOnnxTable(src.cols, dst.cols, pixel_size, sampler, M(0, 0), M(0, 1), cubicCoeff, scale.x, xoffset, xcoeff); - ocl_resizeOnnxTable(src.rows, dst.rows, 1, + ocl_resizeOnnxTable(src.rows, dst.rows, static_cast(src.step[0]), sampler, M(1, 0), M(1, 1), cubicCoeff, scale.y, yoffset, ycoeff); UMat utable; Mat(1, tabsize, CV_32S, table.data()).copyTo(utable); @@ -5175,6 +5152,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, void cv::resizeOnnx(InputArray _src, OutputArray _dst, Size dsize, Point2d scale, int interpolation, float cubicCoeff, Rect2d const& roi) { + static_assert((1 << INTER_SAMPLER_BIT) >= INTER_MAX, ""); CV_INSTRUMENT_REGION(); Size ssize = _src.size(); @@ -5185,7 +5163,7 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst, { CV_CheckGT(scale.x, 0.0, "scale must > 0 if no dsize given"); CV_CheckGT(scale.y, 0.0, "scale must > 0 if no dsize given"); - // https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py#L365 + // https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py // output_size = (scale_factors * np.array(data.shape)).astype(int) dsize.width = static_cast(scale.x * ssize.width ); dsize.height = static_cast(scale.y * ssize.height); @@ -5196,8 +5174,8 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst, scale.y = static_cast(dsize.height) / ssize.height; } CV_CheckFalse(dsize.empty(), "dst size must not empty"); - CV_CheckGT(scale.x, 0.0, "computed scale <= 0 with given dsize"); - CV_CheckGT(scale.y, 0.0, "computed scale <= 0 with 
given dsize"); + CV_CheckGT(scale.x, 0.0, "require computed or given scale > 0"); + CV_CheckGT(scale.y, 0.0, "require computed or given scale > 0"); int sampler = interpolation & INTER_SAMPLER_MASK; int nearest = interpolation & INTER_NEAREST_MODE_MASK; @@ -5237,6 +5215,9 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst, _src.copyTo(_dst); return; } + // Antialias is applied when downsampling + if (scale.x >= 1.0 && scale.y >= 1.0) + interpolation &= ~INTER_ANTIALIAS_MASK; // Fake reference to source. Resolves issue 13577 in case of src == dst. UMat srcUMat; diff --git a/modules/imgproc/test/ocl/test_warp.cpp b/modules/imgproc/test/ocl/test_warp.cpp index 963fd8bc0c..5c33b03b97 100644 --- a/modules/imgproc/test/ocl/test_warp.cpp +++ b/modules/imgproc/test/ocl/test_warp.cpp @@ -338,18 +338,18 @@ OCL_TEST(Resize, overflow_21198) PARAM_TEST_CASE(ResizeOnnx, MatType, double, double, int, bool, int) { - int type, interpolation; + int depth, interpolation; int widthMultiple; double fx, fy; bool useRoi; - Mat middle; - TEST_DECLARE_INPUT_PARAMETER(src); - TEST_DECLARE_OUTPUT_PARAMETER(dst); + Rect src_loc, dst_loc; + Mat src, dst, src_roi, dst_roi; + UMat gsrc, gdst, gsrc_roi, gdst_roi; virtual void SetUp() { - type = GET_PARAM(0); + depth = GET_PARAM(0); fx = GET_PARAM(1); fy = GET_PARAM(2); interpolation = GET_PARAM(3); @@ -357,96 +357,70 @@ PARAM_TEST_CASE(ResizeOnnx, MatType, double, double, int, bool, int) widthMultiple = GET_PARAM(5); } - void random_roi() + void random_submat(int type, + Size& size, Rect& roi, Mat& mat, Mat& sub, UMat& gmat, UMat& gsub) { - CV_Assert(fx > 0 && fy > 0); + int border = useRoi ? 
65 : 0; + roi.x = randomInt(0, border); + roi.y = randomInt(0, border); + roi.width = size.width; + roi.height = size.height; + size.width += roi.x + randomInt(0, border); + size.height += roi.y + randomInt(0, border); + mat = randomMat(size, type, -127, 127); + mat.copyTo(gmat); + sub = mat(roi); + gsub = gmat(roi); + } - Size srcRoiSize = randomSize(10, MAX_VALUE), dstRoiSize; - // Make sure the width is a multiple of the requested value, and no more - srcRoiSize.width += widthMultiple - 1 - (srcRoiSize.width - 1) % widthMultiple; - dstRoiSize.width = cvRound(srcRoiSize.width * fx); - dstRoiSize.height = cvRound(srcRoiSize.height * fy); - - if (dstRoiSize.empty()) + void random_roi(int type) + { + Size srcSize, dstSize; + int minSize = min(fx, fy) < 1.0 ? 10 : 1; + while (dstSize.empty()) { - random_roi(); - return; + srcSize = randomSize(minSize, 129); + srcSize.width += widthMultiple - 1 - (srcSize.width - 1) % widthMultiple; + dstSize.width = cvRound(srcSize.width * fx); + dstSize.height = cvRound(srcSize.height * fy); } - Border srcBorder = randomBorder(0, useRoi ? 
MAX_VALUE : 0); - randomSubMat(src, src_roi, srcRoiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE); - -#if 0 - // if nearest test failed, maybe the fma issue, try open this #if - // set pixels' value to their coordinate - if ((interpolation & INTER_SAMPLER_MASK) == INTER_NEAREST) - { - int channel = CV_MAT_CN(type); - middle.create(src.rows, src.cols, CV_16SC(channel)); - for (int h = 0; h < src.rows; ++h) - { - for (int c = 0; c < channel; c += 2) - { - // even x; odd y - short* S = middle.ptr(h) + c; - for (int w = 0; w < src.cols; ++w, S += channel) - S[0] = static_cast(w); - } - for (int c = 1; c < channel; c += 2) - { - // even x; odd y - short* S = middle.ptr(h) + c; - for (int w = 0; w < src.cols; ++w, S += channel) - S[0] = static_cast(h); - } - } - middle.convertTo(src, type); - src_roi = src(Rect(srcBorder.lef, srcBorder.top, srcRoiSize.width, srcRoiSize.height)); - } -#endif - Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); - randomSubMat(dst, dst_roi, dstRoiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE); - - UMAT_UPLOAD_INPUT_PARAMETER(src); - UMAT_UPLOAD_OUTPUT_PARAMETER(dst); + random_submat(type, srcSize, src_loc, src, src_roi, gsrc, gsrc_roi); + random_submat(type, dstSize, dst_loc, dst, dst_roi, gdst, gdst_roi); } }; OCL_TEST_P(ResizeOnnx, Mat) { - Size whole; - Point offset; Mat host, host_roi; - int cn = CV_MAT_CN(type); - int depth = CV_MAT_DEPTH(type); double eps = depth <= CV_32S ? 
integerEps : 5e-2; - for (int j = 0; j < test_loop_times; j++) + // loop on channel to reduce the number of test + for (int cn = 1; cn <= 6; ++cn) { - random_roi(); - - OCL_OFF(cv::resizeOnnx(src_roi, dst_roi, - dst_roi.size(), Point2d(fx, fy), interpolation)); - OCL_ON(cv::resizeOnnx(usrc_roi, udst_roi, - dst_roi.size(), Point2d(fx, fy), interpolation)); - - dst_roi.locateROI(whole, offset); - udst.copyTo(host); - host_roi = host(Rect(offset, dst_roi.size())); - if (cn <= 4 && depth != CV_8S && depth != CV_32S) - OCL_EXPECT_MAT_N_DIFF(dst, eps); - else + int type = CV_MAKETYPE(depth, cn); + for (int j = 0; j < test_loop_times; ++j) { - // more strict than OCL_EXPECT_MAT_N_DIFF - double dif = cv::norm(dst_roi, host_roi, NORM_INF); - EXPECT_LE(dif, eps) - << "Size: " << src_roi.size() - << ", NormInf: " << dif << std::endl; + random_roi(type); + + OCL_OFF(cv::resizeOnnx(src_roi, dst_roi, + dst_roi.size(), Point2d(fx, fy), interpolation)); + OCL_ON(cv::resizeOnnx(gsrc_roi, gdst_roi, + dst_roi.size(), Point2d(fx, fy), interpolation)); + + // copy whole gdst to make sure that + // we really use the given roi memory and not allocate a new one + gdst.copyTo(host); + host_roi = host(dst_loc); + string info = cv::format( + "fail on type %sC%d src %dx%d dst %dx%d src_roi %dx%d dst_roi %dx%d", + depthToString(depth), cn, src.cols, src.rows, dst.cols, dst.rows, + src_roi.cols, src_roi.rows, dst_roi.cols, dst_roi.rows); + EXPECT_LE(cv::norm(dst_roi, host_roi, NORM_INF), eps) << info; } } } - ///////////////////////////////////////////////////////////////////////////////////////////////// // remap @@ -689,23 +663,18 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpResizeArea, Resize, Combine( Bool(), Values(1, 16))); -OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAlias, ResizeOnnx, Combine( - Values( - CV_8UC1, CV_8SC2, CV_8UC(5), CV_8SC(7), - CV_16UC1, CV_16SC3, CV_16UC(9), CV_16SC(10), - CV_32FC1, CV_32FC4, CV_32FC(11)), - Values(0.5, 0.31, 1.4), - Values(0.5, 0.73, 3.7), 
+OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, ResizeOnnx, Combine( + Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32F), + Values(0.4, 0.27, 1.6), + Values(0.5, 0.71, 2.7), Values((int)(INTER_LINEAR), (int)(INTER_CUBIC)), Bool(), Values(1, 16))); + OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAntiAlias, ResizeOnnx, Combine( - Values( - CV_8UC1, CV_8SC2, CV_8UC(5), CV_8SC(7), - CV_16UC1, CV_16SC3, CV_16UC(9), CV_16SC(10), - CV_32FC1, CV_32FC4, CV_32FC(11)), - Values(0.5, 0.27, 2.6), - Values(0.5, 0.71, 4.1), + Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32F), + Values(0.4, 0.27, 1.6), + Values(0.5, 0.71, 2.7), Values( (int)(INTER_ANTIALIAS | INTER_LINEAR), (int)(INTER_ANTIALIAS | INTER_CUBIC )), @@ -713,12 +682,9 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAntiAlias, ResizeOnnx, Combine( Values(1, 16))); OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpNearest, ResizeOnnx, Combine( - Values( - CV_8UC1, CV_8SC2, CV_8UC4, CV_8SC(7), - CV_16UC1, CV_16SC3, CV_16UC(9), CV_32SC(10), - CV_32FC1, CV_32FC4, CV_32FC(11)), - Values(0.5, 0.27, 2.6), - Values(0.5, 0.71, 4.1), + Values(CV_8S, CV_16S, CV_32F, CV_64F), + Values(0.4, 0.27, 1.6), + Values(0.5, 0.71, 2.7), Values( (int)(INTER_NEAREST | INTER_NEAREST_PREFER_FLOOR), (int)(INTER_NEAREST | INTER_NEAREST_PREFER_CEIL), diff --git a/modules/imgproc/test/test_resize_onnx.cpp b/modules/imgproc/test/test_resize_onnx.cpp index 0de233c63c..4a4a8b143c 100644 --- a/modules/imgproc/test/test_resize_onnx.cpp +++ b/modules/imgproc/test/test_resize_onnx.cpp @@ -43,10 +43,10 @@ struct ResizeOnnx Mat iS(szsrc, CV_64F, insrc.data()); Mat iR(szref, CV_64F, inref.data()); Mat S = iS, R = iR, nS, nR; - double alpha[6] = {1, 1, 5, 5, -1, -3}; - double beta[6] = {0, 7, 0, 7, +0, -7}; + double alpha[8] = {1, -1, 5, 5, 0, -3, -2, +4}; + double beta[8] = {0, -0, 0, 7, 7, -7, -6, +6}; RNG rng; - for (int cn = 1; cn <= 6; ++cn) + for (int cn = 1; cn <= 8; ++cn) { if (cn > 1) { @@ -59,7 +59,7 @@ struct ResizeOnnx { double eps = (depth <= CV_32S) ? 
1.0 : 1e-3; int type = CV_MAKETYPE(depth, cn); - string errinfo = "failed on type " + typeToString(type); + string errinfo = "fail on type " + typeToString(type); Mat src, ref, dst; rand_roi(rng, src, szsrc, type); if (szdst.area()) From 47d086916929c611463148ddddc668212722d321 Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Tue, 11 Jun 2024 10:01:32 +0800 Subject: [PATCH 04/12] fix some typos --- modules/imgproc/src/opencl/resize_onnx.cl | 11 +-- modules/imgproc/src/resize.cpp | 93 +++++----------------- modules/ts/include/opencv2/ts/ocl_test.hpp | 4 +- 3 files changed, 27 insertions(+), 81 deletions(-) diff --git a/modules/imgproc/src/opencl/resize_onnx.cl b/modules/imgproc/src/opencl/resize_onnx.cl index 8b7c96cea0..611e0d6b35 100644 --- a/modules/imgproc/src/opencl/resize_onnx.cl +++ b/modules/imgproc/src/opencl/resize_onnx.cl @@ -138,16 +138,12 @@ __kernel void resizeOnnx_linear_antialias( __global uchar const* srcptr, int src_step, int src_offset, int src_rows, int src_cols, __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, int pixel_size, int channel, float m00, float m01, float m10, float m11, - float xscale, float yscale) + float xscale, float yscale, int xstart, int ystart, int xend, int yend) { int dx = get_global_id(0); int dy = get_global_id(1); if (dx < dst_cols && dy < dst_rows) { - int xstart = convert_int_rtn(-1.f / xscale) + 1; - int xend = 2 - xstart; - int ystart = convert_int_rtn(-1.f / yscale) + 1; - int yend = 2 - ystart; float fx = fma(dx, m00, m01), fy = fma(dy, m10, m11); int ix = convert_int_rtn(fx), iy = convert_int_rtn(fy); float rx = fx - ix, ry = fy - iy; @@ -307,14 +303,13 @@ __kernel void resizeOnnx_cubic( __kernel void resizeOnnx_table( __global uchar const* srcptr, int src_step, int src_offset, int src_rows, int src_cols, __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, - int pixel_size, int channel, int xkanti, int ykanti, __global int const* table) + int pixel_size, 
int channel, int xkanti, int ykanti, int xstride, int ystride, + __global int const* table) { int dx = get_global_id(0); int dy = get_global_id(1); if (dx < dst_cols && dy < dst_rows) { - int xstride = xkanti * dst_cols; - int ystride = ykanti * dst_rows; __global uchar* D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset)); __global int const* xoffset = table; __global int const* yoffset = xoffset + xstride; diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index c763379bb9..5d8fb0d30e 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -3735,7 +3735,7 @@ public: else { CV_Check(sizeof(AT), (std::is_same::value), - "when use float coeffs, AT is expected to be short"); + "when use float coeffs, AT is expected to be float"); CV_Check(sizeof(T) * 10 + sizeof(WT), (std::is_same::value && (std::is_same::value || std::is_same::value @@ -3751,11 +3751,11 @@ public: else { CV_Check(ctrl.is_double, (std::is_same::value), - "when use double coeffs, AT is expected to be double"); + "when use float coeffs, IdxT is expected to be float"); } CV_Check(sizeof(IdxT) * 10 + sizeof(WT), (std::is_same::type>::value), - "something wrong"); + "we need that IdxT is same or more accurate than WT"); } void horiAntialiasAccumulate(T const* S, IdxT* L) const @@ -3851,9 +3851,8 @@ public: int xmin = ctrl.xmin * cn; int xmax = ctrl.xmax * cn; // just call hresize - hresize(srcptr, dstptr, count, - ctrl.xofs, reinterpret_cast(ctrl.xcoeffs), - ssize, dsize, cn, xmin, xmax); + hresize(srcptr, dstptr, count, ctrl.xofs, + reinterpret_cast(ctrl.xcoeffs), ssize, dsize, cn, xmin, xmax); } void vertAntialias(Range const& range) const @@ -3870,9 +3869,9 @@ public: memset(A, 0, dwidth * sizeof(IdxT)); for (int t = tstart; t < tend; ++t) { + CV_DbgCheckEQ(dy, ctrl.ytab[t].di, "something wrong"); IdxT beta; int sy = ctrl.ytab[t].si; - CV_CheckEQ(dy, ctrl.ytab[t].di, "something wrong"); ctrl.ytab[t].as(beta); T const* S = src.template 
ptr(sy); if (ctrl.xkanti) @@ -3931,7 +3930,7 @@ public: } } // remember the first row that needs to be computed - if( k1 == ksize ) + if (k1 == ksize) k0 = min(k0, k); srows[k] = src.template ptr(sy); prev_sy[k] = sy; @@ -4363,6 +4362,11 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, int nearest = interpolation & INTER_NEAREST_MODE_MASK; int antialias = interpolation & INTER_ANTIALIAS_MASK; Point2f scale = static_cast(scaled); + int khalf = (sampler == INTER_LINEAR ? 2 : 4) / 2; + float xscale = min(scale.x, 1.f), yscale = min(scale.y, 1.f); + int xstart = cvFloor(-khalf / xscale) + 1, xend = 2 - xstart; + int ystart = cvFloor(-khalf / yscale) + 1, yend = 2 - ystart; + ocl::Kernel k; UMat src = _src.getUMat(), dst = _dst.getUMat(); size_t globalsize[] = {static_cast(dst.cols), static_cast(dst.rows)}; @@ -4452,7 +4456,7 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, return false; k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), pixel_size, cn, M(0, 0), M(0, 1), M(1, 0), M(1, 1), - min(scale.x, 1.f), min(scale.y, 1.f)); + xscale, yscale, xstart, ystart, xend, yend); } else if (sampler == INTER_CUBIC && !antialias) { @@ -4480,12 +4484,8 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, } else if (sampler == INTER_CUBIC && antialias) { - int ksize = 4; - int khalf = ksize / 2; - int xkanti = 2 * cvCeil(khalf / min(scale.x, 1.f)); - int ykanti = 2 * cvCeil(khalf / min(scale.y, 1.f)); - int xstride = xkanti * dst.cols; - int ystride = ykanti * dst.rows; + int xkanti = xend - xstart, xstride = xkanti * dst.cols; + int ykanti = yend - ystart, ystride = ykanti * dst.rows; int tabsize = (xstride + ystride) * 2; AutoBuffer table(tabsize); int* xoffset = table.data(); @@ -4518,7 +4518,8 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, if (k.empty()) return false; k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), - pixel_size, cn, xkanti, ykanti, 
ocl::KernelArg::PtrReadOnly(utable)); + pixel_size, cn, xkanti, ykanti, xstride, ystride, + ocl::KernelArg::PtrReadOnly(utable)); } else CV_Error(cv::Error::StsError, "should not got here"); @@ -5197,7 +5198,7 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst, coordinate == INTER_ASYMMETRIC || coordinate == INTER_TF_CROP_RESIZE); - // affine transformation matrix: x' = ax + b + // x_org = x * a + b Matx22f M; Vec2f xcoef = interCoordinate( coordinate, dsize.width, ssize.width, scale.x, roi.x, roi.x + roi.width); @@ -5237,7 +5238,7 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst, return; } - static ResizeOnnxFunc linear_tab[] = + static ResizeOnnxFunc linear_tab[CV_DEPTH_MAX] = { resizeOnnx_< HResizeLinear, @@ -5272,7 +5273,7 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst, nullptr }; - static ResizeOnnxFunc cubic_tab[] = + static ResizeOnnxFunc cubic_tab[CV_DEPTH_MAX] = { resizeOnnx_< HResizeCubic, @@ -5307,58 +5308,8 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst, nullptr }; -#if 0 - static ResizeAreaFastFunc areafast_tab[] = - { - resizeAreaFast_ >, - resizeAreaFast_ >, - resizeAreaFast_ >, - resizeAreaFast_ >, - resizeAreaFast_ >, - resizeAreaFast_, - resizeAreaFast_ >, - nullptr - }; - - // check if can use area fast - Point2d inv_scale(1.0 / scale.x, 1.0 / scale.y); - bool areafast_scale = fabs(inv_scale.y - 2.0) + fabs(inv_scale.x - 2.0) <= DBL_EPSILON; - bool areafast_size = (fabs(ssize.height - dsize.height * inv_scale.y) <= DBL_EPSILON) - && (fabs(ssize.width - dsize.width * inv_scale.x) <= DBL_EPSILON); - bool areafast_coordiante = (coordinate == INTER_HALF_PIXEL) - || (coordinate == INTER_HALF_PIXEL_SYMMETRIC) - || (coordinate == INTER_HALF_PIXEL_PYTORCH && min(dsize.height, dsize.width) > 1); - bool areafast_sampler = (sampler == INTER_LINEAR) && !(interpolation & INTER_ANTIALIAS_MASK); - if (areafast_scale && areafast_size && areafast_coordiante && areafast_sampler) - { - int iiy = static_cast(inv_scale.y); - int iix 
= static_cast(inv_scale.x); - int area = iiy * iix; - int srcstep = static_cast(src.step1()); - AutoBuffer _ofs(area + dsize.width * cn); - int* ofs = _ofs.data(); - int* xofs = ofs + area; - ResizeAreaFastFunc func = areafast_tab[depth]; - CV_Check(0, func, "empty implementation in area fast"); - // offsets of a pixel's sources to its left-top - for (int sy = 0, k = 0; sy < iiy; ++sy) - for (int sx = 0; sx < iix; ++sx) - ofs[k++] = sy * srcstep + sx * cn; - // left-top offsets of all pixels on a row - for (int dx = 0; dx < dsize.width; ++dx) - { - int j = dx * cn; - int sx = iix * j; - for(int k = 0; k < cn; k++ ) - xofs[j + k] = sx + k; - } - func(src, dst, ofs, xofs, iix, iiy); - return; - } -#endif - - int depth = src.depth(); - ResizeOnnxCtrl ctrl(interpolation, src.type(), cubicCoeff, ssize, dsize, scale, M); + int depth = src.depth(), type = src.type(); + ResizeOnnxCtrl ctrl(interpolation, type, cubicCoeff, ssize, dsize, scale, M); ResizeOnnxFunc func = linear_tab[depth]; if (sampler == INTER_LINEAR) func = linear_tab[depth]; diff --git a/modules/ts/include/opencv2/ts/ocl_test.hpp b/modules/ts/include/opencv2/ts/ocl_test.hpp index ed456385b9..717eb7b14c 100644 --- a/modules/ts/include/opencv2/ts/ocl_test.hpp +++ b/modules/ts/include/opencv2/ts/ocl_test.hpp @@ -134,11 +134,11 @@ do \ mask(cv::Rect(1, 1, mask.cols - 2, mask.rows - 2)).setTo(0); \ cv::threshold(diff, binary, (double)eps, 255, cv::THRESH_BINARY); \ EXPECT_LE(countNonZero(binary.reshape(1)), (int)(binary.cols*binary.rows*5/1000)) \ - << "Size: " << name ## _roi.size() << ", NormInf: " << cv::norm(diff, NORM_INF) << std::endl; \ + << "Size: " << name ## _roi.size() << std::endl; \ binary.convertTo(binary_8, mask.type()); \ binary_8 = binary_8 & mask; \ EXPECT_LE(countNonZero(binary_8.reshape(1)), (int)((binary_8.cols+binary_8.rows)/100)) \ - << "Size: " << name ## _roi.size() << ", NormInf: " << cv::norm(diff, NORM_INF) << std::endl; \ + << "Size: " << name ## _roi.size() << std::endl; \ } while 
((void)0, 0) #define OCL_EXPECT_MATS_NEAR(name, eps) \ From 971df57c41a29dbe8fd9b92f9632123de6d2d3ae Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Sat, 15 Jun 2024 18:02:02 +0800 Subject: [PATCH 05/12] use simd for vertical antialias - but no significant improvement - add perf_test for cpu / ocl --- modules/imgproc/perf/opencl/perf_imgwarp.cpp | 23 +++++ modules/imgproc/perf/perf_resize.cpp | 27 ++++++ modules/imgproc/src/resize.cpp | 95 ++++++++++++++------ modules/imgproc/test/test_resize_onnx.cpp | 20 ++--- 4 files changed, 126 insertions(+), 39 deletions(-) diff --git a/modules/imgproc/perf/opencl/perf_imgwarp.cpp b/modules/imgproc/perf/opencl/perf_imgwarp.cpp index d13b54bdce..477e17d085 100644 --- a/modules/imgproc/perf/opencl/perf_imgwarp.cpp +++ b/modules/imgproc/perf/opencl/perf_imgwarp.cpp @@ -192,6 +192,29 @@ OCL_PERF_TEST_P(ResizeLinearExactFixture, Resize, SANITY_CHECK(dst, eps); } +typedef tuple ResizeOnnxParams; +typedef TestBaseWithParam ResizeOnnxFixture; + +OCL_PERF_TEST_P(ResizeOnnxFixture, ResizeAntialias, + Combine(OCL_TEST_SIZES, OCL_TEST_TYPES_134, Values(0.3, 0.5, 0.6))) +{ + const ResizeOnnxParams params = GetParam(); + const Size srcSize = get<0>(params); + const int type = get<1>(params); + double scale = get<2>(params); + const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale)); + + checkDeviceMaxMemoryAllocSize(srcSize, type); + checkDeviceMaxMemoryAllocSize(dstSize, type); + + UMat src(srcSize, type), dst(dstSize, type); + declare.in(src, WARMUP_RNG).out(dst); + + OCL_TEST_CYCLE() cv::resizeOnnx(src, dst, dstSize, Point2d(), INTER_LINEAR | INTER_ANTIALIAS); + + SANITY_CHECK_NOTHING(); +} + ///////////// Remap //////////////////////// typedef tuple RemapParams; diff --git a/modules/imgproc/perf/perf_resize.cpp b/modules/imgproc/perf/perf_resize.cpp index 0f470a5f81..22bf75434d 100644 --- a/modules/imgproc/perf/perf_resize.cpp +++ b/modules/imgproc/perf/perf_resize.cpp @@ -280,4 +280,31 @@ 
PERF_TEST_P(MatInfo_Size_Scale_NN, ResizeNNExact, SANITY_CHECK_NOTHING(); } +// The complexity of non-antialias and NN resize is same as that in cv::resize +// The complexity of antialias resize is ralated to dst size and ceil(1.0 / scale) +PERF_TEST_P(MatInfo_Size_Size, ResizeOnnxDownLinearAntialias, Combine( + Values(CV_8UC1, CV_16UC1, CV_16UC4, CV_32SC1, CV_32FC1, CV_32FC3), + Values(sz1440p), + Values(szVGA, szqHD, sz720p, sz1080p) +)) +{ + int matType = get<0>(GetParam()); + Size from = get<1>(GetParam()); + Size to = get<2>(GetParam()); + + cv::Mat src(from, matType), dst(to, matType); + switch(src.depth()) + { + case CV_8U: cvtest::fillGradient(src); break; + case CV_16U: fillFPGradient(src); break; + case CV_32S: fillFPGradient(src); break; + case CV_32F: fillFPGradient(src); break; + } + declare.in(src).out(dst); + + TEST_CYCLE_MULTIRUN(10) resizeOnnx(src, dst, to, Point2d(), INTER_LINEAR | INTER_ANTIALIAS); + + SANITY_CHECK_NOTHING(); +} + } // namespace diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index 5d8fb0d30e..e3f3067117 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -3205,6 +3205,14 @@ inline void saturate_store(const float* src, uchar* dst) { v_store(dst, v_pack(v_pack_u(tmp0, tmp1), v_pack_u(tmp2, tmp3))); } +inline void saturate_store(const float* src, schar* dst) { + const v_int32 tmp0 = v_round(vx_load(src + 0 * VTraits::vlanes())); + const v_int32 tmp1 = v_round(vx_load(src + 1 * VTraits::vlanes())); + const v_int32 tmp2 = v_round(vx_load(src + 2 * VTraits::vlanes())); + const v_int32 tmp3 = v_round(vx_load(src + 3 * VTraits::vlanes())); + v_store(dst, v_pack(v_pack(tmp0, tmp1), v_pack(tmp2, tmp3))); +} + inline void saturate_store(const float* src, ushort* dst) { const v_int32 tmp0 = v_round(vx_load(src + 0 * VTraits::vlanes())); const v_int32 tmp1 = v_round(vx_load(src + 1 * VTraits::vlanes())); @@ -3236,6 +3244,18 @@ struct VArea { typedef v_float64 vWT; }; +inline void 
saturate_store(const double* sum, int width, int* D) { + const int step = VTraits::vlanes() * sizeof(double) / sizeof(int); + int dx = 0, limit = width - step; + for (; dx <= limit; dx += step) + { + v_store(D + dx, v_round( + vx_load(sum + dx + 0 * VTraits::vlanes()), + vx_load(sum + dx + 1 * VTraits::vlanes()))); + } + for (; dx < width; ++dx) + D[dx] = saturate_cast(sum[dx]); +} #else inline void mul(const double* buf, int width, double beta, double* sum) { for (int dx = 0; dx < width; ++dx) { @@ -3248,6 +3268,11 @@ inline void muladd(const double* buf, int width, double beta, double* sum) { sum[dx] += beta * buf[dx]; } } + +inline void saturate_store(const double* sum, int width, int* D) { + for (int dx = 0; dx < width; ++dx) + D[dx] = saturate_cast(sum[dx]); +} #endif template @@ -3275,8 +3300,10 @@ inline void mul(const WT* buf, int width, WT beta, WT* sum) { int dx = 0; #if (CV_SIMD || CV_SIMD_SCALABLE) const int step = VTraits::vWT>::vlanes(); - for (; dx + step < width; dx += step) { - vx_store(sum + dx, v_mul(vx_setall(beta), vx_load(buf + dx))); + const typename VArea::vWT vbeta = vx_setall(beta); + int limit = width - step; + for (; dx <= limit; dx += step) { + vx_store(sum + dx, v_mul(vbeta, vx_load(buf + dx))); } #endif for (; dx < width; ++dx) { @@ -3289,8 +3316,10 @@ inline void muladd(const WT* buf, int width, WT beta, WT* sum) { int dx = 0; #if (CV_SIMD || CV_SIMD_SCALABLE) const int step = VTraits::vWT>::vlanes(); - for (; dx + step < width; dx += step) { - vx_store(sum + dx, v_add(vx_load(sum + dx), v_mul(vx_setall(beta), vx_load(buf + dx)))); + const typename VArea::vWT vbeta = vx_setall(beta); + int limit = width - step; + for (; dx <= limit; dx += step) { + vx_store(sum + dx, v_add(vx_load(sum + dx), v_mul(vbeta, vx_load(buf + dx)))); } #endif for (; dx < width; ++dx) { @@ -3721,7 +3750,7 @@ public: CV_Check(sizeof(T) * 10 + sizeof(WT), (std::is_same::value && (std::is_same::value || std::is_same::value)), - "something wrong"); + "fixpt works 
when T is uchar or schar"); } else if (ctrl.is_double) { @@ -3730,7 +3759,7 @@ public: CV_Check(sizeof(T) * 10 + sizeof(WT), (std::is_same::value && (std::is_same::value || std::is_same::value)), - "something wrong"); + "double WT works when T is int or double"); } else { @@ -3740,13 +3769,13 @@ public: (std::is_same::value && (std::is_same::value || std::is_same::value || std::is_same::value)), - "something wrong"); + "float WT works for other types"); } // check antialias resize if (ctrl.is_double) { CV_Check(ctrl.is_double, (std::is_same::value), - "when use double coeffs, AT is expected to be double"); + "when use double coeffs, IdxT is expected to be double"); } else { @@ -3827,18 +3856,21 @@ public: horiAntialiasAccumulate(S, L); if (!same_wt_idxt) { - WT* D = dstptr[i]; - if (ctrl.is_fixpt) - { - float const alpha = INTER_RESIZE_COEF_SCALE; - for (int k = 0; k < dwidth; ++k) - D[k] = saturate_cast(L[k] * alpha); - } - else - { - for (int k = 0; k < dwidth; ++k) - D[k] = saturate_cast(L[k]); - } + // only when is_fixpt, wt (int) and idxt (float) can be different + CV_Check(ctrl.is_fixpt, ctrl.is_fixpt && (std::is_same::value) + && (std::is_same::value), ""); + float* Lf = reinterpret_cast(L); + int* D = reinterpret_cast(dstptr[i]); + float const alpha = INTER_RESIZE_COEF_SCALE; + int k = 0; +#if (CV_SIMD || CV_SIMD_SCALABLE) + v_float32 valpha = vx_setall_f32(alpha); + int limit = dwidth - VTraits::vlanes(); + for (; k <= limit; k += VTraits::vlanes()) + v_store(D + k, v_round(v_mul(vx_load(Lf + k), valpha))); +#endif + for (; k < dwidth; ++k) + D[k] = cvRound(Lf[k] * alpha); } } } @@ -3866,7 +3898,6 @@ public: for (int dy = range.start; dy < range.end; ++dy) { int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti; - memset(A, 0, dwidth * sizeof(IdxT)); for (int t = tstart; t < tend; ++t) { CV_DbgCheckEQ(dy, ctrl.ytab[t].di, "something wrong"); @@ -3878,21 +3909,28 @@ public: { memset(L, 0, dwidth * sizeof(IdxT)); horiAntialiasAccumulate(S, L); - for 
(int w = 0; w < dwidth; ++w) - A[w] += L[w] * beta; + if (t == tstart) + inter_area::mul(L, dwidth, beta, A); + else + inter_area::muladd(L, dwidth, beta, A); } else { + // A & Lw maybe different type, can not use inter_area + // A double : Lw double + // A float : Lw float / int horiGenericLines(&S, &Lw, 1); if (ctrl.is_fixpt) beta /= INTER_RESIZE_COEF_SCALE; - for (int w = 0; w < dwidth; ++w) - A[w] += Lw[w] * beta; + if (t == tstart) + for (int w = 0; w < dwidth; ++w) + A[w] = saturate_cast(Lw[w] * beta); + else + for (int w = 0; w < dwidth; ++w) + A[w] += Lw[w] * beta; } } - T* D = dst.template ptr(dy); - for (int w = 0; w < dwidth; ++w) - D[w] = saturate_cast(A[w]); + inter_area::saturate_store(A, dwidth, dst.template ptr(dy)); } } @@ -5317,7 +5355,6 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst, func = cubic_tab[depth]; else CV_Error(CV_StsBadArg, format("Unknown sampler %d", sampler)); - CV_Check(0, func, "empty implementation in area fast"); func(src, dst, ctrl); } diff --git a/modules/imgproc/test/test_resize_onnx.cpp b/modules/imgproc/test/test_resize_onnx.cpp index 4a4a8b143c..6b8c4fc3de 100644 --- a/modules/imgproc/test/test_resize_onnx.cpp +++ b/modules/imgproc/test/test_resize_onnx.cpp @@ -45,7 +45,7 @@ struct ResizeOnnx Mat S = iS, R = iR, nS, nR; double alpha[8] = {1, -1, 5, 5, 0, -3, -2, +4}; double beta[8] = {0, -0, 0, 7, 7, -7, -6, +6}; - RNG rng; + RNG& rng = TS::ptr()->get_rng(); for (int cn = 1; cn <= 8; ++cn) { if (cn > 1) @@ -206,9 +206,9 @@ TEST(ResizeOnnx, downsample_sizes_cubic_antialias) -0.75f, Rect2d(), {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { - 1.7750092, 3.1200073, 4.4650054, - 7.1550016, 8.5 , 9.844998 , - 12.534994, 13.8799925, 15.224991 , + 1.7750092, 3.1200073, 4.4650054, + 7.1550016, 8.5 , 9.844998 , + 12.534994 , 13.8799925, 15.224991 , } }.run(); } @@ -236,9 +236,9 @@ TEST(ResizeOnnx, downsample_sizes_linear_pytorch_half_pixel) -0.75f, Rect2d(), {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
16}, { - 1.6666666, - 7.0 , - 12.333333, + 1.6666666, + 7.0 , + 12.333333 , } }.run(); } @@ -263,9 +263,9 @@ TEST(ResizeOnnx, tf_crop_and_resize) -0.75f, Rect2d(0.6, 0.4, 0.2, 0.2), {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { - 7.6000004, 7.9, 8.2 , - 8.8 , 9.1, 9.400001, - 10.0 , 10.3, 10.6 , + 7.6000004, 7.9, 8.2 , + 8.8 , 9.1, 9.400001, + 10.0 , 10.3, 10.6 , } }.run(); } From 6a795ee253d75169daa9b4d68665b3ece7cfcda7 Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Sat, 15 Jun 2024 22:00:03 +0800 Subject: [PATCH 06/12] reduce the number of resizeOnnx perf - because it takes too long --- modules/imgproc/perf/opencl/perf_imgwarp.cpp | 2 +- modules/imgproc/perf/perf_resize.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/imgproc/perf/opencl/perf_imgwarp.cpp b/modules/imgproc/perf/opencl/perf_imgwarp.cpp index 477e17d085..97a35e8bd2 100644 --- a/modules/imgproc/perf/opencl/perf_imgwarp.cpp +++ b/modules/imgproc/perf/opencl/perf_imgwarp.cpp @@ -195,7 +195,7 @@ OCL_PERF_TEST_P(ResizeLinearExactFixture, Resize, typedef tuple ResizeOnnxParams; typedef TestBaseWithParam ResizeOnnxFixture; -OCL_PERF_TEST_P(ResizeOnnxFixture, ResizeAntialias, +OCL_PERF_TEST_P(ResizeOnnxFixture, LinearAntialias, Combine(OCL_TEST_SIZES, OCL_TEST_TYPES_134, Values(0.3, 0.5, 0.6))) { const ResizeOnnxParams params = GetParam(); diff --git a/modules/imgproc/perf/perf_resize.cpp b/modules/imgproc/perf/perf_resize.cpp index 22bf75434d..39c1311e78 100644 --- a/modules/imgproc/perf/perf_resize.cpp +++ b/modules/imgproc/perf/perf_resize.cpp @@ -282,10 +282,10 @@ PERF_TEST_P(MatInfo_Size_Scale_NN, ResizeNNExact, // The complexity of non-antialias and NN resize is same as that in cv::resize // The complexity of antialias resize is ralated to dst size and ceil(1.0 / scale) -PERF_TEST_P(MatInfo_Size_Size, ResizeOnnxDownLinearAntialias, Combine( - Values(CV_8UC1, CV_16UC1, CV_16UC4, CV_32SC1, CV_32FC1, CV_32FC3), - Values(sz1440p), - Values(szVGA, szqHD, 
sz720p, sz1080p) +PERF_TEST_P(MatInfo_Size_Size, ResizeOnnxLinearAntialias, Combine( + Values(CV_8UC1, CV_8UC3, CV_8UC4), + Values(sz1080p), + Values(szVGA, szqHD, sz720p) )) { int matType = get<0>(GetParam()); From c7815826c4c1f389995b4a23bbcc135a557319b9 Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Mon, 17 Jun 2024 00:44:27 +0800 Subject: [PATCH 07/12] optimize antialias row compute - cache the src-row results within adjacent dst-row --- modules/imgproc/perf/perf_resize.cpp | 8 +-- modules/imgproc/src/resize.cpp | 92 ++++++++++++++++++++-------- 2 files changed, 71 insertions(+), 29 deletions(-) diff --git a/modules/imgproc/perf/perf_resize.cpp b/modules/imgproc/perf/perf_resize.cpp index 39c1311e78..ead4696c22 100644 --- a/modules/imgproc/perf/perf_resize.cpp +++ b/modules/imgproc/perf/perf_resize.cpp @@ -32,7 +32,7 @@ static void fillFPGradient(Mat& img) } } } - +#if 0 PERF_TEST_P(MatInfo_Size_Size, resizeUpLinear, testing::Values( MatInfo_Size_Size_t(CV_8UC1, szVGA, szqHD), @@ -279,13 +279,13 @@ PERF_TEST_P(MatInfo_Size_Scale_NN, ResizeNNExact, EXPECT_GT(countNonZero(dst.reshape(1)), 0); SANITY_CHECK_NOTHING(); } - +#endif // The complexity of non-antialias and NN resize is same as that in cv::resize // The complexity of antialias resize is ralated to dst size and ceil(1.0 / scale) PERF_TEST_P(MatInfo_Size_Size, ResizeOnnxLinearAntialias, Combine( Values(CV_8UC1, CV_8UC3, CV_8UC4), - Values(sz1080p), - Values(szVGA, szqHD, sz720p) + Values(sz1440p), + Values(szVGA, szqHD, sz720p, sz1080p) )) { int matType = get<0>(GetParam()); diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index e3f3067117..50a2d72d1a 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -3482,6 +3482,7 @@ public: /* resize parameter */ bool is_fixpt, is_double; int ksize, xkanti, ykanti; + Point2f scalef; /* for antialias resize */ TabIdx* xtab; @@ -3575,13 +3576,13 @@ private: { int sampler = interpolation & INTER_SAMPLER_MASK; int 
antialias = interpolation & INTER_ANTIALIAS_MASK; - Point2f scale = static_cast(scaled); CV_CheckGE(cubicCoeff, -1.f, "cubic coefficient should range [-1, 0)"); CV_CheckLT(cubicCoeff, +0.f, "cubic coefficient should range [-1, 0)"); CV_Check(sampler, sampler == INTER_LINEAR || sampler == INTER_CUBIC, "should not error"); int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type); + scalef = static_cast(scaled); ksize = (sampler == INTER_LINEAR ? 2 : 4); is_double = (depth == CV_64F); is_fixpt = (depth == CV_8U || depth == CV_8S); @@ -3590,8 +3591,8 @@ private: xofs = yofs = nullptr; xcoeffs = ycoeffs = nullptr; int khalf = ksize / 2; - xkanti = 2 * cvCeil(khalf / min(scale.x, 1.f)); - ykanti = 2 * cvCeil(khalf / min(scale.y, 1.f)); + xkanti = 2 * cvCeil(khalf / min(scalef.x, 1.f)); + ykanti = 2 * cvCeil(khalf / min(scalef.y, 1.f)); area.allocate(xtab, xkanti * dsize.width ); area.allocate(ytab, ykanti * dsize.height); area.allocate(xofs, dsize.width * cn + 1); @@ -3609,9 +3610,9 @@ private: { float f = fmaf(static_cast(d), a, b); if (sampler == INTER_LINEAR) - linearCoeffsAntialias(d, cn, f, scale.x, ssize.width, xtab + d * xkanti); + linearCoeffsAntialias(d, cn, f, scalef.x, ssize.width, xtab + d * xkanti); else // if (sampler == INTER_CUBIC) - cubicCoeffsAntiAlias(d, cn, f, scale.x, ssize.width, cubicCoeff, xtab + d * xkanti); + cubicCoeffsAntiAlias(d, cn, f, scalef.x, ssize.width, cubicCoeff, xtab + d * xkanti); } } else @@ -3677,9 +3678,9 @@ private: { float f = fmaf(static_cast(d), a, b); if (sampler == INTER_LINEAR) - linearCoeffsAntialias(d, 1, f, scale.y, ssize.height, ytab + d * ykanti); + linearCoeffsAntialias(d, 1, f, scalef.y, ssize.height, ytab + d * ykanti); else // if (sampler == INTER_CUBIC) - cubicCoeffsAntiAlias(d, 1, f, scale.y, ssize.height, cubicCoeff, ytab + d * ykanti); + cubicCoeffsAntiAlias(d, 1, f, scalef.y, ssize.height, cubicCoeff, ytab + d * ykanti); } } else @@ -3857,7 +3858,8 @@ public: if (!same_wt_idxt) { // only when is_fixpt, wt (int) 
and idxt (float) can be different - CV_Check(ctrl.is_fixpt, ctrl.is_fixpt && (std::is_same::value) + CV_DbgCheck(ctrl.is_fixpt, ctrl.is_fixpt + && (std::is_same::value) && (std::is_same::value), ""); float* Lf = reinterpret_cast(L); int* D = reinterpret_cast(dstptr[i]); @@ -3891,44 +3893,84 @@ public: { int cn = dst.channels(); int dwidth = dst.cols * cn; - AutoBuffer line(dwidth * 2); - IdxT* L = line.data(); - IdxT* A = line.data() + dwidth; - WT* Lw = reinterpret_cast(L); + // the sample lines on src of the i-th row (i + 1)-th dst-row + // will overlap at most these src-rows + int bufrow = ctrl.ykanti - cvFloor(1.f / ctrl.scalef.y); + Mat buffer(bufrow + 2, dwidth, DataType::depth); + AutoBuffer line((bufrow + 1) * 2); + IdxT* A = buffer.template ptr(bufrow + 1); + int* ysrc = reinterpret_cast(line.data() + bufrow + 1); + size_t szcopy = (ctrl.xkanti ? sizeof(WT) : sizeof(IdxT)) * dwidth; + for (int i = 0; i <= bufrow; ++i) + { + line[i] = buffer.template ptr(i); + ysrc[i] = -1; + } for (int dy = range.start; dy < range.end; ++dy) { - int tstart = dy * ctrl.ykanti, tend = tstart + ctrl.ykanti; - for (int t = tstart; t < tend; ++t) + int tidx = dy * ctrl.ykanti; + for (int t = 0; t < ctrl.ykanti; ++t, ++tidx) { - CV_DbgCheckEQ(dy, ctrl.ytab[t].di, "something wrong"); + CV_DbgCheckEQ(dy, ctrl.ytab[tidx].di, "something wrong"); IdxT beta; - int sy = ctrl.ytab[t].si; - ctrl.ytab[t].as(beta); + ctrl.ytab[tidx].as(beta); + int sy = ctrl.ytab[tidx].si; T const* S = src.template ptr(sy); + // if the sy-th row has been computed already, reuse it. 
+ int y0 = -1; + IdxT* L = line[bufrow]; + for (int i = 0; i < bufrow; ++i) + if (ysrc[i] == sy) + { + y0 = i; + break; + } + // have found, reuse it + if (y0 != -1) + L = line[y0]; + else + { + // not found, compute it + if (ctrl.xkanti) + { + memset(L, 0, dwidth * sizeof(IdxT)); + horiAntialiasAccumulate(S, L); + } + else + { + // A & Lw maybe different type, can not use inter_area + // A double : Lw double + // A float : Lw float / int + WT* Lw = reinterpret_cast(L); + horiGenericLines(&S, &Lw, 1); + } + } if (ctrl.xkanti) { - memset(L, 0, dwidth * sizeof(IdxT)); - horiAntialiasAccumulate(S, L); - if (t == tstart) + if (t == 0) inter_area::mul(L, dwidth, beta, A); else inter_area::muladd(L, dwidth, beta, A); } else { - // A & Lw maybe different type, can not use inter_area - // A double : Lw double - // A float : Lw float / int - horiGenericLines(&S, &Lw, 1); + WT* Lw = reinterpret_cast(L); if (ctrl.is_fixpt) beta /= INTER_RESIZE_COEF_SCALE; - if (t == tstart) + if (t == 0) for (int w = 0; w < dwidth; ++w) A[w] = saturate_cast(Lw[w] * beta); else for (int w = 0; w < dwidth; ++w) A[w] += Lw[w] * beta; } + // backup the last bufrow results + y0 = bufrow - (ctrl.ykanti - t); + if (y0 >= 0 && ysrc[y0] != sy /* line[y0] != L */) + { + ysrc[y0] = sy; + memcpy(line[y0], L, szcopy); + } } inter_area::saturate_store(A, dwidth, dst.template ptr(dy)); } From 734dbbd984059956c690dc70282c04499062ae90 Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Wed, 19 Jun 2024 15:35:50 +0800 Subject: [PATCH 08/12] avoid the copy between cache lines in vertical antialias - but not see significant improvement --- modules/imgproc/perf/perf_resize.cpp | 14 ++---- modules/imgproc/src/resize.cpp | 66 +++++++++++++--------------- 2 files changed, 34 insertions(+), 46 deletions(-) diff --git a/modules/imgproc/perf/perf_resize.cpp b/modules/imgproc/perf/perf_resize.cpp index 355a0482b6..1c133d7b88 100644 --- a/modules/imgproc/perf/perf_resize.cpp +++ b/modules/imgproc/perf/perf_resize.cpp @@ -280,8 
+280,6 @@ PERF_TEST_P(MatInfo_Size_Scale_NN, ResizeNNExact, SANITY_CHECK_NOTHING(); } -// The complexity of non-antialias and NN resize is same as that in cv::resize -// The complexity of antialias resize is ralated to dst size and ceil(1.0 / scale) PERF_TEST_P(MatInfo_Size_Size, ResizeOnnxLinearAntialias, Combine( Values(CV_8UC1, CV_8UC3, CV_8UC4), Values(sz1440p), @@ -293,18 +291,12 @@ PERF_TEST_P(MatInfo_Size_Size, ResizeOnnxLinearAntialias, Combine( Size to = get<2>(GetParam()); cv::Mat src(from, matType), dst(to, matType); - switch(src.depth()) - { - case CV_8U: cvtest::fillGradient(src); break; - case CV_16U: fillFPGradient(src); break; - case CV_32S: fillFPGradient(src); break; - case CV_32F: fillFPGradient(src); break; - } declare.in(src).out(dst); + declare.time(100); - TEST_CYCLE_MULTIRUN(10) resizeOnnx(src, dst, to, Point2d(), INTER_LINEAR | INTER_ANTIALIAS); + TEST_CYCLE() resizeOnnx(src, dst, to, Point2d(), INTER_LINEAR | INTER_ANTIALIAS); SANITY_CHECK_NOTHING(); } -} // namespace +} diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index 3632d40aaa..ffc28da20d 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -3740,6 +3740,10 @@ public: ResizeOnnxInvoker(const Mat& _src, Mat& _dst, ResizeOnnxCtrl const& _ctrl) : src(_src), dst(_dst), ctrl(_ctrl) { + static_assert(sizeof(WT) == sizeof(IdxT), "expected"); + static_assert(std::is_same::type>::value, + "IdxT double : WT double | IdxT float : WT float / int"); + CV_CheckLE(ctrl.ksize, MAX_ESIZE, "resampler kernel's size is too larger"); CV_Check(ctrl.is_fixpt, !(ctrl.is_fixpt && ctrl.is_double), "can not be both types"); // prefer static_assert, but how ? 
@@ -3783,9 +3787,6 @@ public: CV_Check(ctrl.is_double, (std::is_same::value), "when use float coeffs, IdxT is expected to be float"); } - CV_Check(sizeof(IdxT) * 10 + sizeof(WT), - (std::is_same::type>::value), - "we need that IdxT is same or more accurate than WT"); } void horiAntialiasAccumulate(T const* S, IdxT* L) const @@ -3894,18 +3895,14 @@ public: int cn = dst.channels(); int dwidth = dst.cols * cn; // the sample lines on src of the i-th and (i + 1)-th dst-row - // will overlap at most these src-rows - int bufrow = ctrl.ykanti - cvFloor(1.f / ctrl.scalef.y); - Mat buffer(bufrow + 2, dwidth, DataType::depth); - AutoBuffer line((bufrow + 1) * 2); - IdxT* A = buffer.template ptr(bufrow + 1); - int* ysrc = reinterpret_cast(line.data() + bufrow + 1); - size_t szcopy = (ctrl.xkanti ? sizeof(WT) : sizeof(IdxT)) * dwidth; - for (int i = 0; i <= bufrow; ++i) - { - line[i] = buffer.template ptr(i); + // will overlap at most bufrow src-rows + int bstart = 0, bufrow = ctrl.ykanti - cvFloor(1.f / ctrl.scalef.y); + // a ring buffer, have bufrow lines, begin with bstart + Mat buffer(bufrow + 1, dwidth * sizeof(IdxT), CV_8U); + AutoBuffer ysrc(bufrow); + IdxT* A = buffer.template ptr(bufrow); + for (int i = 0; i < bufrow; ++i) ysrc[i] = -1; - } for (int dy = range.start; dy < range.end; ++dy) { int tidx = dy * ctrl.ykanti; @@ -3915,25 +3912,24 @@ public: IdxT beta; ctrl.ytab[tidx].as(beta); int sy = ctrl.ytab[tidx].si; - T const* S = src.template ptr(sy); + IdxT* L = nullptr; // if the sy-th row has been computed already, reuse it. 
- int y0 = -1; - IdxT* L = line[bufrow]; for (int i = 0; i < bufrow; ++i) if (ysrc[i] == sy) { - y0 = i; + L = buffer.template ptr(i); break; } - // have found, reuse it - if (y0 != -1) - L = line[y0]; - else + // else, compute and save to the buffer line with the minimum ysrc + if (!L) { - // not found, compute it + T const* S = src.template ptr(sy); + L = buffer.template ptr(bstart); + ysrc[bstart] = sy; + bstart = (bstart + 1) % bufrow; if (ctrl.xkanti) { - memset(L, 0, dwidth * sizeof(IdxT)); + memset(L, 0, buffer.cols * sizeof(uchar)); horiAntialiasAccumulate(S, L); } else @@ -3951,8 +3947,7 @@ public: } else { - // A & Lw maybe different type, can not use inter_area - // A double : Lw double | A float : Lw float / int + // A & Lw (IdxT / WT) maybe different type, can not use inter_area WT* Lw = reinterpret_cast(L); if (ctrl.is_fixpt) beta /= INTER_RESIZE_COEF_SCALE; @@ -3963,13 +3958,6 @@ public: for (int w = 0; w < dwidth; ++w) A[w] += Lw[w] * beta; } - // backup the last bufrow results - y0 = bufrow - (ctrl.ykanti - t); - if (y0 >= 0 && ysrc[y0] != sy /* line[y0] != L */) - { - ysrc[y0] = sy; - memcpy(line[y0], L, szcopy); - } } inter_area::saturate_store(A, dwidth, dst.template ptr(dy)); } @@ -4038,9 +4026,17 @@ public: template static void resizeOnnx_(Mat const& src, Mat& dst, ResizeOnnxCtrl const& ctrl) { + /* The complexity of resize is relate to ksize and: + - non-antialias and NN: dstsize, same as that in cv::resize. + - antialias: dstsize and ceil(1.0 / scale). 
*/ + double nstripes = static_cast(dst.rows) * dst.cols / (1 << 16); + // only parallel by rows + if (ctrl.ykanti) + nstripes *= ceil(1.0 / ctrl.scalef.y); + // do not wake too many threads, really use the cache lines + nstripes = min(nstripes, 2.0 * getNumberOfCPUs()); parallel_for_(Range(0, dst.rows), - ResizeOnnxInvoker(src, dst, ctrl), - static_cast(dst.rows) * dst.cols / (1 << 16)); + ResizeOnnxInvoker(src, dst, ctrl), nstripes); } From 7395881d2d634cae031e05c23bba2d1a8af6a00d Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Sat, 27 Jul 2024 12:35:27 +0800 Subject: [PATCH 09/12] fix warnning cbuf --- .../include/opencv2/core/cuda/vec_math.hpp | 18 +++++++++--------- modules/imgproc/src/resize.cpp | 8 +++----- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/modules/core/include/opencv2/core/cuda/vec_math.hpp b/modules/core/include/opencv2/core/cuda/vec_math.hpp index 0a1205e25b..6e5f1a3b6a 100644 --- a/modules/core/include/opencv2/core/cuda/vec_math.hpp +++ b/modules/core/include/opencv2/core/cuda/vec_math.hpp @@ -376,22 +376,22 @@ CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double) // a += b -#define CV_CUDEV_IMPLEMENT_VEC_BINARY_ASSIGN(op, input_type, output_type) \ +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP_ASSIGN(op, input_type, output_type) \ __device__ __forceinline__ output_type ## 1 & operator op ## = (output_type ## 1 & a, const input_type ## 1 & b) \ { \ - return a = VecTraits::make(a.x op b.x); \ + return a = VecTraits::make(a.x op b.x); \ } \ __device__ __forceinline__ output_type ## 2 & operator op ## = (output_type ## 2 & a, const input_type ## 2 & b) \ { \ - return a = VecTraits::make(a.x op b.x, a.y op b.y); \ + return a = VecTraits::make(a.x op b.x, a.y op b.y); \ } \ __device__ __forceinline__ output_type ## 3 & operator op ## = (output_type ## 3 & a, const input_type ## 3 & b) \ { \ - return a = VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z); \ + return a = VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z); \ } 
\ __device__ __forceinline__ output_type ## 4 & operator op ## = (output_type ## 4 & a, const input_type ## 4 & b) \ { \ - return a = VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \ + return a = VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \ } // binary operators (vec & vec) @@ -413,7 +413,7 @@ CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double) { \ return VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \ } \ - CV_CUDEV_IMPLEMENT_VEC_BINARY_ASSIGN(op, input_type, output_type) + CV_CUDEV_IMPLEMENT_VEC_BINARY_OP_ASSIGN(op, input_type, output_type) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int) @@ -472,8 +472,8 @@ CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint) -#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_ASSIGN -#define CV_CUDEV_IMPLEMENT_VEC_BINARY_ASSIGN(op, input_type, output_type) +#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP_ASSIGN +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP_ASSIGN(op, input_type, output_type) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar) @@ -548,7 +548,7 @@ CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar) CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar) #undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP -#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_ASSIGN +#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP_ASSIGN // binary operators (vec & scalar) diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index ffc28da20d..c7fc534021 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -3600,9 +3600,11 @@ private: area.allocate(xcoeffs, ksize * dsize.width * cn); area.allocate(ycoeffs, ksize * dsize.height * 1); area.commit(); + float cbuf[MAX_ESIZE] = { 0 }; CV_CheckLE(ksize, MAX_ESIZE, "resampler kernel's size is too larger"); - // when upsampling, `antialias` is same 
to `generic`, so use `generic` to speed up + // when upsampling, `antialias` is same as `generic` + // so use `generic` to speed up if (antialias && scaled.x < 1.0) { float a = M(0, 0), b = M(0, 1); @@ -3620,7 +3622,6 @@ private: xkanti = 0; xmin = 0; xmax = dsize.width; - float cbuf[MAX_ESIZE]; float a = M(0, 0), b = M(0, 1); for (int d = 0; d < dsize.width; ++d) { @@ -3686,7 +3687,6 @@ private: else { ykanti = 0; - float cbuf[MAX_ESIZE]; float a = M(1, 0), b = M(1, 1); for (int d = 0; d < dsize.height; ++d) { @@ -4599,8 +4599,6 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, else CV_Error(cv::Error::StsError, "should not got here"); - if (errmsg.size()) - fputs(errmsg.data(), stderr); return k.run(2, globalsize, 0, false); } From 0630a39b53c1ffbff1f6e871da3f8211e3be2bbb Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Sun, 4 Aug 2024 20:18:42 +0800 Subject: [PATCH 10/12] add exclude_outside and delete tf_crop_resize --- modules/imgproc/include/opencv2/imgproc.hpp | 75 ++++---- modules/imgproc/src/opencl/resize_onnx.cl | 83 +++++++-- modules/imgproc/src/resize.cpp | 111 ++++++++---- modules/imgproc/test/ocl/test_warp.cpp | 11 ++ modules/imgproc/test/test_resize_onnx.cpp | 191 ++++++++++++-------- 5 files changed, 312 insertions(+), 159 deletions(-) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index b9a1e9e54b..0ca89edf54 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -281,47 +281,51 @@ enum InterpolationFlags { //! 
ONNX Resize Flags enum ResizeONNXFlags { - INTER_SAMPLER_SHIFT = 0, - INTER_SAMPLER_BIT = 3, - INTER_SAMPLER_MASK = ((1 << INTER_SAMPLER_BIT) - 1) << INTER_SAMPLER_SHIFT, + INTER_SAMPLER_SHIFT = 0, + INTER_SAMPLER_BIT = 3, + INTER_SAMPLER_MASK = ((1 << INTER_SAMPLER_BIT) - 1) << INTER_SAMPLER_SHIFT, - INTER_COORDINATE_SHIFT = INTER_SAMPLER_SHIFT + INTER_SAMPLER_BIT, - INTER_COORDINATE_BIT = 3, - INTER_COORDINATE_MASK = ((1 << INTER_COORDINATE_BIT) - 1) << INTER_COORDINATE_SHIFT, + INTER_COORDINATE_SHIFT = INTER_SAMPLER_SHIFT + INTER_SAMPLER_BIT, + INTER_COORDINATE_BIT = 3, + INTER_COORDINATE_MASK = ((1 << INTER_COORDINATE_BIT) - 1) << INTER_COORDINATE_SHIFT, /** x_original = (x_resized + 0.5) / scale - 0.5 */ - INTER_HALF_PIXEL = 0 << INTER_COORDINATE_SHIFT, + INTER_HALF_PIXEL = 0 << INTER_COORDINATE_SHIFT, /** adjustment = output_width_int / output_width center = input_width / 2 offset = center * (1 - adjustment) x_ori = offset + (x + 0.5) / scale - 0.5 */ - INTER_HALF_PIXEL_SYMMETRIC = 1 << INTER_COORDINATE_SHIFT, + INTER_HALF_PIXEL_SYMMETRIC = 1 << INTER_COORDINATE_SHIFT, /** x_original = length_resized > 1 ? (x_resized + 0.5) / scale - 0.5 : 0 */ - INTER_HALF_PIXEL_PYTORCH = 2 << INTER_COORDINATE_SHIFT, + INTER_HALF_PIXEL_PYTORCH = 2 << INTER_COORDINATE_SHIFT, /** x_original = x_resized * (length_original - 1) / (length_resized - 1) */ - INTER_ALIGN_CORNERS = 3 << INTER_COORDINATE_SHIFT, + INTER_ALIGN_CORNERS = 3 << INTER_COORDINATE_SHIFT, /** x_original = x_resized / scale */ - INTER_ASYMMETRIC = 4 << INTER_COORDINATE_SHIFT, - /** x_original = length_resized > 1 - ? 
start_x * (length_original - 1) + x_resized * (end_x - start_x) * (length_original - 1) / (length_resized - 1) - : 0.5 * (start_x + end_x) * (length_original - 1) */ - INTER_TF_CROP_RESIZE = 5 << INTER_COORDINATE_SHIFT, + INTER_ASYMMETRIC = 4 << INTER_COORDINATE_SHIFT, - INTER_NEAREST_MODE_SHIFT = INTER_COORDINATE_SHIFT + INTER_COORDINATE_BIT, - INTER_NEAREST_MODE_BIT = 2, - INTER_NEAREST_MODE_MASK = ((1 << INTER_NEAREST_MODE_BIT) - 1) << INTER_NEAREST_MODE_SHIFT, + INTER_NEAREST_MODE_SHIFT = INTER_COORDINATE_SHIFT + INTER_COORDINATE_BIT, + INTER_NEAREST_MODE_BIT = 2, + INTER_NEAREST_MODE_MASK = ((1 << INTER_NEAREST_MODE_BIT) - 1) << INTER_NEAREST_MODE_SHIFT, /** round half down: x = ceil(x - 0.5) */ - INTER_NEAREST_PREFER_FLOOR = 0 << INTER_NEAREST_MODE_SHIFT, + INTER_NEAREST_PREFER_FLOOR = 0 << INTER_NEAREST_MODE_SHIFT, /** round half up : x = floor(x + 0.5) */ - INTER_NEAREST_PREFER_CEIL = 1 << INTER_NEAREST_MODE_SHIFT, + INTER_NEAREST_PREFER_CEIL = 1 << INTER_NEAREST_MODE_SHIFT, /** x = floor(x) */ - INTER_NEAREST_FLOOR = 2 << INTER_NEAREST_MODE_SHIFT, + INTER_NEAREST_FLOOR = 2 << INTER_NEAREST_MODE_SHIFT, /** x = ceil(x) */ - INTER_NEAREST_CEIL = 3 << INTER_NEAREST_MODE_SHIFT, + INTER_NEAREST_CEIL = 3 << INTER_NEAREST_MODE_SHIFT, - INTER_ANTIALIAS_SHIFT = INTER_NEAREST_MODE_SHIFT + INTER_NEAREST_MODE_BIT, - INTER_ANTIALIAS_BIT = 1, - INTER_ANTIALIAS_MASK = ((1 << INTER_ANTIALIAS_BIT) - 1) << INTER_ANTIALIAS_SHIFT, - INTER_ANTIALIAS = 1 << INTER_ANTIALIAS_SHIFT, + INTER_ANTIALIAS_SHIFT = INTER_NEAREST_MODE_SHIFT + INTER_NEAREST_MODE_BIT, + INTER_ANTIALIAS_BIT = 1, + INTER_ANTIALIAS_MASK = ((1 << INTER_ANTIALIAS_BIT) - 1) << INTER_ANTIALIAS_SHIFT, + INTER_ANTIALIAS = 1 << INTER_ANTIALIAS_SHIFT, + + INTER_EXCLUDE_OUTSIDE_SHIFT = INTER_ANTIALIAS_SHIFT + INTER_ANTIALIAS_BIT, + INTER_EXCLUDE_OUTSIDE_BIT = 1, + INTER_EXCLUDE_OUTSIDE_MASK = ((1 << INTER_EXCLUDE_OUTSIDE_BIT) - 1) << INTER_EXCLUDE_OUTSIDE_SHIFT, + /** If set, the weight of sampling locations outside the 
image + will be set to 0 and the weight will be renormalized so that their sum is 1.0. + Only available for antialias or bi-cubic resampling. */ + INTER_EXCLUDE_OUTSIDE = 1 << INTER_EXCLUDE_OUTSIDE_SHIFT, }; /** \brief Specify the polar mapping mode @@ -2452,8 +2456,8 @@ enlarge an image, it will generally look best with #INTER_CUBIC (slow) or #INTER @param dst output image; it has the size dsize (when it is non-zero) or the size computed from src.size(), fx, and fy; the type of dst is the same as of src. @param dsize output image size; if it equals zero (`None` in Python), it is computed as: - \f[\texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}\f] - Either dsize or both fx and fy must be non-zero. +\f[\texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}\f] +Either dsize or both fx and fy must be non-zero. @param fx scale factor along the horizontal axis; when it equals 0, it is computed as \f[\texttt{(double)dsize.width/src.cols}\f] @param fy scale factor along the vertical axis; when it equals 0, it is computed as @@ -2470,7 +2474,7 @@ CV_EXPORTS_W void resize( InputArray src, OutputArray dst, https://github.com/onnx/onnx/blob/main/docs/Operators.md#Resize https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py -Not support `exclude_outside` and `extrapolation_value` yet. +Not support `tf_crop_resize` yet. To get a similar result to `cv::resize`, give dsize and: INTER_NEAREST : ASYMMETRIC + NEAREST_FLOOR @@ -2480,18 +2484,17 @@ To get a similar result to `cv::resize`, give dsize and: @param src input image. @param dst output image; it has the size dsize (when it is non-zero) or the size computed from src.size(), scale; the type of dst is the same as of src. @param dsize output image size; if it equals to zero, it is computed as: - \f[\texttt{dsize = Size(int(scale.x * src.cols), int(scale.y * src.rows))}\f] - Either dsize or scale must be non-zero. 
+\f[\texttt{dsize = Size((int)(scale.x * src.cols), (int)(scale.y * src.rows))}\f] +Either dsize or scale must be non-zero. @param scale scale factor; use same definition as ONNX, if scale > 1, it's upsampling. -@param interpolation interpolation / coordiante, see #InterpolationFlags and #ResizeONNXFlags +@param interpolation interpolation flags, see #InterpolationFlags and #ResizeONNXFlags @param cubicCoeff cubic sampling coeff; range \f[[-1.0, 0)\f] -@param roi crop region; if provided, the rois' coordinates are normalized in the coordinate system of the input image; it only takes effect with INTER_TF_CROP_RESIZE (ONNX tf_crop_and_resize) @sa resize */ -CV_EXPORTS_W void resizeOnnx(InputArray src, OutputArray dst, Size dsize, - Point2d scale = Point2d(), int interpolation = INTER_LINEAR | INTER_HALF_PIXEL, - float cubicCoeff = -0.75f, Rect2d const& roi = Rect2d()); +CV_EXPORTS_W void resizeOnnx( + InputArray src, OutputArray dst, Size dsize, Point2d scale = Point2d(), + int interpolation = INTER_LINEAR, float cubicCoeff = -0.75f); /** @brief Applies an affine transformation to an image. 
diff --git a/modules/imgproc/src/opencl/resize_onnx.cl b/modules/imgproc/src/opencl/resize_onnx.cl index 611e0d6b35..731a98f503 100644 --- a/modules/imgproc/src/opencl/resize_onnx.cl +++ b/modules/imgproc/src/opencl/resize_onnx.cl @@ -12,11 +12,13 @@ #define noconvert(x) (x) +// for debug and intellisense #ifndef T # define INTER_NEAREST1 # define INTER_LINEAR1 # define INTER_CUBIC # define INTER_ANTIALIAS1 +# define EXCLUDE_OUTSIDE 1 # define T int # define W double # define CN 3 @@ -155,12 +157,22 @@ __kernel void resizeOnnx_linear_antialias( { VW sline = (VW)(0); float wline = 0; - int sy = clamp(iy + h, 0, src_rows - 1); + int sy = iy + h; +#if EXCLUDE_OUTSIDE + if ((unsigned)(sy) >= (unsigned)(src_rows)) + continue; +#endif + sy = clamp(sy, 0, src_rows - 1); __global uchar const* S = srcptr + sy * src_step + src_offset; for (int w = xstart; w < xend; ++w) { + int sx = ix + w; +#if EXCLUDE_OUTSIDE + if ((unsigned)(sx) >= (unsigned)(src_cols)) + continue; +#endif + sx = clamp(sx, 0, src_cols - 1); // the computation of linear's weights is trival, so do it in kernel - int sx = clamp(ix + w, 0, src_cols - 1); float t = fabs(w - rx) * xscale; t = clamp(1.f - t, 0.f, 1.f); wline += t; @@ -171,8 +183,7 @@ __kernel void resizeOnnx_linear_antialias( weight += u * wline; sumval += u * sline; } - VT d0 = TO_VEC_TYPE(sumval / weight); - storepix(d0, D); + storepix(TO_VEC_TYPE(sumval / weight), D); #else W sumval = 0; float weight = 0; @@ -180,11 +191,21 @@ __kernel void resizeOnnx_linear_antialias( { W sline = 0; float wline = 0; - int sy = clamp(iy + h, 0, src_rows - 1); + int sy = iy + h; +#if EXCLUDE_OUTSIDE + if ((unsigned)(sy) >= (unsigned)(src_rows)) + continue; +#endif + sy = clamp(sy, 0, src_rows - 1); __global uchar const* S = srcptr + sy * src_step + src_offset; for (int w = xstart; w < xend; ++w) { - int sx = clamp(ix + w, 0, src_cols - 1); + int sx = ix + w; +#if EXCLUDE_OUTSIDE + if ((unsigned)(sx) >= (unsigned)(src_cols)) + continue; +#endif + sx = clamp(sx, 
0, src_cols - 1); float t = fabs(w - rx) * xscale; t = clamp(1.f - t, 0.f, 1.f); wline += t; @@ -203,11 +224,21 @@ __kernel void resizeOnnx_linear_antialias( for (int h = ystart; h < yend; ++h) { W sline = 0; - int sy = clamp(iy + h, 0, src_rows - 1); + int sy = iy + h; +#if EXCLUDE_OUTSIDE + if ((unsigned)(sy) >= (unsigned)(src_rows)) + continue; +#endif + sy = clamp(sy, 0, src_rows - 1); __global uchar const* S = srcptr + sy * src_step + src_offset; for (int w = xstart; w < xend; ++w) { - int sx = clamp(ix + w, 0, src_cols - 1); + int sx = ix + w; +#if EXCLUDE_OUTSIDE + if ((unsigned)(sx) >= (unsigned)(src_cols)) + continue; +#endif + sx = clamp(sx, 0, src_cols - 1); float t = fabs(w - rx) * xscale; t = clamp(1.f - t, 0.f, 1.f); sline += t * TO_WORK(((__global T const*)(S + sx * pixel_size))[i]); @@ -222,7 +253,6 @@ __kernel void resizeOnnx_linear_antialias( } } - #elif defined(INTER_CUBIC) && !defined(INTER_ANTIALIAS) float cubicCoeff(float A, float A2, float A3, float x) @@ -253,31 +283,56 @@ __kernel void resizeOnnx_cubic( int xlimit = xstart + 3; int ylimit = ystart + 3; int xoffset[4]; - float xcoeff[4]; + float xcoeff[4], xcoeffsum = 0; for (int x = xstart; x <= xlimit; ++x) { xoffset[x - xstart] = clamp(x, 0, src_cols - 1) * pixel_size; xcoeff [x - xstart] = cubicCoeff(A, A2, A3, x - fx); +#if EXCLUDE_OUTSIDE + if ((unsigned)(x) >= (unsigned)(src_cols)) + xcoeff[x - xstart] = 0; + xcoeffsum += xcoeff[x - xstart]; +#endif } __global uchar* D = dstptr + (dy * dst_step + mad24(dx, pixel_size, dst_offset)); #if CN == 1 || CN == 2 || CN == 3 || CN == 4 VW sum = (VW)(0); +#if EXCLUDE_OUTSIDE + float ycoeffsum = 0; +#endif for (int y = ystart; y <= ylimit; ++y) { +#if EXCLUDE_OUTSIDE + if ((unsigned)(y) >= (unsigned)(src_rows)) + continue; +#endif int yoffset = clamp(y, 0, src_rows - 1) * src_step + src_offset; VW sline = (VW)(0); for (int x = 0; x < 4; ++x) sline += (VW)(xcoeff[x]) * TO_VEC_WORK(loadpix(srcptr + yoffset + xoffset[x])); - sum += sline * 
(VW)(cubicCoeff(A, A2, A3, y - fy)); + float u = cubicCoeff(A, A2, A3, y - fy); +#if EXCLUDE_OUTSIDE + ycoeffsum += u; +#endif + sum += sline * u; } +#if EXCLUDE_OUTSIDE + storepix(TO_VEC_TYPE(sum / (ycoeffsum * xcoeffsum)), D); +#else storepix(TO_VEC_TYPE(sum), D); +#endif #else int yoffset[4]; - float ycoeff[4]; + float ycoeff[4], weight = 0; for (int y = ystart; y <= ylimit; ++y) { yoffset[y - ystart] = clamp(y, 0, src_rows - 1) * src_step + src_offset; ycoeff [y - ystart] = cubicCoeff(A, A2, A3, y - fy); +#if EXCLUDE_OUTSIDE + if ((unsigned)(y) >= (unsigned)(src_rows)) + ycoeff[y - ystart] = 0; + weight += ycoeff[y - ystart] * xcoeffsum; +#endif } for (int i = 0; i < channel; ++i) { @@ -290,7 +345,11 @@ __kernel void resizeOnnx_cubic( (srcptr + yoffset[y] + xoffset[x]))[i]); sum += sline * ycoeff[y]; } +#if EXCLUDE_OUTSIDE + ((__global T*)(D))[i] = TO_TYPE(sum / weight); +#else ((__global T*)(D))[i] = TO_TYPE(sum); +#endif } #endif } diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index c7fc534021..23d9e5eb23 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -952,7 +952,7 @@ static inline void interpolateLanczos4( float x, float* coeffs ) * note: scale may be user input and not equal to (src / dst). * ref to onnx, length_resized is src * scale (float), not dst (int). 
*/ -static Vec2f interCoordinate(int coordinate, int dst, int src, double scale, double start, double end) +static Vec2f interCoordinate(int coordinate, int dst, int src, double scale) { float a, b; if (coordinate == INTER_HALF_PIXEL @@ -979,22 +979,6 @@ static Vec2f interCoordinate(int coordinate, int dst, int src, double scale, dou a = static_cast(1.0 / scale); b = 0.f; } - else if (coordinate == INTER_TF_CROP_RESIZE) - { - CV_CheckGE(start, 0.0, "roi's start is out of image"); - CV_CheckLE(end , 1.0, "roi's end is out of image"); - CV_CheckLT(start, end, "roi's start must be less than its end"); - if (dst <= 1) - { - a = 0.f; - b = static_cast(0.5 * (start + end) * (src - 1.0)); - } - else - { - a = static_cast((end - start) * (src - 1.0) / (src * scale - 1.0)); - b = static_cast(start * (src - 1.0)); - } - } else CV_Error(Error::StsBadArg, format("Unknown coordinate transformation mode %d", coordinate)); return Vec2f(a, b); @@ -3481,6 +3465,14 @@ public: /* resize parameter */ bool is_fixpt, is_double; + int sampler, antialias; + /* only meaningful when do bi-cubic or antialias resampling. + For nearest neighbor, it will have no pixel to select. + For linear without antialias, + the two sample pixels are at least one inside and at most one outside. + So exclude_outside is simply equivalent to clamp. + */ + int exclude_outside; int ksize, xkanti, ykanti; Point2f scalef; @@ -3504,6 +3496,8 @@ private: int start = cvFloor(-2.f / scale) + 1; int end = 2 - start; int len = end - start; + // no need to add FLT_EPSILON. 
+ // in antialias cubic resize, we will have at least ceil(2 / scale) pixels inside float sum = 0; for (int i = start; i < end; ++i) { @@ -3514,8 +3508,11 @@ private: x = A * (((x - 5) * x + 8) * x - 4); else x = 0; + int sx = index + i; + if (exclude_outside && static_cast(sx) >= static_cast(srclen)) + x = 0; elem[i - start].di = cn * dstlen; - elem[i - start].si = cn * min(max(index + i, 0), srclen - 1); + elem[i - start].si = cn * min(max(sx, 0), srclen - 1); elem[i - start].f = x; sum += x; } @@ -3550,8 +3547,11 @@ private: { float x = fabsf(i - ratio) * scale; x = min(max(1.f - x, 0.f), 1.f); + int sx = index + i; + if (exclude_outside && static_cast(sx) >= static_cast(srclen)) + x = 0; elem[i - start].di = cn * dstlen; - elem[i - start].si = cn * min(max(index + i, 0), srclen - 1); + elem[i - start].si = cn * min(max(sx, 0), srclen - 1); elem[i - start].f = x; sum += x; } @@ -3574,8 +3574,9 @@ private: ResizeOnnxCtrl(int interpolation, int type, float cubicCoeff, Size ssize, Size dsize, Point2d const& scaled, Matx22f const& M) { - int sampler = interpolation & INTER_SAMPLER_MASK; - int antialias = interpolation & INTER_ANTIALIAS_MASK; + sampler = interpolation & INTER_SAMPLER_MASK; + antialias = interpolation & INTER_ANTIALIAS_MASK; + exclude_outside = interpolation & INTER_EXCLUDE_OUTSIDE_MASK; CV_CheckGE(cubicCoeff, -1.f, "cubic coefficient should range [-1, 0)"); CV_CheckLT(cubicCoeff, +0.f, "cubic coefficient should range [-1, 0)"); CV_Check(sampler, sampler == INTER_LINEAR || sampler == INTER_CUBIC, @@ -3603,8 +3604,7 @@ private: float cbuf[MAX_ESIZE] = { 0 }; CV_CheckLE(ksize, MAX_ESIZE, "resampler kernel's size is too larger"); - // when upsampling, `antialias` is same as `generic` - // so use `generic` to speed up + // when upsampling, `antialias` is same as `generic`, use `generic` to speed up if (antialias && scaled.x < 1.0) { float a = M(0, 0), b = M(0, 1); @@ -3644,7 +3644,23 @@ private: if (sampler == INTER_LINEAR) linearCoeffs(f, cbuf); else // 
if (sampler == INTER_CUBIC) + { cubicCoeffs(f, cubicCoeff, cbuf); + if (exclude_outside && (s < 1 || s + 2 >= ssize.width)) + { + // no need to add FLT_EPSILON. + // in cubic without antialias, we will have at least 2 pixels inside + float sum = 0; + for (int k = 0; k < 4; ++k) + { + if (static_cast(s + k - 1) >= static_cast(ssize.width)) + cbuf[k] = 0; + sum += cbuf[k]; + } + for (int k = 0; k < 4; ++k) + cbuf[k] /= sum; + } + } if (is_fixpt) { short* coeffs = reinterpret_cast(xcoeffs) + cn * ksize * d; @@ -3697,7 +3713,21 @@ private: if (sampler == INTER_LINEAR) linearCoeffs(f, cbuf); else // if (sampler == INTER_CUBIC) + { cubicCoeffs(f, cubicCoeff, cbuf); + if (exclude_outside && (s < 1 || s + 2 >= ssize.height)) + { + float sum = 0; + for (int k = 0; k < 4; ++k) + { + if (static_cast(s + k - 1) >= static_cast(ssize.height)) + cbuf[k] = 0; + sum += cbuf[k]; + } + for (int k = 0; k < 4; ++k) + cbuf[k] /= sum; + } + } if (is_fixpt) { short* coeffs = reinterpret_cast(ycoeffs) + 1 * ksize * d; @@ -4353,7 +4383,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, return k.run(2, globalsize, 0, false); } -static void ocl_resizeOnnxTable(int srclen, int dstlen, int esz, +static void ocl_resizeOnnxTable(int srclen, int dstlen, int esz, int exclude_outside, int sampler, float a, float b, float A, float scale, int* offset, float* coeff) { // maybe want do linear resize in this way? @@ -4382,13 +4412,16 @@ static void ocl_resizeOnnxTable(int srclen, int dstlen, int esz, else x = 0; } + int sx = index + i; + if (exclude_outside && static_cast(sx) >= static_cast(srclen)) + x = 0; // make work-item(s) in a work-group load offset / coeff in one / fewer memory transaction // offsets & coeffs are arranged like // 00 10 20 ... n0 // 01 11 21 ... n1 ... // 0(k-1) 1(k-1) 2(k-1) ... 
n(k-1) int to = d + (i - start) * dstlen; - offset[to] = min(max(index + i, 0), srclen - 1) * esz; + offset[to] = min(max(sx, 0), srclen - 1) * esz; coeff [to] = x; sum += x; } @@ -4428,7 +4461,6 @@ static char const* ocl_resizeOnnx_convertTypeString(int sdepth, int ddepth, int return buf; } - static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, Matx22f const& M, Point2d const& scaled, int interpolation, float cubicCoeff) { @@ -4436,6 +4468,7 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, int sampler = interpolation & INTER_SAMPLER_MASK; int nearest = interpolation & INTER_NEAREST_MODE_MASK; int antialias = interpolation & INTER_ANTIALIAS_MASK; + int exclude_outside = interpolation & INTER_EXCLUDE_OUTSIDE_MASK; Point2f scale = static_cast(scaled); int khalf = (sampler == INTER_LINEAR ? 2 : 4) / 2; float xscale = min(scale.x, 1.f), yscale = min(scale.y, 1.f); @@ -4512,9 +4545,10 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, { int W = (T < CV_32S || T == CV_32F) ? CV_32F : CV_64F, VW = CV_MAKETYPE(W, cn); buildopts = format( - "-D INTER_LINEAR -D INTER_ANTIALIAS " + "-D INTER_LINEAR -D INTER_ANTIALIAS -D EXCLUDE_OUTSIDE=%d " "-D T=%s -D W=%s -D CN=%d -D PIXEL_SIZE=%d -D VT=%s -D VW=%s " "-D TO_WORK=%s -D TO_VEC_WORK=%s -D TO_TYPE=%s -D TO_VEC_TYPE=%s ", + exclude_outside, ocl_resizeOnnx_typeToString(T, nullptr, 0), ocl_resizeOnnx_typeToString(W, nullptr, 0), cn, pixel_size, @@ -4537,9 +4571,10 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, { int W = (T < CV_32S || T == CV_32F) ? 
CV_32F : CV_64F, VW = CV_MAKETYPE(W, cn); buildopts = format( - "-D INTER_CUBIC " + "-D INTER_CUBIC -D EXCLUDE_OUTSIDE=%d " "-D T=%s -D W=%s -D CN=%d -D PIXEL_SIZE=%d -D VT=%s -D VW=%s " "-D TO_WORK=%s -D TO_VEC_WORK=%s -D TO_TYPE=%s -D TO_VEC_TYPE=%s ", + exclude_outside, ocl_resizeOnnx_typeToString(T, nullptr, 0), ocl_resizeOnnx_typeToString(W, nullptr, 0), cn, pixel_size, @@ -4567,10 +4602,11 @@ static bool ocl_resizeOnnx(InputArray _src, OutputArray _dst, int* yoffset = xoffset + xstride; float* xcoeff = reinterpret_cast(yoffset + ystride); float* ycoeff = reinterpret_cast(xcoeff + xstride); + // use table coeffs, no need to define `-D EXCLUDE_OUTSIDE=%d` ocl_resizeOnnxTable(src.cols, dst.cols, pixel_size, - sampler, M(0, 0), M(0, 1), cubicCoeff, scale.x, xoffset, xcoeff); + exclude_outside, sampler, M(0, 0), M(0, 1), cubicCoeff, scale.x, xoffset, xcoeff); ocl_resizeOnnxTable(src.rows, dst.rows, static_cast(src.step[0]), - sampler, M(1, 0), M(1, 1), cubicCoeff, scale.y, yoffset, ycoeff); + exclude_outside, sampler, M(1, 0), M(1, 1), cubicCoeff, scale.y, yoffset, ycoeff); UMat utable; Mat(1, tabsize, CV_32S, table.data()).copyTo(utable); int W = (T < CV_32S || T == CV_32F) ? 
CV_32F : CV_64F, VW = CV_MAKETYPE(W, cn); @@ -5224,7 +5260,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, void cv::resizeOnnx(InputArray _src, OutputArray _dst, - Size dsize, Point2d scale, int interpolation, float cubicCoeff, Rect2d const& roi) + Size dsize, Point2d scale, int interpolation, float cubicCoeff) { static_assert((1 << INTER_SAMPLER_BIT) >= INTER_MAX, ""); CV_INSTRUMENT_REGION(); @@ -5268,22 +5304,19 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst, coordinate == INTER_HALF_PIXEL_PYTORCH || coordinate == INTER_HALF_PIXEL_SYMMETRIC || coordinate == INTER_ALIGN_CORNERS || - coordinate == INTER_ASYMMETRIC || - coordinate == INTER_TF_CROP_RESIZE); + coordinate == INTER_ASYMMETRIC); // x_org = x * a + b Matx22f M; - Vec2f xcoef = interCoordinate( - coordinate, dsize.width, ssize.width, scale.x, roi.x, roi.x + roi.width); - Vec2f ycoef = interCoordinate( - coordinate, dsize.height, ssize.height, scale.y, roi.y, roi.y + roi.height); + Vec2f xcoef = interCoordinate(coordinate, dsize.width, ssize.width, scale.x); + Vec2f ycoef = interCoordinate(coordinate, dsize.height, ssize.height, scale.y); M(0, 0) = xcoef[0]; M(0, 1) = xcoef[1]; M(1, 0) = ycoef[0]; M(1, 1) = ycoef[1]; _dst.create(dsize, _src.type()); - if (dsize == ssize && coordinate != INTER_TF_CROP_RESIZE) + if (dsize == ssize) { // Source and destination are of same size. Use simple copy. 
_src.copyTo(_dst); @@ -5300,6 +5333,8 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst, CV_OCL_RUN(_src.isUMat() && _dst.isUMat(), ocl_resizeOnnx(_src, _dst, M, scale, interpolation, cubicCoeff)) + // if (cv::ocl::isOpenCLActivated() && _src.isUMat() && _dst.isUMat()) + // CV_Assert(ocl_resizeOnnx(_src, _dst, M, scale, interpolation, cubicCoeff)); Mat src = _src.getMat(), dst = _dst.getMat(); diff --git a/modules/imgproc/test/ocl/test_warp.cpp b/modules/imgproc/test/ocl/test_warp.cpp index 19682c8502..b964b0fdaa 100644 --- a/modules/imgproc/test/ocl/test_warp.cpp +++ b/modules/imgproc/test/ocl/test_warp.cpp @@ -701,6 +701,17 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpAntiAlias, ResizeOnnx, Combine( Bool(), Values(1, 16))); +OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpExcludeOutside, ResizeOnnx, Combine( + Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32F), + Values(0.4, 0.27, 1.6), + Values(0.5, 0.71, 2.7), + Values( + (int)( INTER_CUBIC | INTER_EXCLUDE_OUTSIDE), + (int)(INTER_ANTIALIAS | INTER_CUBIC | INTER_EXCLUDE_OUTSIDE), + (int)(INTER_ANTIALIAS | INTER_LINEAR | INTER_EXCLUDE_OUTSIDE)), + Bool(), + Values(1, 16))); + OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpNearest, ResizeOnnx, Combine( Values(CV_8S, CV_16S, CV_32F, CV_64F), Values(0.4, 0.27, 1.6), diff --git a/modules/imgproc/test/test_resize_onnx.cpp b/modules/imgproc/test/test_resize_onnx.cpp index 6b8c4fc3de..86798a4152 100644 --- a/modules/imgproc/test/test_resize_onnx.cpp +++ b/modules/imgproc/test/test_resize_onnx.cpp @@ -12,7 +12,6 @@ struct ResizeOnnx Size szsrc, szref, szdst; Point2d scale; float cubic; - Rect2d roi; /* make sure insrc is: * (1) integer * (2) range [-127, 127] @@ -44,7 +43,7 @@ struct ResizeOnnx Mat iR(szref, CV_64F, inref.data()); Mat S = iS, R = iR, nS, nR; double alpha[8] = {1, -1, 5, 5, 0, -3, -2, +4}; - double beta[8] = {0, -0, 0, 7, 7, -7, -6, +6}; + double beta[8] = {0, -0, 0, 7, 7, +7, -6, -6}; RNG& rng = TS::ptr()->get_rng(); for (int cn = 1; cn <= 8; ++cn) { @@ -59,21 +58,19 @@ 
struct ResizeOnnx { double eps = (depth <= CV_32S) ? 1.0 : 1e-3; int type = CV_MAKETYPE(depth, cn); - string errinfo = "fail on type " + typeToString(type); Mat src, ref, dst; rand_roi(rng, src, szsrc, type); if (szdst.area()) rand_roi(rng, dst, szdst, type); S.convertTo(src, type); R.convertTo(ref, type); - resizeOnnx(src, dst, szdst, scale, interpolate, cubic, roi); - EXPECT_EQ(ref.size(), dst.size()) << errinfo; + resizeOnnx(src, dst, szdst, scale, interpolate, cubic); // nearest must give bit-same result if ((interpolate & INTER_SAMPLER_MASK) == INTER_NEAREST) - EXPECT_EQ(cv::norm(ref, dst, NORM_INF), 0.0) << errinfo; + EXPECT_MAT_NEAR(ref, dst, 0.0); // cvRound(4.5) = 4, but when doing resize with int, we may get 5 else - EXPECT_LE(cv::norm(ref, dst, NORM_INF), eps) << errinfo; + EXPECT_MAT_NEAR(ref, dst, eps); } } } @@ -85,8 +82,7 @@ TEST(ResizeOnnx, downsample_scales_cubic) { ResizeOnnx{ INTER_CUBIC, - Size(4, 4), Size(3, 3), Size(), Point2d(0.8, 0.8), - -0.75f, Rect2d(), + Size(4, 4), Size(3, 3), Size(), Point2d(0.8, 0.8), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 1.47119141, 2.78125 , 4.08251953, @@ -96,12 +92,25 @@ TEST(ResizeOnnx, downsample_scales_cubic) }.run(); } +TEST(ResizeOnnx, downsample_scales_cubic_A_n0p5_exclude_outside) +{ + ResizeOnnx{ + INTER_CUBIC | INTER_EXCLUDE_OUTSIDE, + Size(4, 4), Size(3, 3), Size(), Point2d(0.8, 0.8), -0.5f, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 1.36812675, 2.6695014 , 4.0133367 , + 6.57362535, 7.875 , 9.2188353 , + 11.94896657, 13.25034122, 14.59417652, + } + }.run(); +} + TEST(ResizeOnnx, downsample_scales_cubic_align_corners) { ResizeOnnx{ INTER_CUBIC | INTER_ALIGN_CORNERS, - Size(4, 4), Size(3, 3), Size(), Point2d(0.8, 0.8), - -0.75f, Rect2d(), + Size(4, 4), Size(3, 3), Size(), Point2d(0.8, 0.8), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 1.0 , 2.39519159, 3.79038317, @@ -115,8 +124,7 @@ TEST(ResizeOnnx, downsample_scales_cubic_antialias) 
{ ResizeOnnx{ INTER_CUBIC | INTER_ANTIALIAS, - Size(4, 4), Size(2, 2), Size(), Point2d(0.6, 0.6), - -0.75f, Rect2d(), + Size(4, 4), Size(2, 2), Size(), Point2d(0.6, 0.6), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 2.5180721, 4.2858863, @@ -129,8 +137,7 @@ TEST(ResizeOnnx, downsample_scales_linear) { ResizeOnnx{ INTER_LINEAR, - Size(4, 2), Size(2, 1), Size(), Point2d(0.6, 0.6), - -0.75f, Rect2d(), + Size(4, 2), Size(2, 1), Size(), Point2d(0.6, 0.6), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8}, {2.6666665, 4.3333331} }.run(); @@ -140,8 +147,7 @@ TEST(ResizeOnnx, downsample_scales_linear_align_corners) { ResizeOnnx{ INTER_LINEAR | INTER_ALIGN_CORNERS, - Size(4, 2), Size(2, 1), Size(), Point2d(0.6, 0.6), - -0.75f, Rect2d(), + Size(4, 2), Size(2, 1), Size(), Point2d(0.6, 0.6), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8}, {1.0, 3.142857} }.run(); @@ -151,8 +157,7 @@ TEST(ResizeOnnx, downsample_scales_linear_antialias) { ResizeOnnx{ INTER_LINEAR | INTER_ANTIALIAS, - Size(4, 4), Size(2, 2), Size(), Point2d(0.6, 0.6), - -0.75f, Rect2d(), + Size(4, 4), Size(2, 2), Size(), Point2d(0.6, 0.6), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 2.875, 4.5, @@ -165,8 +170,7 @@ TEST(ResizeOnnx, downsample_scales_linear_half_pixel_symmetric) { ResizeOnnx{ INTER_LINEAR | INTER_HALF_PIXEL_SYMMETRIC, - Size(4, 1), Size(2, 1), Size(), Point2d(0.6, 1.0), - -0.75f, Rect2d(), + Size(4, 1), Size(2, 1), Size(), Point2d(0.6, 1.0), -0.75f, {1, 2, 3, 4}, {1.6666667, 3.3333333} }.run(); @@ -176,8 +180,7 @@ TEST(ResizeOnnx, downsample_scales_nearest) { ResizeOnnx{ INTER_NEAREST, - Size(4, 2), Size(2, 1), Size(), Point2d(0.6, 0.6), - -0.75f, Rect2d(), + Size(4, 2), Size(2, 1), Size(), Point2d(0.6, 0.6), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 3} }.run(); @@ -187,8 +190,7 @@ TEST(ResizeOnnx, downsample_sizes_cubic) { ResizeOnnx{ INTER_CUBIC, - Size(4, 4), Size(3, 3), Size(3, 3), Point2d(), - -0.75f, Rect2d(), + Size(4, 4), Size(3, 3), Size(3, 3), Point2d(), -0.75f, {1, 2, 3, 4, 5, 
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 1.63078704, 3.00462963, 4.37847222, @@ -202,8 +204,7 @@ TEST(ResizeOnnx, downsample_sizes_cubic_antialias) { ResizeOnnx{ INTER_CUBIC | INTER_ANTIALIAS, - Size(4, 4), Size(3, 3), Size(3, 3), Point2d(), - -0.75f, Rect2d(), + Size(4, 4), Size(3, 3), Size(3, 3), Point2d(), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 1.7750092, 3.1200073, 4.4650054, @@ -217,8 +218,7 @@ TEST(ResizeOnnx, downsample_sizes_linear_antialias) { ResizeOnnx{ INTER_LINEAR | INTER_ANTIALIAS, - Size(4, 4), Size(3, 3), Size(3, 3), Point2d(), - -0.75f, Rect2d(), + Size(4, 4), Size(3, 3), Size(3, 3), Point2d(), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 2.3636363, 3.590909, 4.818182, @@ -232,8 +232,7 @@ TEST(ResizeOnnx, downsample_sizes_linear_pytorch_half_pixel) { ResizeOnnx{ INTER_LINEAR | INTER_HALF_PIXEL_PYTORCH, - Size(4, 4), Size(1, 3), Size(1, 3), Point2d(), - -0.75f, Rect2d(), + Size(4, 4), Size(1, 3), Size(1, 3), Point2d(), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 1.6666666, @@ -247,35 +246,17 @@ TEST(ResizeOnnx, downsample_sizes_nearest) { ResizeOnnx{ INTER_NEAREST, - Size(4, 2), Size(3, 1), Size(3, 1), Point2d(), - -0.75f, Rect2d(), + Size(4, 2), Size(3, 1), Size(3, 1), Point2d(), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 4} }.run(); } -TEST(ResizeOnnx, tf_crop_and_resize) -{ - // Note: for some rois, the result may be different with that of TF for inaccurate floating point - ResizeOnnx{ - INTER_LINEAR | INTER_TF_CROP_RESIZE, - Size(4, 4), Size(3, 3), Size(3, 3), Point2d(), - -0.75f, Rect2d(0.6, 0.4, 0.2, 0.2), - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - { - 7.6000004, 7.9, 8.2 , - 8.8 , 9.1, 9.400001, - 10.0 , 10.3, 10.6 , - } - }.run(); -} - TEST(ResizeOnnx, upsample_scales_cubic) { ResizeOnnx{ INTER_CUBIC, - Size(4, 4), Size(8, 8), Size(), Point2d(2.0, 2.0), - -0.75f, Rect2d(), + Size(4, 4), Size(8, 8), Size(), Point2d(2.0, 2.0), -0.75f, {1, 2, 3, 4, 5, 6, 
7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 0.47265625, 0.76953125, 1.24609375, 1.875, 2.28125, 2.91015625, 3.38671875, 3.68359375, @@ -290,12 +271,30 @@ TEST(ResizeOnnx, upsample_scales_cubic) }.run(); } +TEST(ResizeOnnx, upsample_scales_cubic_A_n0p5_exclude_outside) +{ + ResizeOnnx{ + INTER_CUBIC | INTER_EXCLUDE_OUTSIDE, + Size(4, 4), Size(8, 8), Size(), Point2d(2.0, 2.0), -0.5f, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 0.55882353, 0.81494204, 1.35698249, 1.89705882, 2.39705882, 2.93713516, 3.47917561, 3.73529412, + 1.58329755, 1.83941606, 2.38145651, 2.92153285, 3.42153285, 3.96160918, 4.50364964, 4.75976814, + 3.75145936, 4.00757787, 4.54961832, 5.08969466, 5.58969466, 6.12977099, 6.67181144, 6.92792995, + 5.91176471, 6.16788321, 6.70992366, 7.25, 7.75, 8.29007634, 8.83211679, 9.08823529, + 7.91176471, 8.16788321, 8.70992366, 9.25, 9.75, 10.29007634, 10.83211679, 11.08823529, + 10.07207005, 10.32818856, 10.87022901, 11.41030534, 11.91030534, 12.45038168, 12.99242213, 13.24854064, + 12.24023186, 12.49635036, 13.03839082, 13.57846715, 14.07846715, 14.61854349, 15.16058394, 15.41670245, + 13.26470588, 13.52082439, 14.06286484, 14.60294118, 15.10294118, 15.64301751, 16.18505796, 16.44117647, + } + }.run(); +} + TEST(ResizeOnnx, upsample_scales_cubic_align_corners) { ResizeOnnx{ INTER_CUBIC | INTER_ALIGN_CORNERS, - Size(4, 4), Size(8, 8), Size(), Point2d(2.0, 2.0), - -0.75f, Rect2d(), + Size(4, 4), Size(8, 8), Size(), Point2d(2.0, 2.0), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 1.0, 1.34110787, 1.80029155, 2.32944606, 2.67055394, 3.19970845, 3.65889213, 4.0, @@ -314,8 +313,7 @@ TEST(ResizeOnnx, upsample_scales_cubic_asymmetric) { ResizeOnnx{ INTER_CUBIC | INTER_ASYMMETRIC, - Size(4, 4), Size(8, 8), Size(), Point2d(2.0, 2.0), - -0.75f, Rect2d(), + Size(4, 4), Size(8, 8), Size(), Point2d(2.0, 2.0), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 1.0, 1.40625, 2.0, 2.5, 3.0, 3.59375, 4.0, 4.09375, @@ 
-334,8 +332,7 @@ TEST(ResizeOnnx, upsample_scales_linear) { ResizeOnnx{ INTER_LINEAR, - Size(2, 2), Size(4, 4), Size(), Point2d(2.0, 2.0), - -0.75f, Rect2d(), + Size(2, 2), Size(4, 4), Size(), Point2d(2.0, 2.0), -0.75f, {1, 2, 3, 4}, { 1.0, 1.25, 1.75, 2.0, @@ -350,8 +347,7 @@ TEST(ResizeOnnx, upsample_scales_linear_align_corners) { ResizeOnnx{ INTER_LINEAR | INTER_ALIGN_CORNERS, - Size(2, 2), Size(4, 4), Size(), Point2d(2.0, 2.0), - -0.75f, Rect2d(), + Size(2, 2), Size(4, 4), Size(), Point2d(2.0, 2.0), -0.75f, {1, 2, 3, 4}, { 1.0, 1.33333333, 1.66666667, 2.0, @@ -366,8 +362,7 @@ TEST(ResizeOnnx, upsample_scales_linear_half_pixel_symmetric) { ResizeOnnx{ INTER_LINEAR | INTER_HALF_PIXEL_SYMMETRIC, - Size(2, 2), Size(5, 4), Size(), Point2d(2.94, 2.3), - -0.75f, Rect2d(), + Size(2, 2), Size(5, 4), Size(), Point2d(2.94, 2.3), -0.75f, {1, 2, 3, 4}, { 1.0 , 1.15986395, 1.5 , 1.84013605, 2.0 , @@ -382,8 +377,7 @@ TEST(ResizeOnnx, upsample_scales_nearest) { ResizeOnnx{ INTER_NEAREST, - Size(2, 2), Size(6, 4), Size(), Point2d(3.0, 2.0), - -0.75f, Rect2d(), + Size(2, 2), Size(6, 4), Size(), Point2d(3.0, 2.0), -0.75f, {1, 2, 3, 4}, { 1, 1, 1, 2, 2, 2, @@ -398,8 +392,7 @@ TEST(ResizeOnnx, upsample_sizes_cubic) { ResizeOnnx{ INTER_CUBIC, - Size(4, 4), Size(10, 9), Size(10, 9), Point2d(), - -0.75f, Rect2d(), + Size(4, 4), Size(10, 9), Size(10, 9), Point2d(), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 0.45507922, 0.64057922, 0.97157922, 1.42257922, 1.90732922, 2.22332922, 2.70807922, 3.15907922, 3.49007922, 3.67557922, @@ -419,8 +412,7 @@ TEST(ResizeOnnx, upsample_sizes_nearest) { ResizeOnnx{ INTER_NEAREST, - Size(2, 2), Size(8, 7), Size(8, 7), Point2d(), - -0.75f, Rect2d(), + Size(2, 2), Size(8, 7), Size(8, 7), Point2d(), -0.75f, {1, 2, 3, 4}, { 1, 1, 1, 1, 2, 2, 2, 2, @@ -438,8 +430,7 @@ TEST(ResizeOnnx, upsample_sizes_nearest_ceil_half_pixel) { ResizeOnnx{ INTER_NEAREST | INTER_NEAREST_CEIL, - Size(4, 4), Size(8, 8), Size(8, 8), Point2d(), - -0.75f, 
Rect2d(), + Size(4, 4), Size(8, 8), Size(8, 8), Point2d(), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 1, 2, 2, 3, 3, 4, 4, 4, @@ -458,8 +449,7 @@ TEST(ResizeOnnx, upsample_sizes_nearest_floor_align_corners) { ResizeOnnx{ INTER_NEAREST | INTER_NEAREST_FLOOR | INTER_ALIGN_CORNERS, - Size(4, 4), Size(8, 8), Size(8, 8), Point2d(), - -0.75f, Rect2d(), + Size(4, 4), Size(8, 8), Size(8, 8), Point2d(), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 1, 1, 1, 2, 2, 3, 3, 4, @@ -478,8 +468,7 @@ TEST(ResizeOnnx, upsample_sizes_nearest_round_prefer_ceil_asymmetric) { ResizeOnnx{ INTER_NEAREST | INTER_NEAREST_PREFER_CEIL | INTER_ASYMMETRIC, - Size(4, 4), Size(8, 8), Size(8, 8), Point2d(), - -0.75f, Rect2d(), + Size(4, 4), Size(8, 8), Size(8, 8), Point2d(), -0.75f, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { 1, 2, 2, 3, 3, 4, 4, 4, @@ -494,4 +483,60 @@ TEST(ResizeOnnx, upsample_sizes_nearest_round_prefer_ceil_asymmetric) }.run(); } +/* +import numpy as np +import onnx +from onnx.reference.ops.op_resize import ( + _interpolate_nd, + _cubic_coeffs, _cubic_coeffs_antialias, + _linear_coeffs, _linear_coeffs_antialias +) +data = np.arange(1, 17, dtype=np.float64).reshape(4, 4) +scales = np.array([0.8, 0.8], dtype=np.float64) +*/ + +/* +output = _interpolate_nd( + data, + lambda x, s: _cubic_coeffs_antialias(x, s, A=-0.5), + scale_factors=scales, + exclude_outside=True, +) +*/ +TEST(ResizeOnnx, downsample_scales_cubic_antialias_A_n0p5_exclude_outside) +{ + ResizeOnnx{ + INTER_CUBIC | INTER_ANTIALIAS | INTER_EXCLUDE_OUTSIDE, + Size(4, 4), Size(3, 3), Size(), Point2d(0.8, 0.8), -0.5f, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 1.68342335, 2.90749817, 4.22822584, + 6.57972264, 7.80379747, 9.12452513, + 11.86263331, 13.08670813, 14.4074358 , + } + }.run(); +} + +/* +output = _interpolate_nd( + data, + _linear_coeffs_antialias, + scale_factors=scales, + exclude_outside=True, +) +*/ +TEST(ResizeOnnx, 
downsample_scales_linear_antialias_exclude_outside) +{ + ResizeOnnx{ + INTER_LINEAR | INTER_ANTIALIAS | INTER_EXCLUDE_OUTSIDE, + Size(4, 4), Size(3, 3), Size(), Point2d(0.8, 0.8), -0.75f, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + 2.25 , 3.41666667, 4.58333333, + 6.91666667, 8.08333333, 9.25 , + 11.58333333, 12.75 , 13.91666667, + } + }.run(); +} + }} From 820db2e236b4cab02eb61f33f2b5ff18b39142a5 Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Sun, 4 Aug 2024 22:08:19 +0800 Subject: [PATCH 11/12] remove unnecessary clamp when exclude_outside --- modules/imgproc/include/opencv2/imgproc.hpp | 2 +- modules/imgproc/src/opencl/resize_onnx.cl | 22 ++++++++++++++------- modules/imgproc/test/test_resize_onnx.cpp | 5 +++-- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 0ca89edf54..0c04d6fa0a 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -2488,7 +2488,7 @@ To get a similar result to `cv::resize`, give dsize and: Either dsize or scale must be non-zero. @param scale scale factor; use same definition as ONNX, if scale > 1, it's upsampling. 
@param interpolation interpolation flags, see #InterpolationFlags and #ResizeONNXFlags -@param cubicCoeff cubic sampling coeff; range \f[[-1.0, 0)\f] +@param cubicCoeff cubic sampling coefficient, range \f[[-1.0, 0)\f] @sa resize */ diff --git a/modules/imgproc/src/opencl/resize_onnx.cl b/modules/imgproc/src/opencl/resize_onnx.cl index 731a98f503..d3e2476a1e 100644 --- a/modules/imgproc/src/opencl/resize_onnx.cl +++ b/modules/imgproc/src/opencl/resize_onnx.cl @@ -161,8 +161,9 @@ __kernel void resizeOnnx_linear_antialias( #if EXCLUDE_OUTSIDE if ((unsigned)(sy) >= (unsigned)(src_rows)) continue; -#endif +#else sy = clamp(sy, 0, src_rows - 1); +#endif __global uchar const* S = srcptr + sy * src_step + src_offset; for (int w = xstart; w < xend; ++w) { @@ -170,8 +171,9 @@ __kernel void resizeOnnx_linear_antialias( #if EXCLUDE_OUTSIDE if ((unsigned)(sx) >= (unsigned)(src_cols)) continue; -#endif +#else sx = clamp(sx, 0, src_cols - 1); +#endif // the computation of linear's weights is trival, so do it in kernel float t = fabs(w - rx) * xscale; t = clamp(1.f - t, 0.f, 1.f); @@ -195,8 +197,9 @@ __kernel void resizeOnnx_linear_antialias( #if EXCLUDE_OUTSIDE if ((unsigned)(sy) >= (unsigned)(src_rows)) continue; -#endif +#else sy = clamp(sy, 0, src_rows - 1); +#endif __global uchar const* S = srcptr + sy * src_step + src_offset; for (int w = xstart; w < xend; ++w) { @@ -204,8 +207,9 @@ __kernel void resizeOnnx_linear_antialias( #if EXCLUDE_OUTSIDE if ((unsigned)(sx) >= (unsigned)(src_cols)) continue; -#endif +#else sx = clamp(sx, 0, src_cols - 1); +#endif float t = fabs(w - rx) * xscale; t = clamp(1.f - t, 0.f, 1.f); wline += t; @@ -228,8 +232,9 @@ __kernel void resizeOnnx_linear_antialias( #if EXCLUDE_OUTSIDE if ((unsigned)(sy) >= (unsigned)(src_rows)) continue; -#endif +#else sy = clamp(sy, 0, src_rows - 1); +#endif __global uchar const* S = srcptr + sy * src_step + src_offset; for (int w = xstart; w < xend; ++w) { @@ -237,8 +242,9 @@ __kernel void 
resizeOnnx_linear_antialias( #if EXCLUDE_OUTSIDE if ((unsigned)(sx) >= (unsigned)(src_cols)) continue; -#endif +#else sx = clamp(sx, 0, src_cols - 1); +#endif float t = fabs(w - rx) * xscale; t = clamp(1.f - t, 0.f, 1.f); sline += t * TO_WORK(((__global T const*)(S + sx * pixel_size))[i]); @@ -305,8 +311,10 @@ __kernel void resizeOnnx_cubic( #if EXCLUDE_OUTSIDE if ((unsigned)(y) >= (unsigned)(src_rows)) continue; -#endif + int yoffset = y * src_step + src_offset; +#else int yoffset = clamp(y, 0, src_rows - 1) * src_step + src_offset; +#endif VW sline = (VW)(0); for (int x = 0; x < 4; ++x) sline += (VW)(xcoeff[x]) * TO_VEC_WORK(loadpix(srcptr + yoffset + xoffset[x])); diff --git a/modules/imgproc/test/test_resize_onnx.cpp b/modules/imgproc/test/test_resize_onnx.cpp index 86798a4152..b995d24cbd 100644 --- a/modules/imgproc/test/test_resize_onnx.cpp +++ b/modules/imgproc/test/test_resize_onnx.cpp @@ -42,8 +42,9 @@ struct ResizeOnnx Mat iS(szsrc, CV_64F, insrc.data()); Mat iR(szref, CV_64F, inref.data()); Mat S = iS, R = iR, nS, nR; - double alpha[8] = {1, -1, 5, 5, 0, -3, -2, +4}; - double beta[8] = {0, -0, 0, 7, 7, +7, -6, -6}; + // values after convertTo need to be all positive or all negative + double alpha[8] = {1, -1, 5, 5, 0, -3, -2, 4}; + double beta[8] = {0, -0, 0, 2, 7, -1, -6, 9}; RNG& rng = TS::ptr()->get_rng(); for (int cn = 1; cn <= 8; ++cn) { From 61709877cdb5b1dd913e38f61b59269ea22c3de3 Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Wed, 9 Apr 2025 00:22:39 +0800 Subject: [PATCH 12/12] move OPENCV_EXCLUDE_C_API down --- modules/imgproc/src/resize.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index f2a151d58c..6af3a12bc2 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -5314,8 +5314,6 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, hal::resize(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, 
dst.cols, dst.rows, inv_scale_x, inv_scale_y, interpolation); } -#ifndef OPENCV_EXCLUDE_C_API - void cv::resizeOnnx(InputArray _src, OutputArray _dst, Size dsize, Point2d scale, int interpolation, float cubicCoeff) { @@ -5486,6 +5484,7 @@ void cv::resizeOnnx(InputArray _src, OutputArray _dst, func(src, dst, ctrl); } +#ifndef OPENCV_EXCLUDE_C_API CV_IMPL void cvResize( const CvArr* srcarr, CvArr* dstarr, int method )