diff --git a/3rdparty/hal_rvv/hal_rvv.hpp b/3rdparty/hal_rvv/hal_rvv.hpp index 2a724ae893..8278ca946e 100644 --- a/3rdparty/hal_rvv/hal_rvv.hpp +++ b/3rdparty/hal_rvv/hal_rvv.hpp @@ -22,6 +22,7 @@ #if defined(__riscv_v) && __riscv_v == 1000000 #include "hal_rvv_1p0/merge.hpp" // core #include "hal_rvv_1p0/mean.hpp" // core +#include "hal_rvv_1p0/minmax.hpp" // core #include "hal_rvv_1p0/atan.hpp" // core #endif diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp new file mode 100644 index 0000000000..8786764cf0 --- /dev/null +++ b/3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp @@ -0,0 +1,335 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#ifndef OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED +#define OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED + +#include + +namespace cv { namespace cv_hal_rvv { + +#undef cv_hal_minMaxIdx +#define cv_hal_minMaxIdx cv::cv_hal_rvv::minMaxIdx +#undef cv_hal_minMaxIdxMaskStep +#define cv_hal_minMaxIdxMaskStep cv::cv_hal_rvv::minMaxIdx + +namespace +{ + template struct rvv; + + #define HAL_RVV_GENERATOR(T, EEW, TYPE, IS_U, EMUL, M_EMUL, B_LEN) \ + template<> struct rvv \ + { \ + using vec_t = v##IS_U##int##EEW##EMUL##_t; \ + using bool_t = vbool##B_LEN##_t; \ + static inline size_t vsetvlmax() { return __riscv_vsetvlmax_e##EEW##EMUL(); } \ + static inline size_t vsetvl(size_t a) { return __riscv_vsetvl_e##EEW##EMUL(a); } \ + static inline vec_t vmv_v_x(T a, size_t b) { return __riscv_vmv_v_x_##TYPE##EMUL(a, b); } \ + static inline vec_t vle(const T* a, size_t b) { return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \ + static inline vuint8##M_EMUL##_t vle_mask(const uchar* a, size_t b) { return __riscv_vle8_v_u8##M_EMUL(a, b); } \ + static inline vec_t vmin_tu(vec_t a, vec_t b, vec_t c, size_t d) { return __riscv_vmin##IS_U##_tu(a, b, c, d); } \ + static inline vec_t vmax_tu(vec_t a, vec_t b, vec_t c, size_t d) { return __riscv_vmax##IS_U##_tu(a, b, c, d); } \ + static inline vec_t vmin_tumu(bool_t a, vec_t b, vec_t c, vec_t d, size_t e) { return __riscv_vmin##IS_U##_tumu(a, b, c, d, e); } \ + static inline vec_t vmax_tumu(bool_t a, vec_t b, vec_t c, vec_t d, size_t e) { return __riscv_vmax##IS_U##_tumu(a, b, c, d, e); } \ + static inline vec_t vredmin(vec_t a, vec_t b, size_t c) { return __riscv_vredmin##IS_U(a, b, c); } \ + static inline vec_t vredmax(vec_t a, vec_t b, size_t c) { return __riscv_vredmax##IS_U(a, b, c); } \ + }; + HAL_RVV_GENERATOR(uchar , 8 , u8 , u, m1, m1 , 8 ) + HAL_RVV_GENERATOR(schar , 8 , i8 , , m1, m1 , 8 ) + HAL_RVV_GENERATOR(ushort, 16, u16, u, m1, mf2, 16) + HAL_RVV_GENERATOR(short , 16, i16, , m1, mf2, 16) + #undef HAL_RVV_GENERATOR + + #define HAL_RVV_GENERATOR(T, NAME, EEW, TYPE, IS_F, F_OR_S, F_OR_X, EMUL, M_EMUL, P_EMUL, B_LEN) \ + template<> struct rvv \ + { \ + using vec_t = v##NAME##EEW##EMUL##_t; \ + using bool_t = vbool##B_LEN##_t; \ + static inline size_t vsetvlmax() { return __riscv_vsetvlmax_e##EEW##EMUL(); } \ + static inline size_t vsetvl(size_t a) { return __riscv_vsetvl_e##EEW##EMUL(a); } \ + static inline vec_t vmv_v_x(T a, size_t b) { return __riscv_v##IS_F##mv_v_##F_OR_X##_##TYPE##EMUL(a, b); } \ + static inline vuint32##P_EMUL##_t vid(size_t a) { return __riscv_vid_v_u32##P_EMUL(a); } \ + static inline vuint32##P_EMUL##_t vundefined() { return __riscv_vundefined_u32##P_EMUL(); } \ + static inline vec_t vle(const T* a, size_t b) { return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \ + static inline vuint8##M_EMUL##_t vle_mask(const uchar* a, size_t b) { return __riscv_vle8_v_u8##M_EMUL(a, b); } \ + static inline bool_t vmlt(vec_t a, vec_t b, size_t c) { return __riscv_vm##F_OR_S##lt(a, b, c); } \ + static inline bool_t vmgt(vec_t a, vec_t b, size_t c) { return __riscv_vm##F_OR_S##gt(a, b, c); } \ + static inline bool_t vmlt_mu(bool_t a, bool_t b, vec_t c, vec_t d, size_t e) { return __riscv_vm##F_OR_S##lt##_mu(a, b, c, d, e); } \ + static inline bool_t vmgt_mu(bool_t a, bool_t b, vec_t c, vec_t d, size_t e) { return __riscv_vm##F_OR_S##gt##_mu(a, b, c, d, e); } \ + static inline T vmv_x_s(vec_t a) { return __riscv_v##IS_F##mv_##F_OR_X(a); } \ + }; + HAL_RVV_GENERATOR(int , int , 32, i32, , s, x, m4, m1 , m4, 8 ) + HAL_RVV_GENERATOR(float , float, 32, f32, f, f, f, m4, m1 , m4, 8 ) + HAL_RVV_GENERATOR(double, float, 64, f64, f, f, f, m4, mf2, m2, 16) + #undef HAL_RVV_GENERATOR +} + +template +inline int minMaxIdxReadTwice(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal, + int* minIdx, int* maxIdx, uchar* mask, size_t mask_step) +{ + int vlmax = rvv::vsetvlmax(); + auto vec_min = rvv::vmv_v_x(std::numeric_limits::max(), vlmax); + auto vec_max = rvv::vmv_v_x(std::numeric_limits::lowest(), vlmax); + T val_min, val_max; + + if (mask) + { + for (int i = 0; i < height; i++) + { + const T* src_row = reinterpret_cast(src_data + i * src_step); + const uchar* mask_row = mask + i * mask_step; + int vl; + for (int j = 0; j < width; j += vl) + { + vl = rvv::vsetvl(width - j); + auto vec_src = rvv::vle(src_row + j, vl); + auto vec_mask = rvv::vle_mask(mask_row + j, vl); + auto bool_mask = __riscv_vmsne(vec_mask, 0, vl); + vec_min = rvv::vmin_tumu(bool_mask, vec_min, vec_min, vec_src, vl); + vec_max = rvv::vmax_tumu(bool_mask, vec_max, vec_max, vec_src, vl); + } + } + + auto sc_minval = rvv::vmv_v_x(std::numeric_limits::max(), vlmax); + auto sc_maxval = rvv::vmv_v_x(std::numeric_limits::lowest(), vlmax); + sc_minval = rvv::vredmin(vec_min, sc_minval, vlmax); + sc_maxval = rvv::vredmax(vec_max, sc_maxval, vlmax); + val_min = __riscv_vmv_x(sc_minval); + val_max = __riscv_vmv_x(sc_maxval); + + bool found_min = !minIdx, found_max = !maxIdx; + for (int i = 0; i < height && (!found_min || !found_max); i++) + { + const T* src_row = reinterpret_cast(src_data + i * src_step); + const uchar* mask_row = mask + i * mask_step; + int vl; + for (int j = 0; j < width && (!found_min || !found_max); j += vl) + { + vl = rvv::vsetvl(width - j); + auto vec_src = rvv::vle(src_row + j, vl); + auto vec_mask = rvv::vle_mask(mask_row + j, vl); + auto bool_mask = __riscv_vmsne(vec_mask, 0, vl); + auto bool_zero = __riscv_vmxor(bool_mask, bool_mask, vl); + if (!found_min) + { + auto bool_minpos = __riscv_vmseq_mu(bool_mask, bool_zero, vec_src, val_min, vl); + int index = __riscv_vfirst(bool_minpos, vl); + if (index != -1) + { + found_min = true; + minIdx[0] = i; + minIdx[1] = j + index; + } + } + if (!found_max) + { + auto bool_maxpos = __riscv_vmseq_mu(bool_mask, bool_zero, vec_src, val_max, vl); + int index = __riscv_vfirst(bool_maxpos, vl); + if (index != -1) + { + found_max = true; + maxIdx[0] = i; + maxIdx[1] = j + index; + } + } + } + } + } + else + { + for (int i = 0; i < height; i++) + { + const T* src_row = reinterpret_cast(src_data + i * src_step); + int vl; + for (int j = 0; j < width; j += vl) + { + vl = rvv::vsetvl(width - j); + auto vec_src = rvv::vle(src_row + j, vl); + vec_min = rvv::vmin_tu(vec_min, vec_min, vec_src, vl); + vec_max = rvv::vmax_tu(vec_max, vec_max, vec_src, vl); + } + } + + auto sc_minval = rvv::vmv_v_x(std::numeric_limits::max(), vlmax); + auto sc_maxval = rvv::vmv_v_x(std::numeric_limits::lowest(), vlmax); + sc_minval = rvv::vredmin(vec_min, sc_minval, vlmax); + sc_maxval = rvv::vredmax(vec_max, sc_maxval, vlmax); + val_min = __riscv_vmv_x(sc_minval); + val_max = __riscv_vmv_x(sc_maxval); + + bool found_min = !minIdx, found_max = !maxIdx; + for (int i = 0; i < height && (!found_min || !found_max); i++) + { + const T* src_row = reinterpret_cast(src_data + i * src_step); + int vl; + for (int j = 0; j < width && (!found_min || !found_max); j += vl) + { + vl = rvv::vsetvl(width - j); + auto vec_src = rvv::vle(src_row + j, vl); + if (!found_min) + { + auto bool_minpos = __riscv_vmseq(vec_src, val_min, vl); + int index = __riscv_vfirst(bool_minpos, vl); + if (index != -1) + { + found_min = true; + minIdx[0] = i; + minIdx[1] = j + index; + } + } + if (!found_max) + { + auto bool_maxpos = __riscv_vmseq(vec_src, val_max, vl); + int index = __riscv_vfirst(bool_maxpos, vl); + if (index != -1) + { + found_max = true; + maxIdx[0] = i; + maxIdx[1] = j + index; + } + } + } + } + } + if (minVal) + { + *minVal = val_min; + } + if (maxVal) + { + *maxVal = val_max; + } + + return CV_HAL_ERROR_OK; +} + +template +inline int minMaxIdxReadOnce(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal, + int* minIdx, int* maxIdx, uchar* mask, size_t mask_step) +{ + int vlmax = rvv::vsetvlmax(); + auto vec_min = rvv::vmv_v_x(std::numeric_limits::max(), vlmax); + auto vec_max = rvv::vmv_v_x(std::numeric_limits::lowest(), vlmax); + auto vec_pos = rvv::vid(vlmax); + auto vec_minpos = rvv::vundefined(), vec_maxpos = rvv::vundefined(); + T val_min, val_max; + + if (mask) + { + for (int i = 0; i < height; i++) + { + const T* src_row = reinterpret_cast(src_data + i * src_step); + const uchar* mask_row = mask + i * mask_step; + int vl; + for (int j = 0; j < width; j += vl) + { + vl = rvv::vsetvl(width - j); + auto vec_src = rvv::vle(src_row + j, vl); + auto vec_mask = rvv::vle_mask(mask_row + j, vl); + auto bool_mask = __riscv_vmsne(vec_mask, 0, vl); + auto bool_zero = __riscv_vmxor(bool_mask, bool_mask, vl); + + auto bool_minpos = rvv::vmlt_mu(bool_mask, bool_zero, vec_src, vec_min, vl); + auto bool_maxpos = rvv::vmgt_mu(bool_mask, bool_zero, vec_src, vec_max, vl); + vec_minpos = __riscv_vmerge_tu(vec_minpos, vec_minpos, vec_pos, bool_minpos, vl); + vec_maxpos = __riscv_vmerge_tu(vec_maxpos, vec_maxpos, vec_pos, bool_maxpos, vl); + + vec_min = __riscv_vmerge_tu(vec_min, vec_min, vec_src, bool_minpos, vl); + vec_max = __riscv_vmerge_tu(vec_max, vec_max, vec_src, bool_maxpos, vl); + vec_pos = __riscv_vadd(vec_pos, vl, vlmax); + } + } + } + else + { + for (int i = 0; i < height; i++) + { + const T* src_row = reinterpret_cast(src_data + i * src_step); + int vl; + for (int j = 0; j < width; j += vl) + { + vl = rvv::vsetvl(width - j); + auto vec_src = rvv::vle(src_row + j, vl); + + auto bool_minpos = rvv::vmlt(vec_src, vec_min, vl); + auto bool_maxpos = rvv::vmgt(vec_src, vec_max, vl); + vec_minpos = __riscv_vmerge_tu(vec_minpos, vec_minpos, vec_pos, bool_minpos, vl); + vec_maxpos = __riscv_vmerge_tu(vec_maxpos, vec_maxpos, vec_pos, bool_maxpos, vl); + + vec_min = __riscv_vmerge_tu(vec_min, vec_min, vec_src, bool_minpos, vl); + vec_max = __riscv_vmerge_tu(vec_max, vec_max, vec_src, bool_maxpos, vl); + vec_pos = __riscv_vadd(vec_pos, vl, vlmax); + } + } + } + + val_min = std::numeric_limits::max(); + val_max = std::numeric_limits::lowest(); + for (int i = 0; i < vlmax; i++) + { + if (val_min > rvv::vmv_x_s(vec_min)) + { + val_min = rvv::vmv_x_s(vec_min); + if (minIdx) + { + minIdx[0] = __riscv_vmv_x(vec_minpos) / width; + minIdx[1] = __riscv_vmv_x(vec_minpos) % width; + } + } + if (val_max < rvv::vmv_x_s(vec_max)) + { + val_max = rvv::vmv_x_s(vec_max); + if (maxIdx) + { + maxIdx[0] = __riscv_vmv_x(vec_maxpos) / width; + maxIdx[1] = __riscv_vmv_x(vec_maxpos) % width; + } + } + vec_min = __riscv_vslidedown(vec_min, 1, vlmax); + vec_max = __riscv_vslidedown(vec_max, 1, vlmax); + vec_minpos = __riscv_vslidedown(vec_minpos, 1, vlmax); + vec_maxpos = __riscv_vslidedown(vec_maxpos, 1, vlmax); + } + if (minVal) + { + *minVal = val_min; + } + if (maxVal) + { + *maxVal = val_max; + } + + return CV_HAL_ERROR_OK; +} + +inline int minMaxIdx(const uchar* src_data, size_t src_step, int width, int height, int depth, double* minVal, double* maxVal, + int* minIdx, int* maxIdx, uchar* mask, size_t mask_step = 0) +{ + if (!mask_step) + mask_step = src_step; + + switch (depth) + { + case CV_8UC1: + return minMaxIdxReadTwice(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step); + case CV_8SC1: + return minMaxIdxReadTwice(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step); + case CV_16UC1: + return minMaxIdxReadTwice(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step); + case CV_16SC1: + return minMaxIdxReadTwice(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step); + case CV_32SC1: + return minMaxIdxReadOnce(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step); + case CV_32FC1: + return minMaxIdxReadOnce(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step); + case CV_64FC1: + return minMaxIdxReadOnce(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step); + } + + return CV_HAL_ERROR_NOT_IMPLEMENTED; +} + +}} + +#endif diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp index 474fe17393..d435db52f6 100644 --- a/modules/core/src/hal_replacement.hpp +++ b/modules/core/src/hal_replacement.hpp @@ -911,8 +911,26 @@ inline int hal_ni_gemm64fc(const double* src1, size_t src1_step, const double* s inline int hal_ni_minMaxIdx(const uchar* src_data, size_t src_step, int width, int height, int depth, double* minVal, double* maxVal, int* minIdx, int* maxIdx, uchar* mask) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief Finds the global minimum and maximum in an array. + @param src_data Source image + @param src_step Source image + @param width Source image dimensions + @param height Source image dimensions + @param depth Depth of source image + @param minVal Pointer to the returned global minimum and maximum in an array. + @param maxVal Pointer to the returned global minimum and maximum in an array. + @param minIdx Pointer to the returned minimum and maximum location. + @param maxIdx Pointer to the returned minimum and maximum location. + @param mask Specified array region. + @param mask_step Mask array step. +*/ +inline int hal_ni_minMaxIdxMaskStep(const uchar* src_data, size_t src_step, int width, int height, int depth, double* minVal, double* maxVal, + int* minIdx, int* maxIdx, uchar* mask, size_t mask_step) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + //! @cond IGNORED #define cv_hal_minMaxIdx hal_ni_minMaxIdx +#define cv_hal_minMaxIdxMaskStep hal_ni_minMaxIdxMaskStep //! @endcond /** diff --git a/modules/core/src/minmax.cpp b/modules/core/src/minmax.cpp index 8a4a54522f..9c8b317a5a 100644 --- a/modules/core/src/minmax.cpp +++ b/modules/core/src/minmax.cpp @@ -1514,10 +1514,19 @@ void cv::minMaxIdx(InputArray _src, double* minVal, if (src.dims <= 2) { - CALL_HAL(minMaxIdx, cv_hal_minMaxIdx, src.data, src.step, src.cols*cn, src.rows, - src.depth(), minVal, maxVal, minIdx, maxIdx, mask.data); + if ((size_t)src.step == (size_t)mask.step) + { + CALL_HAL(minMaxIdx, cv_hal_minMaxIdx, src.data, src.step, src.cols*cn, src.rows, + src.depth(), minVal, maxVal, minIdx, maxIdx, mask.data); + } + else + { + CALL_HAL(minMaxIdxMaskStep, cv_hal_minMaxIdxMaskStep, src.data, src.step, src.cols*cn, src.rows, + src.depth(), minVal, maxVal, minIdx, maxIdx, mask.data, mask.step); + } + } - else if (src.isContinuous()) + else if (src.isContinuous() && mask.isContinuous()) { int res = cv_hal_minMaxIdx(src.data, 0, (int)src.total()*cn, 1, src.depth(), minVal, maxVal, minIdx, maxIdx, mask.data);