Add HAL RVV convertScale, for Mat.convertTo.

Co-authored-by: Liutong HAN <liutong2020@iscas.ac.cn>
2025-01-20 15:59:24 +08:00 · 2025-01-19 02:39:21 +08:00 · 2025-01-19 02:39:21 +08:00 · c4911e9fe4
commit c4911e9fe4
parent 88cb6c13ae
5 changed files with 151 additions and 1 deletions
--- a/3rdparty/hal_rvv/hal_rvv.hpp
+++ b/3rdparty/hal_rvv/hal_rvv.hpp
@ -23,7 +23,8 @@
 #include "hal_rvv_1p0/merge.hpp" // core
 #include "hal_rvv_1p0/mean.hpp" // core
 #include "hal_rvv_1p0/norm.hpp" // core
-#include "hal_rvv_1p0/normdiff.hpp" // core
+#include "hal_rvv_1p0/norm_diff.hpp" // core
+#include "hal_rvv_1p0/convert_scale.hpp" // core
 #endif

 #endif
--- a/3rdparty/hal_rvv/hal_rvv_1p0/convert_scale.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/convert_scale.hpp
@ -0,0 +1,120 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef OPENCV_HAL_RVV_CONVERT_SCALE_HPP_INCLUDED
+#define OPENCV_HAL_RVV_CONVERT_SCALE_HPP_INCLUDED
+
+#include <riscv_vector.h>
+
+namespace cv { namespace cv_hal_rvv {
+
+#undef cv_hal_convertScale
+#define cv_hal_convertScale cv::cv_hal_rvv::convertScale
+
+// Applies dst = src * alpha + beta to 8-bit unsigned rows and stores 8-bit unsigned results.
+// Each chunk is widened u8 -> u16 -> f32, passed through vfmadd with alpha (held as float) and
+// a splatted beta, then narrowed f32 -> u16 (vfncvt_xu) -> u8 (vnclipu with shift 0).
+// Row starts are base pointer + row * step on uchar pointers; always returns CV_HAL_ERROR_OK.
+inline int convertScale_8U8U(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height, double alpha, double beta)
+{
+    float scale = alpha;
+    auto v_shift = __riscv_vfmv_v_f_f32m8(beta, __riscv_vsetvlmax_e32m8());
+    for (int row = 0; row < height; row++)
+    {
+        const uchar* in = src + row * src_step;
+        uchar* out = dst + row * dst_step;
+        for (int col = 0, vl = 0; col < width; col += vl)
+        {
+            vl = __riscv_vsetvl_e8m2(width - col);
+            auto v_in = __riscv_vle8_v_u8m2(in + col, vl);
+            auto v_in_u16 = __riscv_vzext_vf2(v_in, vl);
+            auto v_in_f32 = __riscv_vfwcvt_f(v_in_u16, vl);
+            auto v_scaled = __riscv_vfmadd(v_in_f32, scale, v_shift, vl);
+            auto v_out_u16 = __riscv_vfncvt_xu(v_scaled, vl);
+            auto v_out = __riscv_vnclipu(v_out_u16, 0, __RISCV_VXRM_RNU, vl);
+            __riscv_vse8_v_u8m2(out + col, v_out, vl);
+        }
+    }
+    return CV_HAL_ERROR_OK;
+}
+
+// Applies dst = src * alpha + beta to 8-bit unsigned rows and stores 32-bit float results.
+// Each chunk is widened u8 -> u16 -> f32 and passed through vfmadd with alpha (held as float)
+// and a splatted beta. The destination row is the uchar base plus row * dst_step, viewed as
+// float*. Always returns CV_HAL_ERROR_OK.
+inline int convertScale_8U32F(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height, double alpha, double beta)
+{
+    float scale = alpha;
+    auto v_shift = __riscv_vfmv_v_f_f32m8(beta, __riscv_vsetvlmax_e32m8());
+    for (int row = 0; row < height; row++)
+    {
+        const uchar* in = src + row * src_step;
+        float* out = reinterpret_cast<float*>(dst + row * dst_step);
+        for (int col = 0, vl = 0; col < width; col += vl)
+        {
+            vl = __riscv_vsetvl_e8m2(width - col);
+            auto v_in = __riscv_vle8_v_u8m2(in + col, vl);
+            auto v_in_u16 = __riscv_vzext_vf2(v_in, vl);
+            auto v_in_f32 = __riscv_vfwcvt_f(v_in_u16, vl);
+            auto v_scaled = __riscv_vfmadd(v_in_f32, scale, v_shift, vl);
+            __riscv_vse32_v_f32m8(out + col, v_scaled, vl);
+        }
+    }
+    return CV_HAL_ERROR_OK;
+}
+
+// Applies dst = src * alpha + beta to 32-bit float rows and stores 32-bit float results.
+// Chunks are loaded as f32m8, passed through vfmadd with alpha (held as float) and a splatted
+// beta, and stored back. Both row pointers are the uchar base plus row * step, viewed as
+// float*. Always returns CV_HAL_ERROR_OK.
+inline int convertScale_32F32F(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height, double alpha, double beta)
+{
+    float scale = alpha;
+    auto v_shift = __riscv_vfmv_v_f_f32m8(beta, __riscv_vsetvlmax_e32m8());
+    for (int row = 0; row < height; row++)
+    {
+        const float* in = reinterpret_cast<const float*>(src + row * src_step);
+        float* out = reinterpret_cast<float*>(dst + row * dst_step);
+        for (int col = 0, vl = 0; col < width; col += vl)
+        {
+            vl = __riscv_vsetvl_e32m8(width - col);
+            auto v_in = __riscv_vle32_v_f32m8(in + col, vl);
+            auto v_scaled = __riscv_vfmadd(v_in, scale, v_shift, vl);
+            __riscv_vse32_v_f32m8(out + col, v_scaled, vl);
+        }
+    }
+    return CV_HAL_ERROR_OK;
+}
+
+// Entry point that cv_hal_convertScale is redefined to in this header: selects the RVV kernel
+// matching the (sdepth, ddepth) pair and forwards all remaining arguments to it unchanged.
+//
+// Supported (sdepth -> ddepth) combinations:
+//   CV_8U  -> CV_8U    handled by convertScale_8U8U
+//   CV_8U  -> CV_32F   handled by convertScale_8U32F
+//   CV_32F -> CV_32F   handled by convertScale_32F32F
+// Any other combination returns CV_HAL_ERROR_NOT_IMPLEMENTED.
+//
+// A null dst returns CV_HAL_ERROR_OK before the depths are inspected, so no kernel runs and
+// nothing is read or written in that case.
+//
+// Otherwise the return value is whatever the selected kernel returns.
+inline int convertScale(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height,
+                        int sdepth, int ddepth, double alpha, double beta)
+{
+    if (!dst)
+        return CV_HAL_ERROR_OK;
+
+    if (sdepth == CV_8U && ddepth == CV_8U)
+        return convertScale_8U8U(src, src_step, dst, dst_step, width, height, alpha, beta);
+    if (sdepth == CV_8U && ddepth == CV_32F)
+        return convertScale_8U32F(src, src_step, dst, dst_step, width, height, alpha, beta);
+    if (sdepth == CV_32F && ddepth == CV_32F)
+        return convertScale_32F32F(src, src_step, dst, dst_step, width, height, alpha, beta);
+
+    return CV_HAL_ERROR_NOT_IMPLEMENTED;
+}
+
+}}
+
+#endif
--- a/3rdparty/hal_rvv/hal_rvv_1p0/norm_diff.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/norm_diff.hpp
--- a/modules/core/src/convert.dispatch.cpp
+++ b/modules/core/src/convert.dispatch.cpp
@ -281,6 +281,15 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const
    dst.create(dims, size, dtype);
    Mat dstMat = dst.getMat();

+    if( dims <= 2 )
+    {
+        CALL_HAL(convertScale, cv_hal_convertScale, data, step, dstMat.data, dstMat.step, cols * cn, rows, sdepth, ddepth, alpha, beta);
+    }
+    else if( isContinuous() && dstMat.isContinuous() )
+    {
+        CALL_HAL(convertScale, cv_hal_convertScale, data, 0, dstMat.data, 0, total() * cn, 1, sdepth, ddepth, alpha, beta);
+    }
+
    BinaryFunc func = noScale ? getConvertFunc(sdepth, ddepth) : getConvertScaleFunc(sdepth, ddepth);
    double scale[] = {alpha, beta};
    CV_Assert( func != 0 );
--- a/modules/core/src/hal_replacement.hpp
+++ b/modules/core/src/hal_replacement.hpp
@ -345,11 +345,31 @@ inline int hal_ni_normDiff(const uchar* src1, size_t src1_step, const uchar* src
                           size_t mask_step, int width, int height, int type, int norm_type, double* result) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
 //! @}

+/**
+@brief Convert an array to another depth, multiplying each element by alpha and adding beta.
+@param src Source image data
+@param src_step Source image step
+@param dst Destination image data
+@param dst_step Destination image step
+@param width Number of elements per row (Mat::convertTo passes columns multiplied by channels)
+@param height Number of rows
+@param sdepth Depth of source image
+@param ddepth Depth of destination image
+@param alpha Scale value
+@param beta Shift value
+*/
+//! @addtogroup core_hal_interface_convert Array convert
+//! @{
+inline int hal_ni_convertScale(const uchar* src, size_t src_step, uchar* dst, size_t dst_step, int width, int height,
+                               int sdepth, int ddepth, double alpha, double beta) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+//! @}
+
 //! @cond IGNORED
 #define cv_hal_normHamming8u hal_ni_normHamming8u
 #define cv_hal_normHammingDiff8u hal_ni_normHammingDiff8u
 #define cv_hal_norm hal_ni_norm
 #define cv_hal_normDiff hal_ni_normDiff
+#define cv_hal_convertScale hal_ni_convertScale
 //! @endcond

 /**