Merge pull request #27119 from amane-ame:warp_hal_rvv

Add RISC-V HAL implementation for cv::warp series #27119

This patch implements `cv_hal_remap`, `cv_hal_warpAffine` and `cv_hal_warpPerspective` using native intrinsics, optimizing the performance of `cv::remap/cv::warpAffine/cv::warpPerspective` for `CV_HAL_INTER_NEAREST/CV_HAL_INTER_LINEAR/CV_HAL_INTER_CUBIC/CV_HAL_INTER_LANCZOS4` modes.

Tested on MUSE-PI (Spacemit X60) for both gcc 14.2 and clang 20.0.

```
$ ./opencv_test_imgproc --gtest_filter="*Remap*:*Warp*"
$ ./opencv_perf_imgproc --gtest_filter="*Remap*:*remap*:*Warp*" --perf_min_samples=200 --perf_force_samples=200
```

View the full perf table here: [hal_rvv_warp.pdf](https://github.com/user-attachments/files/19403718/hal_rvv_warp.pdf)

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
2025-07-25 14:47:07 +08:00 · 2025-03-25 16:57:47 +08:00 · 2025-03-25 16:57:47 +08:00 · fa58c1205b
commit fa58c1205b
parent 931af518d9
5 changed files with 1272 additions and 2 deletions
--- a/3rdparty/hal_rvv/hal_rvv.hpp
+++ b/3rdparty/hal_rvv/hal_rvv.hpp
@ -50,6 +50,7 @@
 #include "hal_rvv_1p0/filter.hpp" // imgproc
 #include "hal_rvv_1p0/pyramids.hpp" // imgproc
 #include "hal_rvv_1p0/color.hpp" // imgproc
+#include "hal_rvv_1p0/warp.hpp" // imgproc
 #include "hal_rvv_1p0/thresh.hpp" // imgproc
 #include "hal_rvv_1p0/histogram.hpp" // imgproc
 #endif
--- a/3rdparty/hal_rvv/hal_rvv_1p0/types.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/types.hpp
@ -91,10 +91,12 @@ using RVV_F64M2 = struct RVV<double, LMUL_2>;
 using RVV_F64M4 = struct RVV<double, LMUL_4>;
 using RVV_F64M8 = struct RVV<double, LMUL_8>;

-// Only for dst type lmul >= 1
 template <typename Dst_T, typename RVV_T>
 using RVV_SameLen =
-    RVV<Dst_T, RVV_LMUL(RVV_T::lmul * sizeof(Dst_T) / sizeof(typename RVV_T::ElemType))>;
+    RVV<Dst_T, RVV_LMUL(static_cast<int>((RVV_T::lmul <= 8 ? RVV_T::lmul * static_cast<float>(sizeof(Dst_T)) : RVV_T::lmul == 9 ? static_cast<float>(sizeof(Dst_T)) / 2 : RVV_T::lmul == 10 ? static_cast<float>(sizeof(Dst_T)) / 4 : static_cast<float>(sizeof(Dst_T)) / 8) / sizeof(typename RVV_T::ElemType) == 0.5   ? 9  : \
+                                         (RVV_T::lmul <= 8 ? RVV_T::lmul * static_cast<float>(sizeof(Dst_T)) : RVV_T::lmul == 9 ? static_cast<float>(sizeof(Dst_T)) / 2 : RVV_T::lmul == 10 ? static_cast<float>(sizeof(Dst_T)) / 4 : static_cast<float>(sizeof(Dst_T)) / 8) / sizeof(typename RVV_T::ElemType) == 0.25  ? 10 : \
+                                         (RVV_T::lmul <= 8 ? RVV_T::lmul * static_cast<float>(sizeof(Dst_T)) : RVV_T::lmul == 9 ? static_cast<float>(sizeof(Dst_T)) / 2 : RVV_T::lmul == 10 ? static_cast<float>(sizeof(Dst_T)) / 4 : static_cast<float>(sizeof(Dst_T)) / 8) / sizeof(typename RVV_T::ElemType) == 0.125 ? 11 : \
+                                         (RVV_T::lmul <= 8 ? RVV_T::lmul * static_cast<float>(sizeof(Dst_T)) : RVV_T::lmul == 9 ? static_cast<float>(sizeof(Dst_T)) / 2 : RVV_T::lmul == 10 ? static_cast<float>(sizeof(Dst_T)) / 4 : static_cast<float>(sizeof(Dst_T)) / 8) / sizeof(typename RVV_T::ElemType)))>;

 template <size_t DstSize> struct RVV_ToIntHelper;
 template <size_t DstSize> struct RVV_ToUintHelper;
--- a/3rdparty/hal_rvv/hal_rvv_1p0/warp.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/warp.hpp
--- a/modules/imgproc/src/hal_replacement.hpp
+++ b/modules/imgproc/src/hal_replacement.hpp
@ -373,9 +373,58 @@ inline int hal_ni_remap32f(int src_type, const uchar *src_data, size_t src_step,
                           float* mapx, size_t mapx_step, float* mapy, size_t mapy_step,
                           int interpolation, int border_type, const double border_value[4])
 { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+/**
+   @brief hal_remap with a single two-channel floating point map
+
+   Default (not implemented) stub for the cv_hal_remap32fc2 hook. In cv::remap
+   the hook is tried when map1 has type CV_32FC2 and map2 is empty; map1's
+   float data and step are passed as map / map_step.
+
+   @param src_type source and destination image type
+   @param src_data source image data
+   @param src_step source image step
+   @param src_width source image width
+   @param src_height source image height
+   @param dst_data destination image data
+   @param dst_step destination image step
+   @param dst_width destination image width
+   @param dst_height destination image height
+   @param map map for xy values (two-channel float data, CV_32FC2 at the cv::remap call site)
+   @param map_step map matrix step
+   @param interpolation interpolation mode (CV_HAL_INTER_NEAREST, ...)
+   @param border_type border processing mode (CV_HAL_BORDER_REFLECT, ...)
+   @param border_value values to use for CV_HAL_BORDER_CONSTANT mode
+   @return this stub always returns CV_HAL_ERROR_NOT_IMPLEMENTED
+   @sa cv::remap
+ */
+inline int hal_ni_remap32fc2(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height,
+                             uchar *dst_data, size_t dst_step, int dst_width, int dst_height,
+                             float* map, size_t map_step, int interpolation, int border_type, const double border_value[4])
+{ return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+/**
+   @brief hal_remap with fixed-point maps
+
+   Default (not implemented) stub for the cv_hal_remap16s hook. In cv::remap
+   the hook is tried when map1 has type CV_16SC2 and map2 is either empty or
+   of type CV_16UC1; map1 is passed as mapx and map2 as mapy.
+
+   @param src_type source and destination image type
+   @param src_data source image data
+   @param src_step source image step
+   @param src_width source image width
+   @param src_height source image height
+   @param dst_data destination image data
+   @param dst_step destination image step
+   @param dst_width destination image width
+   @param dst_height destination image height
+   @param mapx first map; at the cv::remap call site this is the data of a two-channel
+               CV_16SC2 matrix, not x values only (presumably integer (x, y) pairs -- TODO confirm)
+   @param mapx_step mapx matrix step
+   @param mapy second map; at the cv::remap call site this is the data of a CV_16UC1 matrix
+               or of an empty matrix (presumably a null pointer in that case -- TODO confirm)
+   @param mapy_step mapy matrix step
+   @param interpolation interpolation mode (CV_HAL_INTER_NEAREST, ...)
+   @param border_type border processing mode (CV_HAL_BORDER_REFLECT, ...)
+   @param border_value values to use for CV_HAL_BORDER_CONSTANT mode
+   @return this stub always returns CV_HAL_ERROR_NOT_IMPLEMENTED
+   @sa cv::remap
+ */
+inline int hal_ni_remap16s(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height,
+                           uchar *dst_data, size_t dst_step, int dst_width, int dst_height,
+                           short* mapx, size_t mapx_step, ushort* mapy, size_t mapy_step,
+                           int interpolation, int border_type, const double border_value[4])
+{ return CV_HAL_ERROR_NOT_IMPLEMENTED; }

 //! @cond IGNORED
 #define cv_hal_remap32f hal_ni_remap32f
+#define cv_hal_remap32fc2 hal_ni_remap32fc2
+#define cv_hal_remap16s hal_ni_remap16s
 //! @endcond

 /**
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@ -1720,6 +1720,16 @@ void cv::remap( InputArray _src, OutputArray _dst,
        CALL_HAL(remap32f, cv_hal_remap32f, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
                 map1.ptr<float>(), map1.step, map2.ptr<float>(), map2.step, interpolation, borderType, borderValue.val);
    }
+    if ((map1.type() == CV_32FC2) && map2.empty())
+    {
+        CALL_HAL(remap32fc2, cv_hal_remap32fc2, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
+                 map1.ptr<float>(), map1.step, interpolation, borderType, borderValue.val);
+    }
+    if ((map1.type() == CV_16SC2) && (map2.empty() || map2.type() == CV_16UC1))
+    {
+        CALL_HAL(remap16s, cv_hal_remap16s, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
+                 map1.ptr<short>(), map1.step, map2.ptr<ushort>(), map2.step, interpolation, borderType, borderValue.val);
+    }

    interpolation &= ~WARP_RELATIVE_MAP;
    if( interpolation == INTER_AREA )