Use LMUL=2 in the RISC-V Vector (RVV) FP16 part.

2025-07-20 11:06:38 +08:00 · 2024-10-24 15:08:43 +08:00 · 2024-10-24 15:08:43 +08:00 · a59a66a2c7
commit a59a66a2c7
parent a95658f106
2 changed files with 432 additions and 472 deletions
--- a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp
--- a/modules/core/src/matmul.simd.hpp
+++ b/modules/core/src/matmul.simd.hpp
@@ -1595,7 +1595,10 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn,
 static void
 transform_32f( const float* src, float* dst, const float* m, int len, int scn, int dcn )
 {
-#if (CV_SIMD || CV_SIMD_SCALABLE) && !defined(__aarch64__) && !defined(_M_ARM64)
+// Disabled for RISC-V Vector (scalable) for two reasons:
+// 1. v_matmuladd for RVV is 128-bit only rather than scalable, which makes the test `Core_Transform.accuracy` fail.
+// 2. Both gcc and clang can autovectorize this code, with better performance than the Universal Intrinsics version.
+#if (CV_SIMD || CV_SIMD_SCALABLE) && !defined(__aarch64__) && !defined(_M_ARM64) && !(CV_TRY_RVV && CV_RVV)
    int x = 0;
    if( scn == 3 && dcn == 3 )
    {