diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/exp.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/exp.hpp
index ea7f0c67d4..60efee2b8b 100644
--- a/3rdparty/hal_rvv/hal_rvv_1p0/exp.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/exp.hpp
@@ -123,8 +123,8 @@ inline int exp32f(const float* src, float* dst, int _len)
         vl = __riscv_vsetvl_e32m4(len);
         auto x0 = __riscv_vle32_v_f32m4(src, vl);
-        x0 = __riscv_vfmerge(x0, detail::exp_min_val, __riscv_vmflt(x0, detail::exp_min_val, vl), vl);
-        x0 = __riscv_vfmerge(x0, detail::exp_max_val, __riscv_vmfgt(x0, detail::exp_max_val, vl), vl);
+        x0 = __riscv_vfmax(x0, detail::exp_min_val, vl);
+        x0 = __riscv_vfmin(x0, detail::exp_max_val, vl);
         x0 = __riscv_vfmul(x0, detail::exp_prescale, vl);
 
         auto xi = __riscv_vfcvt_rtz_x_f_v_i32m4(x0, vl);
@@ -133,8 +133,8 @@ inline int exp32f(const float* src, float* dst, int _len)
         auto t = __riscv_vsra(xi, detail::exp_scale, vl);
         t = __riscv_vadd(t, 127, vl);
-        t = __riscv_vmerge(t, 0, __riscv_vmslt(t, 0, vl), vl);
-        t = __riscv_vand(t, 255, vl);
+        t = __riscv_vmax(t, 0, vl);
+        t = __riscv_vmin(t, 255, vl);
         auto buf = __riscv_vreinterpret_f32m4(__riscv_vsll(t, 23, vl));
 
         auto _xi = __riscv_vreinterpret_u32m4(xi);
@@ -158,6 +158,7 @@ inline int exp64f(const double* src, double* dst, int _len)
 {
     size_t vl = __riscv_vsetvlmax_e64m4();
     // all vector registers are used up, so not load more constants
+    auto exp_a2 = __riscv_vfmv_v_f_f64m4(detail::exp64f_a2, vl);
     auto exp_a3 = __riscv_vfmv_v_f_f64m4(detail::exp64f_a3, vl);
     auto exp_a4 = __riscv_vfmv_v_f_f64m4(detail::exp64f_a4, vl);
     auto exp_a5 = __riscv_vfmv_v_f_f64m4(detail::exp64f_a5, vl);
@@ -166,8 +167,8 @@ inline int exp64f(const double* src, double* dst, int _len)
         vl = __riscv_vsetvl_e64m4(len);
         auto x0 = __riscv_vle64_v_f64m4(src, vl);
-        x0 = __riscv_vfmerge(x0, detail::exp_min_val, __riscv_vmflt(x0, detail::exp_min_val, vl), vl);
-        x0 = __riscv_vfmerge(x0, detail::exp_max_val, __riscv_vmfgt(x0, detail::exp_max_val, vl), vl);
+        x0 = __riscv_vfmax(x0, detail::exp_min_val, vl);
+        x0 = __riscv_vfmin(x0, detail::exp_max_val, vl);
         x0 = __riscv_vfmul(x0, detail::exp_prescale, vl);
 
         auto xi = __riscv_vfcvt_rtz_x_f_v_i64m4(x0, vl);
@@ -176,8 +177,8 @@ inline int exp64f(const double* src, double* dst, int _len)
         auto t = __riscv_vsra(xi, detail::exp_scale, vl);
         t = __riscv_vadd(t, 1023, vl);
-        t = __riscv_vmerge(t, 0, __riscv_vmslt(t, 0, vl), vl);
-        t = __riscv_vand(t, 2047, vl);
+        t = __riscv_vmax(t, 0, vl);
+        t = __riscv_vmin(t, 2047, vl);
         auto buf = __riscv_vreinterpret_f64m4(__riscv_vsll(t, 52, vl));
 
         auto _xi = __riscv_vreinterpret_u64m4(xi);
@@ -186,7 +187,7 @@ inline int exp64f(const double* src, double* dst, int _len)
         auto res = __riscv_vfmul(buf, tab_v, vl);
 
         auto xn = __riscv_vfadd(__riscv_vfmul(x0, detail::exp64f_a0, vl), detail::exp64f_a1, vl);
-        xn = __riscv_vfadd(__riscv_vfmul(x0, xn, vl), detail::exp64f_a2, vl);
+        xn = __riscv_vfmadd(xn, x0, exp_a2, vl);
         xn = __riscv_vfmadd(xn, x0, exp_a3, vl);
         xn = __riscv_vfmadd(xn, x0, exp_a4, vl);
         xn = __riscv_vfmadd(xn, x0, exp_a5, vl);
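All four clamp hunks above apply the same rewrite: a compare-plus-merge pair (`vmflt`/`vmfgt` + `vfmerge`, or `vmslt` + `vmerge`/`vand`) becomes a single `vfmax`/`vfmin` (`vmax`/`vmin`), saving a mask register and one instruction per bound; the new `exp_a2` broadcast likewise lets the a2 polynomial step use a fused `vfmadd` like the later coefficients. A minimal scalar sketch of why the clamp forms agree; `kMinVal`/`kMaxVal` are illustrative stand-ins, not the actual `detail::exp_min_val`/`detail::exp_max_val` constants:

```cpp
// Scalar model of the clamp rewrites in exp32f. kMinVal/kMaxVal are
// illustrative assumptions, not the real detail::exp_min_val/exp_max_val.
#include <algorithm>
#include <cassert>

static const float kMinVal = -87.3f;  // stand-in for detail::exp_min_val
static const float kMaxVal =  88.7f;  // stand-in for detail::exp_max_val

// Old float path: two compares plus two merges (vmflt/vmfgt + vfmerge).
static float clampMerge(float x)
{
    if (x < kMinVal) x = kMinVal;
    if (x > kMaxVal) x = kMaxVal;
    return x;
}

// New float path: vfmax + vfmin. Identical for ordered (non-NaN) inputs.
static float clampMinMax(float x)
{
    return std::min(std::max(x, kMinVal), kMaxVal);
}

// Old biased-exponent path: select-to-zero, then AND (wraps above 255).
static int expoMergeAnd(int t) { return (t < 0 ? 0 : t) & 255; }

// New path: vmax + vmin. Equivalent only because the earlier clamp of x0
// guarantees t <= 255 here; past 255 the AND would wrap, vmin saturates.
static int expoMinMax(int t) { return std::min(std::max(t, 0), 255); }

int main()
{
    for (float x = -200.f; x <= 200.f; x += 0.25f)
        assert(clampMerge(x) == clampMinMax(x));
    for (int t = -300; t <= 255; t++)
        assert(expoMergeAnd(t) == expoMinMax(t));
    return 0;
}
```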
diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/log.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/log.hpp
index 4d8399ee88..02c62f4400 100644
--- a/3rdparty/hal_rvv/hal_rvv_1p0/log.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/log.hpp
@@ -324,7 +324,7 @@ inline int log32f(const float* src, float* dst, int _len)
         tab_v = __riscv_vluxei32(detail::log_tab_32f, __riscv_vadd(idx, 4, vl), vl);
         auto buf_f = __riscv_vreinterpret_f32m4(buf_i);
         auto x0 = __riscv_vfmul(__riscv_vfsub(buf_f, 1.f, vl), tab_v, vl);
-        x0 = __riscv_vfsub(__riscv_vmseq(idx, (uint32_t)510 * 4, vl), x0, 1.f / 512, vl);
+        x0 = __riscv_vfsub_mu(__riscv_vmseq(idx, (uint32_t)510 * 4, vl), x0, x0, 1.f / 512, vl);
 
         auto res = __riscv_vfadd(__riscv_vfmul(x0, detail::log32f_a0, vl), detail::log32f_a1, vl);
         res = __riscv_vfmadd(res, x0, log_a2, vl);
@@ -361,7 +361,7 @@ inline int log64f(const double* src, double* dst, int _len)
         tab_v = __riscv_vluxei64(detail::log_tab_64f, __riscv_vadd(idx, 8, vl), vl);
         auto buf_f = __riscv_vreinterpret_f64m4(buf_i);
         auto x0 = __riscv_vfmul(__riscv_vfsub(buf_f, 1.0, vl), tab_v, vl);
-        x0 = __riscv_vfsub(__riscv_vmseq(idx, (uint64_t)510 * 8, vl), x0, 1. / 512, vl);
+        x0 = __riscv_vfsub_mu(__riscv_vmseq(idx, (uint64_t)510 * 8, vl), x0, x0, 1. / 512, vl);
 
         auto res = __riscv_vfadd(__riscv_vfmul(x0, detail::log64f_a0, vl), detail::log64f_a1, vl);
         res = __riscv_vfadd(__riscv_vfmul(x0, res, vl), detail::log64f_a2, vl);
diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp
index 7912e1fdc4..85acd09d4b 100644
--- a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp
@@ -509,6 +509,28 @@ inline v_float64 v_lut(const double* tab, const v_int32& vidx) \
 
 #endif
 
+// Strangely, __riscv_vluxseg2ei32 is slower (tested on Muse-Pi and CanMV K230)
+#define OPENCV_HAL_IMPL_RVV_LUT_DEINTERLEAVE(_Tpvec, _Tp, suffix) \
+inline void v_lut_deinterleave(const _Tp* tab, const v_int32& vidx, _Tpvec& vx, _Tpvec& vy) \
+{ \
+    v_uint32 vidx_ = __riscv_vmul(__riscv_vreinterpret_u32m2(vidx), sizeof(_Tp), VTraits<v_uint32>::vlanes()); \
+    vx = __riscv_vluxei32(tab, vidx_, VTraits<_Tpvec>::vlanes()); \
+    vy = __riscv_vluxei32(tab, __riscv_vadd(vidx_, sizeof(_Tp), VTraits<v_uint32>::vlanes()), VTraits<_Tpvec>::vlanes()); \
+}
+OPENCV_HAL_IMPL_RVV_LUT_DEINTERLEAVE(v_float32, float, f32)
+OPENCV_HAL_IMPL_RVV_LUT_DEINTERLEAVE(v_int32, int, i32)
+OPENCV_HAL_IMPL_RVV_LUT_DEINTERLEAVE(v_uint32, unsigned, u32)
+
+#if CV_SIMD_SCALABLE_64F
+inline void v_lut_deinterleave(const double* tab, const v_int32& vidx, v_float64& vx, v_float64& vy) \
+{ \
+    vuint32m1_t vidx_ = __riscv_vmul(__riscv_vlmul_trunc_u32m1(__riscv_vreinterpret_u32m2(vidx)), sizeof(double), VTraits<v_float64>::vlanes()); \
+    vx = __riscv_vluxei32(tab, vidx_, VTraits<v_float64>::vlanes()); \
+    vy = __riscv_vluxei32(tab, __riscv_vadd(vidx_, sizeof(double), VTraits<v_float64>::vlanes()), VTraits<v_float64>::vlanes()); \
+}
+#endif
+
 inline v_uint8 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); }
 inline v_uint8 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); }
 inline v_uint8 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); }
diff --git a/modules/core/src/mathfuncs_core.simd.hpp b/modules/core/src/mathfuncs_core.simd.hpp
index f92234f140..41a3261c64 100644
--- a/modules/core/src/mathfuncs_core.simd.hpp
+++ b/modules/core/src/mathfuncs_core.simd.hpp
@@ -771,7 +771,7 @@ void log32f( const float *_x, float *y, int n )
     int i = 0;
     const int* x = (const int*)_x;
 
-#if CV_SIMD
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     const int VECSZ = VTraits<v_float32>::vlanes();
     const v_float32 vln2 = vx_setall_f32((float)ln_2);
     const v_float32 v1 = vx_setall_f32(1.f);
@@ -846,7 +846,7 @@ void log64f( const double *x, double *y, int n )
 
     int i = 0;
 
-#if CV_SIMD_64F
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
     const int VECSZ = VTraits<v_float64>::vlanes();
     const v_float64 vln2 = vx_setall_f64(ln_2);
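Two notes on the hunks above. The log.hpp fix swaps the plain masked `__riscv_vfsub(mask, op1, op2, vl)`, whose inactive lanes come back unspecified under the mask-agnostic policy, for the `_mu` (mask-undisturbed) form, where the extra `x0` operand supplies the value that inactive lanes keep. And the new `v_lut_deinterleave` issues two plain indexed loads instead of a segment load because, per the comment in the diff, `__riscv_vluxseg2ei32` measured slower on the boards tested. A scalar reference for what those two gathers compute (`lut_deinterleave_ref` is a hypothetical helper, not OpenCV API):

```cpp
// Scalar model of v_lut_deinterleave: lane i receives the interleaved pair
// (tab[vidx[i]], tab[vidx[i] + 1]). vluxei32 takes byte offsets, hence the
// multiply by sizeof(T) and the extra +sizeof(T) for the second element.
// lut_deinterleave_ref is a hypothetical reference helper, not OpenCV API.
#include <cstddef>

template <typename T>
void lut_deinterleave_ref(const T* tab, const int* vidx, size_t n, T* vx, T* vy)
{
    for (size_t i = 0; i < n; i++)
    {
        vx[i] = tab[vidx[i]];      // byte offset vidx[i] * sizeof(T)
        vy[i] = tab[vidx[i] + 1];  // byte offset vidx[i] * sizeof(T) + sizeof(T)
    }
}
```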
diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp
index 721cf70b6d..88d646b09f 100644
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@@ -1099,7 +1099,7 @@ struct ExpOp : public BaseElemWiseOp
     }
     void getValueRange(int depth, double& minval, double& maxval)
    {
-        maxval = depth == CV_32F ? 50 : 100;
+        maxval = depth == CV_32F ? 80 : 700;
         minval = -maxval;
     }
     void op(const vector<Mat>& src, Mat& dst, const Mat&)
diff --git a/modules/core/test/test_hal_core.cpp b/modules/core/test/test_hal_core.cpp
index a86016d44f..6e85cce99b 100644
--- a/modules/core/test/test_hal_core.cpp
+++ b/modules/core/test/test_hal_core.cpp
@@ -74,7 +74,8 @@ TEST_P(mathfuncs, accuracy)
     int n = 100;
 
     Mat src(1, n, depth), dst(1, n, depth), dst0(1, n, depth);
-    randu(src, 1, 10);
+    double maxval = depth == CV_32F ? 80 : 700;
+    randu(src, -maxval, maxval);
 
     switch (nfunc)
     {
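The widened test ranges (80 for CV_32F and 700 for CV_64F, up from 50/100 and [1, 10]) presumably sit just inside the overflow thresholds of exp, ln(FLT_MAX) ≈ 88.72 and ln(DBL_MAX) ≈ 709.78, so the tests now cover nearly the whole finite domain without producing infinities. A quick standalone check of that arithmetic:

```cpp
// Sanity-check the new test bounds against the overflow thresholds of exp.
// ln(FLT_MAX) ~ 88.72 and ln(DBL_MAX) ~ 709.78, so 80 and 700 keep every
// generated input finite while covering far more of the domain than before.
#include <cassert>
#include <cmath>

int main()
{
    assert(std::isfinite(std::exp(80.0f)));  // exp(80)  ~ 5.5e34  < FLT_MAX ~ 3.4e38
    assert(std::isfinite(std::exp(700.0)));  // exp(700) ~ 1.0e304 < DBL_MAX ~ 1.8e308
    assert(std::isinf(std::exp(89.0f)));     // just past ln(FLT_MAX): overflows to +inf
    assert(std::isinf(std::exp(710.0)));     // just past ln(DBL_MAX): overflows to +inf
    return 0;
}
```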