mirror of https://github.com/opencv/opencv.git
fix exp, log | enable ui for log | strengthen test

Co-authored-by: Liutong HAN <liutong2020@iscas.ac.cn>

parent 524d8ae01c
commit 0fed1fa184
3rdparty/hal_rvv/hal_rvv_1p0/exp.hpp (vendored, 19 changes)
@@ -123,8 +123,8 @@ inline int exp32f(const float* src, float* dst, int _len)
         vl = __riscv_vsetvl_e32m4(len);
         auto x0 = __riscv_vle32_v_f32m4(src, vl);

-        x0 = __riscv_vfmerge(x0, detail::exp_min_val, __riscv_vmflt(x0, detail::exp_min_val, vl), vl);
-        x0 = __riscv_vfmerge(x0, detail::exp_max_val, __riscv_vmfgt(x0, detail::exp_max_val, vl), vl);
+        x0 = __riscv_vfmax(x0, detail::exp_min_val, vl);
+        x0 = __riscv_vfmin(x0, detail::exp_max_val, vl);
         x0 = __riscv_vfmul(x0, detail::exp_prescale, vl);

         auto xi = __riscv_vfcvt_rtz_x_f_v_i32m4(x0, vl);
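The clamp rewrite above is more than a simplification: the compare-and-merge form lets NaN inputs leak through, because an IEEE comparison against NaN is always false, whereas RVV vfmax/vfmin follow IEEE 754-2019 maximumNumber/minimumNumber semantics and return the non-NaN operand, so NaN lanes now get clamped to a defined value. A minimal scalar sketch of the difference (helper names are mine, not part of the patch):

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    // Old strategy: compare-and-merge. NaN fails both comparisons and survives.
    float clamp_merge(float x, float lo, float hi) {
        if (x < lo) x = lo;   // false when x is NaN
        if (x > hi) x = hi;   // false when x is NaN
        return x;             // NaN passes through
    }

    // New strategy: models vfmax/vfmin, which return the non-NaN operand.
    float clamp_minmax(float x, float lo, float hi) {
        x = std::isnan(x) ? lo : std::max(x, lo);  // vfmax(x, lo, vl)
        return std::min(x, hi);                    // vfmin(x, hi, vl)
    }

    int main() {
        float nan = std::nanf("");
        std::printf("%f\n", clamp_merge(nan, -80.f, 80.f));   // nan
        std::printf("%f\n", clamp_minmax(nan, -80.f, 80.f));  // -80
        return 0;
    }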
@@ -133,8 +133,8 @@ inline int exp32f(const float* src, float* dst, int _len)

         auto t = __riscv_vsra(xi, detail::exp_scale, vl);
         t = __riscv_vadd(t, 127, vl);
-        t = __riscv_vmerge(t, 0, __riscv_vmslt(t, 0, vl), vl);
-        t = __riscv_vand(t, 255, vl);
+        t = __riscv_vmax(t, 0, vl);
+        t = __riscv_vmin(t, 255, vl);
         auto buf = __riscv_vreinterpret_f32m4(__riscv_vsll(t, 23, vl));

         auto _xi = __riscv_vreinterpret_u32m4(xi);
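Here t is the biased exponent of the 2^k scale factor, written into bits 23..30 of a binary32. The old code clamped the underflow side (vmerge with 0) but handled overflow by masking with vand, which wraps an out-of-range exponent to a small value instead of saturating; vmax/vmin saturate into [0, 255] on both sides. A hypothetical scalar demonstration of the two behaviours (helper name is mine):

    #include <algorithm>
    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    // Build 2^(t-127) by writing t into the binary32 exponent field.
    float pow2_from_biased_exp(int t, bool saturate) {
        if (saturate)
            t = std::min(std::max(t, 0), 255);  // new: clamp into the field's range
        else
            t = (t < 0 ? 0 : t) & 255;          // old: wraps when t > 255
        uint32_t bits = uint32_t(t) << 23;
        float f;
        std::memcpy(&f, &bits, sizeof f);
        return f;
    }

    int main() {
        // k = 130 gives t = 257, which overflows the 8-bit field:
        // wrapping yields 257 & 255 = 1, i.e. 2^-126; saturating yields +inf.
        std::printf("%g\n", pow2_from_biased_exp(130 + 127, false)); // 1.17549e-38
        std::printf("%g\n", pow2_from_biased_exp(130 + 127, true));  // inf
        return 0;
    }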
@@ -158,6 +158,7 @@ inline int exp64f(const double* src, double* dst, int _len)
 {
     size_t vl = __riscv_vsetvlmax_e64m4();
+    // all vector registers are used up, so don't load more constants
     auto exp_a2 = __riscv_vfmv_v_f_f64m4(detail::exp64f_a2, vl);
     auto exp_a3 = __riscv_vfmv_v_f_f64m4(detail::exp64f_a3, vl);
     auto exp_a4 = __riscv_vfmv_v_f_f64m4(detail::exp64f_a4, vl);
     auto exp_a5 = __riscv_vfmv_v_f_f64m4(detail::exp64f_a5, vl);
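The register-pressure remark checks out arithmetically: at LMUL=4 the 32 RVV vector registers form only 32 / 4 = 8 usable m4 register groups, and the four splatted constants already occupy half of them, leaving just four groups for x0, xi, buf, tab_v and temporaries. Presumably that is also why exp64f_a0 and exp64f_a1 below are still consumed as detail:: scalar operands rather than preloaded splats.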
@@ -166,8 +167,8 @@ inline int exp64f(const double* src, double* dst, int _len)
         vl = __riscv_vsetvl_e64m4(len);
         auto x0 = __riscv_vle64_v_f64m4(src, vl);

-        x0 = __riscv_vfmerge(x0, detail::exp_min_val, __riscv_vmflt(x0, detail::exp_min_val, vl), vl);
-        x0 = __riscv_vfmerge(x0, detail::exp_max_val, __riscv_vmfgt(x0, detail::exp_max_val, vl), vl);
+        x0 = __riscv_vfmax(x0, detail::exp_min_val, vl);
+        x0 = __riscv_vfmin(x0, detail::exp_max_val, vl);
         x0 = __riscv_vfmul(x0, detail::exp_prescale, vl);

         auto xi = __riscv_vfcvt_rtz_x_f_v_i64m4(x0, vl);
@@ -176,8 +177,8 @@ inline int exp64f(const double* src, double* dst, int _len)

         auto t = __riscv_vsra(xi, detail::exp_scale, vl);
         t = __riscv_vadd(t, 1023, vl);
-        t = __riscv_vmerge(t, 0, __riscv_vmslt(t, 0, vl), vl);
-        t = __riscv_vand(t, 2047, vl);
+        t = __riscv_vmax(t, 0, vl);
+        t = __riscv_vmin(t, 2047, vl);
         auto buf = __riscv_vreinterpret_f64m4(__riscv_vsll(t, 52, vl));

         auto _xi = __riscv_vreinterpret_u64m4(xi);
@@ -186,7 +187,7 @@ inline int exp64f(const double* src, double* dst, int _len)

         auto res = __riscv_vfmul(buf, tab_v, vl);
         auto xn = __riscv_vfadd(__riscv_vfmul(x0, detail::exp64f_a0, vl), detail::exp64f_a1, vl);
-        xn = __riscv_vfadd(__riscv_vfmul(x0, xn, vl), detail::exp64f_a2, vl);
+        xn = __riscv_vfmadd(xn, x0, exp_a2, vl);
         xn = __riscv_vfmadd(xn, x0, exp_a3, vl);
         xn = __riscv_vfmadd(xn, x0, exp_a4, vl);
         xn = __riscv_vfmadd(xn, x0, exp_a5, vl);
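The polynomial is evaluated by Horner's rule, xn = ((((a0*x + a1)*x + a2)*x + a3)*x + a4)*x + a5, and the corrected a2 step now matches the a3..a5 steps: one fused vfmadd against the preloaded exp_a2 splat instead of a separate vfmul + vfadd with two roundings. A minimal scalar model of the fixed evaluation, using std::fma the way each vfmadd fuses multiply and add (helper name is mine):

    #include <cmath>

    // xn = ((((a[0]*x + a[1])*x + a[2])*x + a[3])*x + a[4])*x + a[5]
    double horner_exp_poly(double x, const double a[6]) {
        double xn = std::fma(a[0], x, a[1]);  // a0*x + a1
        xn = std::fma(xn, x, a[2]);           // the step this hunk fixes
        xn = std::fma(xn, x, a[3]);
        xn = std::fma(xn, x, a[4]);
        xn = std::fma(xn, x, a[5]);
        return xn;
    }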
3rdparty/hal_rvv/hal_rvv_1p0/log.hpp (vendored, 4 changes)
@@ -324,7 +324,7 @@ inline int log32f(const float* src, float* dst, int _len)
         tab_v = __riscv_vluxei32(detail::log_tab_32f, __riscv_vadd(idx, 4, vl), vl);
         auto buf_f = __riscv_vreinterpret_f32m4(buf_i);
         auto x0 = __riscv_vfmul(__riscv_vfsub(buf_f, 1.f, vl), tab_v, vl);
-        x0 = __riscv_vfsub(__riscv_vmseq(idx, (uint32_t)510 * 4, vl), x0, 1.f / 512, vl);
+        x0 = __riscv_vfsub_mu(__riscv_vmseq(idx, (uint32_t)510 * 4, vl), x0, x0, 1.f / 512, vl);

         auto res = __riscv_vfadd(__riscv_vfmul(x0, detail::log32f_a0, vl), detail::log32f_a1, vl);
         res = __riscv_vfmadd(res, x0, log_a2, vl);
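This is the actual log fix: the 1/512 subtraction must apply only to lanes whose table index hit the last entry (idx == 510*4). In the v1.0 RVV intrinsics the plain masked form __riscv_vfsub(mask, op1, op2, vl) leaves inactive lanes with unspecified values, while the _mu (mask-undisturbed) form takes an extra maskedoff operand, here x0 itself, that inactive lanes keep. A per-lane scalar model (hypothetical helper, not from the patch):

    // _mu semantics for one lane: active lanes compute, inactive lanes
    // keep the maskedoff value instead of becoming unspecified.
    float vfsub_mu_lane(bool active, float maskedoff, float op1, float op2) {
        return active ? op1 - op2 : maskedoff;
    }
    // Old form, per lane: active ? op1 - op2 : /* unspecified */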
@@ -361,7 +361,7 @@ inline int log64f(const double* src, double* dst, int _len)
         tab_v = __riscv_vluxei64(detail::log_tab_64f, __riscv_vadd(idx, 8, vl), vl);
         auto buf_f = __riscv_vreinterpret_f64m4(buf_i);
         auto x0 = __riscv_vfmul(__riscv_vfsub(buf_f, 1.0, vl), tab_v, vl);
-        x0 = __riscv_vfsub(__riscv_vmseq(idx, (uint64_t)510 * 8, vl), x0, 1. / 512, vl);
+        x0 = __riscv_vfsub_mu(__riscv_vmseq(idx, (uint64_t)510 * 8, vl), x0, x0, 1. / 512, vl);

         auto res = __riscv_vfadd(__riscv_vfmul(x0, detail::log64f_a0, vl), detail::log64f_a1, vl);
         res = __riscv_vfadd(__riscv_vfmul(x0, res, vl), detail::log64f_a2, vl);
@@ -509,6 +509,28 @@ inline v_float64 v_lut(const double* tab, const v_int32& vidx) \
 #endif

+
+// Strangely, __riscv_vluxseg2ei32 is slower (tested on Muse-Pi and CanMV K230)
+#define OPENCV_HAL_IMPL_RVV_LUT_DEINTERLEAVE(_Tpvec, _Tp, suffix) \
+inline void v_lut_deinterleave(const _Tp* tab, const v_int32& vidx, _Tpvec& vx, _Tpvec& vy) \
+{ \
+    v_uint32 vidx_ = __riscv_vmul(__riscv_vreinterpret_u32m2(vidx), sizeof(_Tp), VTraits<v_int32>::vlanes()); \
+    vx = __riscv_vluxei32(tab, vidx_, VTraits<_Tpvec>::vlanes()); \
+    vy = __riscv_vluxei32(tab, __riscv_vadd(vidx_, sizeof(_Tp), VTraits<v_int32>::vlanes()), VTraits<_Tpvec>::vlanes()); \
+}
+OPENCV_HAL_IMPL_RVV_LUT_DEINTERLEAVE(v_float32, float, f32)
+OPENCV_HAL_IMPL_RVV_LUT_DEINTERLEAVE(v_int32, int, i32)
+OPENCV_HAL_IMPL_RVV_LUT_DEINTERLEAVE(v_uint32, unsigned, u32)
+
+#if CV_SIMD_SCALABLE_64F
+inline void v_lut_deinterleave(const double* tab, const v_int32& vidx, v_float64& vx, v_float64& vy)
+{
+    vuint32m1_t vidx_ = __riscv_vmul(__riscv_vlmul_trunc_u32m1(__riscv_vreinterpret_u32m2(vidx)), sizeof(double), VTraits<v_float64>::vlanes());
+    vx = __riscv_vluxei32(tab, vidx_, VTraits<v_float64>::vlanes());
+    vy = __riscv_vluxei32(tab, __riscv_vadd(vidx_, sizeof(double), VTraits<v_int32>::vlanes()), VTraits<v_float64>::vlanes());
+}
+#endif
+

 inline v_uint8 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); }
 inline v_uint8 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); }
 inline v_uint8 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); }
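The new v_lut_deinterleave overloads gather interleaved (x, y) pairs with two plain indexed loads: vidx is scaled to a byte offset once, vx gathers tab[idx] and vy gathers tab[idx + one element]. The comment records why the seemingly natural segment load was rejected: __riscv_vluxseg2ei32 measured slower on the boards tried. A hedged usage sketch in universal-intrinsics style (the table layout and index scaling are my assumptions, not spelled out in the patch):

    // Table of interleaved pairs: tab = {x0, y0, x1, y1, ...}.
    // Each lane of vidx is assumed to hold the element index of an x entry,
    // i.e. 2 * pair_index, so that byte offset = vidx * sizeof(float).
    //
    //   v_float32 vx, vy;
    //   v_lut_deinterleave(tab, vidx, vx, vy);
    //   // per lane: vx[i] = tab[vidx[i]], vy[i] = tab[vidx[i] + 1]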
@@ -771,7 +771,7 @@ void log32f( const float *_x, float *y, int n )
     int i = 0;
     const int* x = (const int*)_x;

-#if CV_SIMD
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     const int VECSZ = VTraits<v_float32>::vlanes();
     const v_float32 vln2 = vx_setall_f32((float)ln_2);
     const v_float32 v1 = vx_setall_f32(1.f);
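This guard change (and the CV_SIMD_64F one below) is the "enable ui for log" part of the commit: scalable-vector backends such as RVV define CV_SIMD_SCALABLE and CV_SIMD_SCALABLE_64F but leave CV_SIMD and CV_SIMD_64F at 0, so the old guards silently dropped log32f/log64f to the scalar fallback there. The dispatch pattern, sketched:

    #if (CV_SIMD || CV_SIMD_SCALABLE)   // fixed-width OR scalable universal intrinsics
        // vectorized path
    #else
        // scalar fallback
    #endif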
@@ -846,7 +846,7 @@ void log64f( const double *x, double *y, int n )

     int i = 0;

-#if CV_SIMD_64F
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
     const int VECSZ = VTraits<v_float64>::vlanes();
     const v_float64 vln2 = vx_setall_f64(ln_2);
@@ -1099,7 +1099,7 @@ struct ExpOp : public BaseElemWiseOp
     }
     void getValueRange(int depth, double& minval, double& maxval)
     {
-        maxval = depth == CV_32F ? 50 : 100;
+        maxval = depth == CV_32F ? 80 : 700;
         minval = -maxval;
     }
     void op(const vector<Mat>& src, Mat& dst, const Mat&)
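The widened ranges track how far exp can actually go before overflow: exp(x) exceeds FLT_MAX just above x = ln(FLT_MAX) ≈ 88.72 and exceeds DBL_MAX just above x = ln(DBL_MAX) ≈ 709.78, so testing up to 80 (float) and 700 (double) exercises nearly the whole finite domain, where the old 50/100 bounds left most of it untested. Quick check of those thresholds:

    #include <cfloat>
    #include <cmath>
    #include <cstdio>

    int main() {
        std::printf("%f\n", std::log(FLT_MAX)); // ~88.722839
        std::printf("%f\n", std::log(DBL_MAX)); // ~709.782712
        return 0;
    }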
@@ -74,7 +74,8 @@ TEST_P(mathfuncs, accuracy)
     int n = 100;

     Mat src(1, n, depth), dst(1, n, depth), dst0(1, n, depth);
-    randu(src, 1, 10);
+    double maxval = depth == CV_32F ? 80 : 700;
+    randu(src, -maxval, maxval);

     switch (nfunc)
     {
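This is the "strengthen test" half: the old inputs were drawn only from [1, 10), so negative arguments and near-overflow magnitudes were never exercised, and the vfmerge and vand bugs fixed above could not have been caught. The new range presumably mirrors the ExpOp bounds so both tails of the domain are covered.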