diff --git a/3rdparty/hal_rvv/hal_rvv.hpp b/3rdparty/hal_rvv/hal_rvv.hpp
index 83ae132e24..3b0d596d9d 100644
--- a/3rdparty/hal_rvv/hal_rvv.hpp
+++ b/3rdparty/hal_rvv/hal_rvv.hpp
@@ -22,6 +22,7 @@
 #if defined(__riscv_v) && __riscv_v == 1000000
 #include "hal_rvv_1p0/merge.hpp" // core
 #include "hal_rvv_1p0/mean.hpp" // core
+#include "hal_rvv_1p0/dxt.hpp" // core
 #include "hal_rvv_1p0/norm.hpp" // core
 #include "hal_rvv_1p0/norm_diff.hpp" // core
 #include "hal_rvv_1p0/norm_hamming.hpp" // core
diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/dxt.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/dxt.hpp
new file mode 100644
index 0000000000..3c763b386b
--- /dev/null
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/dxt.hpp
@@ -0,0 +1,569 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef OPENCV_HAL_RVV_DXT_HPP_INCLUDED
+#define OPENCV_HAL_RVV_DXT_HPP_INCLUDED
+
+#include <riscv_vector.h>
+
+namespace cv { namespace cv_hal_rvv { namespace dxt {
+
+#undef cv_hal_dft
+#define cv_hal_dft cv::cv_hal_rvv::dxt::dft
+
+template<typename T> struct rvv;
+
+template<> struct rvv<float>
+{
+    using T = vfloat32mf2_t;
+    static inline size_t vsetvl_itab(size_t a) { return __riscv_vsetvl_e32m8(a); }
+    static inline vuint32m8_t vlse_itab(const uint* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_u32m8(a, b, c); }
+    static inline vfloat32m8_t vlse_itab_f(const float* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_f32m8(a, b, c); }
+    static inline void vsse_itab(float* a, ptrdiff_t b, vfloat32m8_t c, size_t d) { return __riscv_vsse32(a, b, c, d); }
+    static inline size_t vsetvl(size_t a) { return __riscv_vsetvl_e32mf2(a); }
+    static inline vfloat32m1_t vfmv_s(float a, size_t b) { return __riscv_vfmv_s_f_f32m1(a, b); }
+    static inline void vlseg(const float* a, T& b, T& c, size_t d) { auto x = __riscv_vlseg2e32_v_f32mf2x2(a, d); b = __riscv_vget_v_f32mf2x2_f32mf2(x, 0), c = __riscv_vget_v_f32mf2x2_f32mf2(x, 1); }
+    static inline void vlsseg(const float* a, ptrdiff_t b, T& c, T& d, size_t e) { auto x = __riscv_vlsseg2e32_v_f32mf2x2(a, b, e); c = __riscv_vget_v_f32mf2x2_f32mf2(x, 0), d = __riscv_vget_v_f32mf2x2_f32mf2(x, 1); }
+    static inline void vsseg(float* a, T b, T c, size_t d) { __riscv_vsseg2e32(a, __riscv_vset_v_f32mf2_f32mf2x2(__riscv_vset_v_f32mf2_f32mf2x2(vfloat32mf2x2_t(), 0, b), 1, c), d); }
+};
+
+template<> struct rvv<double>
+{
+    using T = vfloat64m1_t;
+    static inline size_t vsetvl_itab(size_t a) { return __riscv_vsetvl_e32m4(a); }
+    static inline vuint32m4_t vlse_itab(const uint* a, ptrdiff_t b, size_t c) { return __riscv_vlse32_v_u32m4(a, b, c); }
+    static inline vfloat64m8_t vlse_itab_f(const double* a, ptrdiff_t b, size_t c) { return __riscv_vlse64_v_f64m8(a, b, c); }
+    static inline void vsse_itab(double* a, ptrdiff_t b, vfloat64m8_t c, size_t d) { return __riscv_vsse64(a, b, c, d); }
+    static inline size_t vsetvl(size_t a) { return __riscv_vsetvl_e64m1(a); }
+    static inline vfloat64m1_t vfmv_s(double a, size_t b) { return __riscv_vfmv_s_f_f64m1(a, b); }
+    static inline void vlseg(const double* a, T& b, T& c, size_t d) { auto x = __riscv_vlseg2e64_v_f64m1x2(a, d); b = __riscv_vget_v_f64m1x2_f64m1(x, 0), c = __riscv_vget_v_f64m1x2_f64m1(x, 1); }
+    static inline void vlsseg(const double* a, ptrdiff_t b, T& c, T& d, size_t e) { auto x = __riscv_vlsseg2e64_v_f64m1x2(a, b, e); c = __riscv_vget_v_f64m1x2_f64m1(x, 0), d = __riscv_vget_v_f64m1x2_f64m1(x, 1); }
+    static inline void vsseg(double* a, T b, T c, size_t
d) { __riscv_vsseg2e64(a, __riscv_vset_v_f64m1_f64m1x2(__riscv_vset_v_f64m1_f64m1x2(vfloat64m1x2_t(), 0, b), 1, c), d); } +}; + +// the algorithm is copied from core/src/dxt.cpp, +// in the function template static void cv::DFT and cv::DFT_R2, cv::DFT_R3, cv::DFT_R5 +template +inline int dft(const Complex* src, Complex* dst, int nf, int *factors, T scale, int* itab, + const Complex* wave, int tab_size, int len, bool isInverse, bool noPermute) +{ + int n = len; + int f_idx, nx; + int dw0 = tab_size, dw; + int i, j, k; + Complex t; + using VT = typename rvv::T; + + int tab_step = tab_size == n ? 1 : tab_size == n*2 ? 2 : tab_size/n; + int vl; + + // 0. shuffle data + if( dst != src ) + { + if( !isInverse ) + { + for( i = 0; i < n; i += vl ) + { + vl = rvv::vsetvl_itab(n - i); + auto vec_itab = rvv::vlse_itab(reinterpret_cast(itab + i * tab_step), sizeof(int) * tab_step, vl); + vec_itab = __riscv_vmul(vec_itab, sizeof(T) * 2, vl); + auto vec_src_re = __riscv_vloxei32(reinterpret_cast(src), vec_itab, vl); + vec_itab = __riscv_vadd(vec_itab, sizeof(T), vl); + auto vec_src_im = __riscv_vloxei32(reinterpret_cast(src), vec_itab, vl); + rvv::vsse_itab(reinterpret_cast(dst + i), sizeof(T) * 2, vec_src_re, vl); + rvv::vsse_itab(reinterpret_cast(dst + i) + 1, sizeof(T) * 2, vec_src_im, vl); + } + } + else + { + for( i = 0; i < n; i += vl ) + { + vl = rvv::vsetvl_itab(n - i); + auto vec_itab = rvv::vlse_itab(reinterpret_cast(itab + i * tab_step), sizeof(int) * tab_step, vl); + vec_itab = __riscv_vmul(vec_itab, sizeof(T) * 2, vl); + auto vec_src_re = __riscv_vloxei32(reinterpret_cast(src), vec_itab, vl); + vec_itab = __riscv_vadd(vec_itab, sizeof(T), vl); + auto vec_src_im = __riscv_vloxei32(reinterpret_cast(src), vec_itab, vl); + vec_src_im = __riscv_vfneg(vec_src_im, vl); + rvv::vsse_itab(reinterpret_cast(dst + i), sizeof(T) * 2, vec_src_re, vl); + rvv::vsse_itab(reinterpret_cast(dst + i) + 1, sizeof(T) * 2, vec_src_im, vl); + } + } + } + else + { + // copied from core/src/dxt.cpp, it is slow to swap elements by intrinsics + if( !noPermute ) + { + if( nf == 1 ) + { + if( (n & 3) == 0 ) + { + int n2 = n/2; + Complex* dsth = dst + n2; + + for( i = 0; i < n2; i += 2, itab += tab_step*2 ) + { + j = itab[0]; + + t = dst[i+1], dst[i+1] = dsth[j], dsth[j] = t; + if( j > i ) + { + t = dst[i], dst[i] = dst[j], dst[j] = t; + t = dsth[i+1], dsth[i+1] = dsth[j+1], dsth[j+1] = t; + } + } + } + // else do nothing + } + else + { + for( i = 0; i < n; i++, itab += tab_step ) + { + j = itab[0]; + if( j > i ) + t = dst[i], dst[i] = dst[j], dst[j] = t; + } + } + } + + if( isInverse ) + { + for( i = 0; i < n; i += vl ) + { + vl = rvv::vsetvl_itab(n - i); + auto vec_src_im = rvv::vlse_itab_f(reinterpret_cast(dst + i) + 1, sizeof(T) * 2, vl); + vec_src_im = __riscv_vfneg(vec_src_im, vl); + rvv::vsse_itab(reinterpret_cast(dst + i) + 1, sizeof(T) * 2, vec_src_im, vl); + } + } + } + + n = 1; + // 1. 
power-2 transforms + if( (factors[0] & 1) == 0 ) + { + // radix-4 transform + for( ; n*4 <= factors[0]; ) + { + nx = n; + n *= 4; + dw0 /= 4; + + for( i = 0; i < len; i += n ) + { + Complex *v0, *v1; + T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4; + + v0 = dst + i; + v1 = v0 + nx*2; + + r0 = v1[0].re; i0 = v1[0].im; + r4 = v1[nx].re; i4 = v1[nx].im; + + r1 = r0 + r4; i1 = i0 + i4; + r3 = i0 - i4; i3 = r4 - r0; + + r2 = v0[0].re; i2 = v0[0].im; + r4 = v0[nx].re; i4 = v0[nx].im; + + r0 = r2 + r4; i0 = i2 + i4; + r2 -= r4; i2 -= i4; + + v0[0].re = r0 + r1; v0[0].im = i0 + i1; + v1[0].re = r0 - r1; v1[0].im = i0 - i1; + v0[nx].re = r2 + r3; v0[nx].im = i2 + i3; + v1[nx].re = r2 - r3; v1[nx].im = i2 - i3; + + for( j = 1; j < nx; j += vl ) + { + vl = rvv::vsetvl(nx - j); + v0 = dst + i + j; + v1 = v0 + nx*2; + + VT vec_re, vec_im, vec_w_re, vec_w_im; + rvv::vlseg(reinterpret_cast(v1), vec_re, vec_im, vl); + rvv::vlsseg(reinterpret_cast(wave + j * dw0), sizeof(T) * dw0 * 2, vec_w_re, vec_w_im, vl); + auto vec_r0 = __riscv_vfadd(__riscv_vfmul(vec_re, vec_w_im, vl), __riscv_vfmul(vec_im, vec_w_re, vl), vl); + auto vec_i0 = __riscv_vfsub(__riscv_vfmul(vec_re, vec_w_re, vl), __riscv_vfmul(vec_im, vec_w_im, vl), vl); + + rvv::vlseg(reinterpret_cast(v1 + nx), vec_re, vec_im, vl); + rvv::vlsseg(reinterpret_cast(wave + j * dw0 * 3), sizeof(T) * dw0 * 6, vec_w_re, vec_w_im, vl); + auto vec_r3 = __riscv_vfadd(__riscv_vfmul(vec_re, vec_w_im, vl), __riscv_vfmul(vec_im, vec_w_re, vl), vl); + auto vec_i3 = __riscv_vfsub(__riscv_vfmul(vec_re, vec_w_re, vl), __riscv_vfmul(vec_im, vec_w_im, vl), vl); + + auto vec_r1 = __riscv_vfadd(vec_i0, vec_i3, vl); + auto vec_i1 = __riscv_vfadd(vec_r0, vec_r3, vl); + vec_r3 = __riscv_vfsub(vec_r0, vec_r3, vl); + vec_i3 = __riscv_vfsub(vec_i3, vec_i0, vl); + VT vec_r4, vec_i4; + rvv::vlseg(reinterpret_cast(v0), vec_r4, vec_i4, vl); + + rvv::vlseg(reinterpret_cast(v0 + nx), vec_re, vec_im, vl); + rvv::vlsseg(reinterpret_cast(wave + j * dw0 * 2), sizeof(T) * dw0 * 4, vec_w_re, vec_w_im, vl); + auto vec_r2 = __riscv_vfsub(__riscv_vfmul(vec_re, vec_w_re, vl), __riscv_vfmul(vec_im, vec_w_im, vl), vl); + auto vec_i2 = __riscv_vfadd(__riscv_vfmul(vec_re, vec_w_im, vl), __riscv_vfmul(vec_im, vec_w_re, vl), vl); + + vec_r0 = __riscv_vfadd(vec_r4, vec_r2, vl); + vec_i0 = __riscv_vfadd(vec_i4, vec_i2, vl); + + rvv::vsseg(reinterpret_cast(v0), __riscv_vfadd(vec_r0, vec_r1, vl), __riscv_vfadd(vec_i0, vec_i1, vl), vl); + rvv::vsseg(reinterpret_cast(v1), __riscv_vfsub(vec_r0, vec_r1, vl), __riscv_vfsub(vec_i0, vec_i1, vl), vl); + + vec_r2 = __riscv_vfsub(vec_r4, vec_r2, vl); + vec_i2 = __riscv_vfsub(vec_i4, vec_i2, vl); + + rvv::vsseg(reinterpret_cast(v0 + nx), __riscv_vfadd(vec_r2, vec_r3, vl), __riscv_vfadd(vec_i2, vec_i3, vl), vl); + rvv::vsseg(reinterpret_cast(v1 + nx), __riscv_vfsub(vec_r2, vec_r3, vl), __riscv_vfsub(vec_i2, vec_i3, vl), vl); + } + } + } + + for( ; n < factors[0]; ) + { + // do the remaining radix-2 transform + nx = n; + n *= 2; + dw0 /= 2; + + for( i = 0; i < len; i += n ) + { + Complex* v = dst + i; + T r0 = v[0].re + v[nx].re; + T i0 = v[0].im + v[nx].im; + T r1 = v[0].re - v[nx].re; + T i1 = v[0].im - v[nx].im; + v[0].re = r0; v[0].im = i0; + v[nx].re = r1; v[nx].im = i1; + + for( j = 1; j < nx; j += vl ) + { + vl = rvv::vsetvl(nx - j); + v = dst + i + j; + + VT vec_re, vec_im, vec_w_re, vec_w_im; + rvv::vlseg(reinterpret_cast(v + nx), vec_re, vec_im, vl); + rvv::vlsseg(reinterpret_cast(wave + j * dw0), sizeof(T) * dw0 * 2, vec_w_re, vec_w_im, vl); + + auto vec_r1 = 
__riscv_vfsub(__riscv_vfmul(vec_re, vec_w_re, vl), __riscv_vfmul(vec_im, vec_w_im, vl), vl); + auto vec_i1 = __riscv_vfadd(__riscv_vfmul(vec_re, vec_w_im, vl), __riscv_vfmul(vec_im, vec_w_re, vl), vl); + VT vec_r0, vec_i0; + rvv::vlseg(reinterpret_cast(v), vec_r0, vec_i0, vl); + + rvv::vsseg(reinterpret_cast(v), __riscv_vfadd(vec_r0, vec_r1, vl), __riscv_vfadd(vec_i0, vec_i1, vl), vl); + rvv::vsseg(reinterpret_cast(v + nx), __riscv_vfsub(vec_r0, vec_r1, vl), __riscv_vfsub(vec_i0, vec_i1, vl), vl); + } + } + } + } + + // 2. all the other transforms + for( f_idx = (factors[0]&1) ? 0 : 1; f_idx < nf; f_idx++ ) + { + int factor = factors[f_idx]; + nx = n; + n *= factor; + dw0 /= factor; + + if( factor == 3 ) + { + const T sin_120 = 0.86602540378443864676372317075294; + for( i = 0; i < len; i += n ) + { + Complex* v = dst + i; + T r1 = v[nx].re + v[nx*2].re; + T i1 = v[nx].im + v[nx*2].im; + T r0 = v[0].re; + T i0 = v[0].im; + T r2 = sin_120*(v[nx].im - v[nx*2].im); + T i2 = sin_120*(v[nx*2].re - v[nx].re); + v[0].re = r0 + r1; v[0].im = i0 + i1; + r0 -= (T)0.5*r1; i0 -= (T)0.5*i1; + v[nx].re = r0 + r2; v[nx].im = i0 + i2; + v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; + + for( j = 1; j < nx; j += vl ) + { + vl = rvv::vsetvl(nx - j); + v = dst + i + j; + + VT vec_re, vec_im, vec_w_re, vec_w_im; + rvv::vlseg(reinterpret_cast(v + nx), vec_re, vec_im, vl); + rvv::vlsseg(reinterpret_cast(wave + j * dw0), sizeof(T) * dw0 * 2, vec_w_re, vec_w_im, vl); + auto vec_r0 = __riscv_vfsub(__riscv_vfmul(vec_re, vec_w_re, vl), __riscv_vfmul(vec_im, vec_w_im, vl), vl); + auto vec_i0 = __riscv_vfadd(__riscv_vfmul(vec_re, vec_w_im, vl), __riscv_vfmul(vec_im, vec_w_re, vl), vl); + + rvv::vlseg(reinterpret_cast(v + nx * 2), vec_re, vec_im, vl); + rvv::vlsseg(reinterpret_cast(wave + j * dw0 * 2), sizeof(T) * dw0 * 4, vec_w_re, vec_w_im, vl); + auto vec_r2 = __riscv_vfadd(__riscv_vfmul(vec_re, vec_w_im, vl), __riscv_vfmul(vec_im, vec_w_re, vl), vl); + auto vec_i2 = __riscv_vfsub(__riscv_vfmul(vec_re, vec_w_re, vl), __riscv_vfmul(vec_im, vec_w_im, vl), vl); + + auto vec_r1 = __riscv_vfadd(vec_r0, vec_i2, vl); + auto vec_i1 = __riscv_vfadd(vec_i0, vec_r2, vl); + + vec_r2 = __riscv_vfmul(__riscv_vfsub(vec_i0, vec_r2, vl), sin_120, vl); + vec_i2 = __riscv_vfmul(__riscv_vfsub(vec_i2, vec_r0, vl), sin_120, vl); + rvv::vlseg(reinterpret_cast(v), vec_r0, vec_i0, vl); + + rvv::vsseg(reinterpret_cast(v), __riscv_vfadd(vec_r0, vec_r1, vl), __riscv_vfadd(vec_i0, vec_i1, vl), vl); + vec_r0 = __riscv_vfsub(vec_r0, __riscv_vfmul(vec_r1, 0.5, vl), vl); + vec_i0 = __riscv_vfsub(vec_i0, __riscv_vfmul(vec_i1, 0.5, vl), vl); + rvv::vsseg(reinterpret_cast(v + nx), __riscv_vfadd(vec_r0, vec_r2, vl), __riscv_vfadd(vec_i0, vec_i2, vl), vl); + rvv::vsseg(reinterpret_cast(v + nx * 2), __riscv_vfsub(vec_r0, vec_r2, vl), __riscv_vfsub(vec_i0, vec_i2, vl), vl); + } + } + } + else if( factor == 5 ) + { + const T fft5_2 = 0.559016994374947424102293417182819; + const T fft5_3 = -0.951056516295153572116439333379382; + const T fft5_4 = -1.538841768587626701285145288018455; + const T fft5_5 = 0.363271264002680442947733378740309; + for( i = 0; i < len; i += n ) + { + for( j = 0; j < nx; j += vl ) + { + vl = rvv::vsetvl(nx - j); + Complex* v0 = dst + i + j; + Complex* v1 = v0 + nx*2; + Complex* v2 = v1 + nx*2; + + VT vec_re, vec_im, vec_w_re, vec_w_im; + rvv::vlseg(reinterpret_cast(v0 + nx), vec_re, vec_im, vl); + rvv::vlsseg(reinterpret_cast(wave + j * dw0), sizeof(T) * dw0 * 2, vec_w_re, vec_w_im, vl); + auto vec_r3 = 
__riscv_vfsub(__riscv_vfmul(vec_re, vec_w_re, vl), __riscv_vfmul(vec_im, vec_w_im, vl), vl); + auto vec_i3 = __riscv_vfadd(__riscv_vfmul(vec_re, vec_w_im, vl), __riscv_vfmul(vec_im, vec_w_re, vl), vl); + + rvv::vlseg(reinterpret_cast(v2), vec_re, vec_im, vl); + rvv::vlsseg(reinterpret_cast(wave + j * dw0 * 4), sizeof(T) * dw0 * 8, vec_w_re, vec_w_im, vl); + auto vec_r2 = __riscv_vfsub(__riscv_vfmul(vec_re, vec_w_re, vl), __riscv_vfmul(vec_im, vec_w_im, vl), vl); + auto vec_i2 = __riscv_vfadd(__riscv_vfmul(vec_re, vec_w_im, vl), __riscv_vfmul(vec_im, vec_w_re, vl), vl); + + auto vec_r1 = __riscv_vfadd(vec_r3, vec_r2, vl); + auto vec_i1 = __riscv_vfadd(vec_i3, vec_i2, vl); + vec_r3 = __riscv_vfsub(vec_r3, vec_r2, vl); + vec_i3 = __riscv_vfsub(vec_i3, vec_i2, vl); + + rvv::vlseg(reinterpret_cast(v1 + nx), vec_re, vec_im, vl); + rvv::vlsseg(reinterpret_cast(wave + j * dw0 * 3), sizeof(T) * dw0 * 6, vec_w_re, vec_w_im, vl); + auto vec_r4 = __riscv_vfsub(__riscv_vfmul(vec_re, vec_w_re, vl), __riscv_vfmul(vec_im, vec_w_im, vl), vl); + auto vec_i4 = __riscv_vfadd(__riscv_vfmul(vec_re, vec_w_im, vl), __riscv_vfmul(vec_im, vec_w_re, vl), vl); + + rvv::vlseg(reinterpret_cast(v1), vec_re, vec_im, vl); + rvv::vlsseg(reinterpret_cast(wave + j * dw0 * 2), sizeof(T) * dw0 * 4, vec_w_re, vec_w_im, vl); + auto vec_r0 = __riscv_vfsub(__riscv_vfmul(vec_re, vec_w_re, vl), __riscv_vfmul(vec_im, vec_w_im, vl), vl); + auto vec_i0 = __riscv_vfadd(__riscv_vfmul(vec_re, vec_w_im, vl), __riscv_vfmul(vec_im, vec_w_re, vl), vl); + + vec_r2 = __riscv_vfadd(vec_r4, vec_r0, vl); + vec_i2 = __riscv_vfadd(vec_i4, vec_i0, vl); + vec_r4 = __riscv_vfsub(vec_r4, vec_r0, vl); + vec_i4 = __riscv_vfsub(vec_i4, vec_i0, vl); + + rvv::vlseg(reinterpret_cast(v0), vec_r0, vec_i0, vl); + auto vec_r5 = __riscv_vfadd(vec_r1, vec_r2, vl); + auto vec_i5 = __riscv_vfadd(vec_i1, vec_i2, vl); + + rvv::vsseg(reinterpret_cast(v0), __riscv_vfadd(vec_r0, vec_r5, vl), __riscv_vfadd(vec_i0, vec_i5, vl), vl); + + vec_r0 = __riscv_vfsub(vec_r0, __riscv_vfmul(vec_r5, 0.25, vl), vl); + vec_i0 = __riscv_vfsub(vec_i0, __riscv_vfmul(vec_i5, 0.25, vl), vl); + vec_r1 = __riscv_vfmul(__riscv_vfsub(vec_r1, vec_r2, vl), fft5_2, vl); + vec_i1 = __riscv_vfmul(__riscv_vfsub(vec_i1, vec_i2, vl), fft5_2, vl); + vec_r2 = __riscv_vfmul(__riscv_vfadd(vec_i3, vec_i4, vl), -fft5_3, vl); + vec_i2 = __riscv_vfmul(__riscv_vfadd(vec_r3, vec_r4, vl), fft5_3, vl); + + vec_i3 = __riscv_vfmul(vec_i3, -fft5_5, vl); + vec_r3 = __riscv_vfmul(vec_r3, fft5_5, vl); + vec_i4 = __riscv_vfmul(vec_i4, -fft5_4, vl); + vec_r4 = __riscv_vfmul(vec_r4, fft5_4, vl); + + vec_r5 = __riscv_vfadd(vec_r2, vec_i3, vl); + vec_i5 = __riscv_vfadd(vec_i2, vec_r3, vl); + vec_r2 = __riscv_vfsub(vec_r2, vec_i4, vl); + vec_i2 = __riscv_vfsub(vec_i2, vec_r4, vl); + + vec_r3 = __riscv_vfadd(vec_r0, vec_r1, vl); + vec_i3 = __riscv_vfadd(vec_i0, vec_i1, vl); + + rvv::vsseg(reinterpret_cast(v0 + nx), __riscv_vfadd(vec_r3, vec_r2, vl), __riscv_vfadd(vec_i3, vec_i2, vl), vl); + rvv::vsseg(reinterpret_cast(v2), __riscv_vfsub(vec_r3, vec_r2, vl), __riscv_vfsub(vec_i3, vec_i2, vl), vl); + + vec_r0 = __riscv_vfsub(vec_r0, vec_r1, vl); + vec_i0 = __riscv_vfsub(vec_i0, vec_i1, vl); + + rvv::vsseg(reinterpret_cast(v1), __riscv_vfadd(vec_r0, vec_r5, vl), __riscv_vfadd(vec_i0, vec_i5, vl), vl); + rvv::vsseg(reinterpret_cast(v1 + nx), __riscv_vfsub(vec_r0, vec_r5, vl), __riscv_vfsub(vec_i0, vec_i5, vl), vl); + } + } + } + else + { + // radix-"factor" - an odd number + int p, q, factor2 = (factor - 1)/2; + int dd, dw_f = 
tab_size/factor; + std::vector > buf(factor2 * 2); + Complex* a = buf.data(); + Complex* b = a + factor2; + + for( i = 0; i < len; i += n ) + { + for( j = 0, dw = 0; j < nx; j++, dw += dw0 ) + { + Complex* v = dst + i + j; + Complex v_0 = v[0]; + Complex vn_0 = v_0; + + if( j == 0 ) + { + for( p = 1; p <= factor2; p += vl ) + { + vl = rvv::vsetvl(factor2 + 1 - p); + + VT vec_a_re, vec_a_im, vec_b_re, vec_b_im; + rvv::vlsseg(reinterpret_cast(v + nx * p), sizeof(T) * nx * 2, vec_a_re, vec_a_im, vl); + rvv::vlsseg(reinterpret_cast(v + n - nx * p), (ptrdiff_t)sizeof(T) * nx * -2, vec_b_re, vec_b_im, vl); + auto vec_r0 = __riscv_vfadd(vec_a_re, vec_b_re, vl); + auto vec_r1 = __riscv_vfsub(vec_a_re, vec_b_re, vl); + auto vec_i0 = __riscv_vfsub(vec_a_im, vec_b_im, vl); + auto vec_i1 = __riscv_vfadd(vec_a_im, vec_b_im, vl); + + vn_0.re += __riscv_vfmv_f(__riscv_vfredosum(vec_r0, rvv::vfmv_s(0, vl), vl)); + vn_0.im += __riscv_vfmv_f(__riscv_vfredosum(vec_i1, rvv::vfmv_s(0, vl), vl)); + + rvv::vsseg(reinterpret_cast(a + p - 1), vec_r0, vec_i0, vl); + rvv::vsseg(reinterpret_cast(b + p - 1), vec_r1, vec_i1, vl); + } + } + else + { + const Complex* wave_ = wave + dw*factor; + + for( p = 1; p <= factor2; p += vl ) + { + vl = rvv::vsetvl(factor2 + 1 - p); + + VT vec_re, vec_im, vec_w_re, vec_w_im; + rvv::vlsseg(reinterpret_cast(v + nx * p), sizeof(T) * nx * 2, vec_re, vec_im, vl); + rvv::vlsseg(reinterpret_cast(wave + p * dw), sizeof(T) * dw * 2, vec_w_re, vec_w_im, vl); + auto vec_r2 = __riscv_vfsub(__riscv_vfmul(vec_re, vec_w_re, vl), __riscv_vfmul(vec_im, vec_w_im, vl), vl); + auto vec_i2 = __riscv_vfadd(__riscv_vfmul(vec_re, vec_w_im, vl), __riscv_vfmul(vec_im, vec_w_re, vl), vl); + + rvv::vlsseg(reinterpret_cast(v + n - nx * p), (ptrdiff_t)sizeof(T) * nx * -2, vec_re, vec_im, vl); + rvv::vlsseg(reinterpret_cast(wave_ - p * dw), (ptrdiff_t)sizeof(T) * dw * -2, vec_w_re, vec_w_im, vl); + auto vec_r1 = __riscv_vfsub(__riscv_vfmul(vec_re, vec_w_re, vl), __riscv_vfmul(vec_im, vec_w_im, vl), vl); + auto vec_i1 = __riscv_vfadd(__riscv_vfmul(vec_re, vec_w_im, vl), __riscv_vfmul(vec_im, vec_w_re, vl), vl); + + auto vec_r0 = __riscv_vfadd(vec_r2, vec_r1, vl); + auto vec_i0 = __riscv_vfsub(vec_i2, vec_i1, vl); + vec_r1 = __riscv_vfsub(vec_r2, vec_r1, vl); + vec_i1 = __riscv_vfadd(vec_i2, vec_i1, vl); + + vn_0.re += __riscv_vfmv_f(__riscv_vfredosum(vec_r0, rvv::vfmv_s(0, vl), vl)); + vn_0.im += __riscv_vfmv_f(__riscv_vfredosum(vec_i1, rvv::vfmv_s(0, vl), vl)); + + rvv::vsseg(reinterpret_cast(a + p - 1), vec_r0, vec_i0, vl); + rvv::vsseg(reinterpret_cast(b + p - 1), vec_r1, vec_i1, vl); + } + } + + v[0] = vn_0; + + for( p = 1, k = nx; p <= factor2; p++, k += nx ) + { + Complex s0 = v_0, s1 = v_0; + dd = dw_f*p; + + vl = __riscv_vsetvlmax_e32mf2(); + auto vec_dd = __riscv_vid_v_u32mf2(vl); + vec_dd = __riscv_vmul(vec_dd, dd, vl); + vec_dd = __riscv_vremu(vec_dd, tab_size, vl); + + for( q = 0; q < factor2; q += vl ) + { + vl = rvv::vsetvl(factor2 - q); + + auto vec_d = __riscv_vadd(vec_dd, (q + 1) * dd % tab_size, vl); + auto vmask = __riscv_vmsgeu(vec_d, tab_size, vl); + vec_d = __riscv_vsub_mu(vmask, vec_d, vec_d, tab_size, vl); + vec_d = __riscv_vmul(vec_d, sizeof(T) * 2, vl); + + auto vec_w = __riscv_vloxei32(reinterpret_cast(wave), vec_d, vl); + VT vec_a_re, vec_a_im, vec_b_re, vec_b_im; + rvv::vlsseg(reinterpret_cast(a + q), sizeof(T) * 2, vec_a_re, vec_a_im, vl); + rvv::vlsseg(reinterpret_cast(b + q), sizeof(T) * 2, vec_b_re, vec_b_im, vl); + auto vec_r0 = __riscv_vfmul(vec_w, vec_a_re, vl); + auto vec_r1 = 
__riscv_vfmul(vec_w, vec_b_im, vl); + + vec_w = __riscv_vloxei32(reinterpret_cast(wave) + 1, vec_d, vl); + auto vec_i0 = __riscv_vfmul(vec_w, vec_a_im, vl); + auto vec_i1 = __riscv_vfmul(vec_w, vec_b_re, vl); + + T r0 = __riscv_vfmv_f(__riscv_vfredosum(vec_r0, rvv::vfmv_s(0, vl), vl)); + T i0 = __riscv_vfmv_f(__riscv_vfredosum(vec_i0, rvv::vfmv_s(0, vl), vl)); + T r1 = __riscv_vfmv_f(__riscv_vfredosum(vec_r1, rvv::vfmv_s(0, vl), vl)); + T i1 = __riscv_vfmv_f(__riscv_vfredosum(vec_i1, rvv::vfmv_s(0, vl), vl)); + + s1.re += r0 + i0; s0.re += r0 - i0; + s1.im += r1 - i1; s0.im += r1 + i1; + } + + v[k] = s0; + v[n-k] = s1; + } + } + } + } + } + + if( scale != 1 ) + { + T re_scale = scale, im_scale = scale; + if( isInverse ) + im_scale = -im_scale; + + for( i = 0; i < len; i += vl ) + { + vl = rvv::vsetvl_itab(len - i); + auto vec_src_re = rvv::vlse_itab_f(reinterpret_cast(dst + i), sizeof(T) * 2, vl); + auto vec_src_im = rvv::vlse_itab_f(reinterpret_cast(dst + i) + 1, sizeof(T) * 2, vl); + vec_src_re = __riscv_vfmul(vec_src_re, re_scale, vl); + vec_src_im = __riscv_vfmul(vec_src_im, im_scale, vl); + rvv::vsse_itab(reinterpret_cast(dst + i), sizeof(T) * 2, vec_src_re, vl); + rvv::vsse_itab(reinterpret_cast(dst + i) + 1, sizeof(T) * 2, vec_src_im, vl); + } + } + else if( isInverse ) + { + for( i = 0; i < len; i += vl ) + { + vl = rvv::vsetvl_itab(len - i); + auto vec_src_im = rvv::vlse_itab_f(reinterpret_cast(dst + i) + 1, sizeof(T) * 2, vl); + vec_src_im = __riscv_vfneg(vec_src_im, vl); + rvv::vsse_itab(reinterpret_cast(dst + i) + 1, sizeof(T) * 2, vec_src_im, vl); + } + } + + return CV_HAL_ERROR_OK; +} + +inline int dft(const uchar* src, uchar* dst, int depth, int nf, int *factors, double scale, int* itab, void* wave, + int tab_size, int n, bool isInverse, bool noPermute) +{ + if( n == 0 ) + return CV_HAL_ERROR_OK; + + switch( depth ) + { + case CV_32F: + return dft(reinterpret_cast*>(src), reinterpret_cast*>(dst), nf, factors, (float)scale, + itab, reinterpret_cast*>(wave), tab_size, n, isInverse, noPermute); + case CV_64F: + return dft(reinterpret_cast*>(src), reinterpret_cast*>(dst), nf, factors, (double)scale, + itab, reinterpret_cast*>(wave), tab_size, n, isInverse, noPermute); + } + return CV_HAL_ERROR_NOT_IMPLEMENTED; +} + +}}} + +#endif diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp index 8786764cf0..aa8cf2937f 100644 --- a/3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp +++ b/3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp @@ -1,68 +1,65 @@ // This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. 
-#ifndef OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED
-#define OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED
+#ifndef OPENCV_HAL_RVV_MINMAX_HPP_INCLUDED
+#define OPENCV_HAL_RVV_MINMAX_HPP_INCLUDED
 
 #include <riscv_vector.h>
 
-namespace cv { namespace cv_hal_rvv {
+namespace cv { namespace cv_hal_rvv { namespace minmax {
 
 #undef cv_hal_minMaxIdx
-#define cv_hal_minMaxIdx cv::cv_hal_rvv::minMaxIdx
+#define cv_hal_minMaxIdx cv::cv_hal_rvv::minmax::minMaxIdx
 #undef cv_hal_minMaxIdxMaskStep
-#define cv_hal_minMaxIdxMaskStep cv::cv_hal_rvv::minMaxIdx
+#define cv_hal_minMaxIdxMaskStep cv::cv_hal_rvv::minmax::minMaxIdx
 
-namespace
-{
-    template<typename T> struct rvv;
+template<typename T> struct rvv;
 
-    #define HAL_RVV_GENERATOR(T, EEW, TYPE, IS_U, EMUL, M_EMUL, B_LEN) \
-    template<> struct rvv<T> \
-    { \
-        using vec_t = v##IS_U##int##EEW##EMUL##_t; \
-        using bool_t = vbool##B_LEN##_t; \
-        static inline size_t vsetvlmax() { return __riscv_vsetvlmax_e##EEW##EMUL(); } \
-        static inline size_t vsetvl(size_t a) { return __riscv_vsetvl_e##EEW##EMUL(a); } \
-        static inline vec_t vmv_v_x(T a, size_t b) { return __riscv_vmv_v_x_##TYPE##EMUL(a, b); } \
-        static inline vec_t vle(const T* a, size_t b) { return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \
-        static inline vuint8##M_EMUL##_t vle_mask(const uchar* a, size_t b) { return __riscv_vle8_v_u8##M_EMUL(a, b); } \
-        static inline vec_t vmin_tu(vec_t a, vec_t b, vec_t c, size_t d) { return __riscv_vmin##IS_U##_tu(a, b, c, d); } \
-        static inline vec_t vmax_tu(vec_t a, vec_t b, vec_t c, size_t d) { return __riscv_vmax##IS_U##_tu(a, b, c, d); } \
-        static inline vec_t vmin_tumu(bool_t a, vec_t b, vec_t c, vec_t d, size_t e) { return __riscv_vmin##IS_U##_tumu(a, b, c, d, e); } \
-        static inline vec_t vmax_tumu(bool_t a, vec_t b, vec_t c, vec_t d, size_t e) { return __riscv_vmax##IS_U##_tumu(a, b, c, d, e); } \
-        static inline vec_t vredmin(vec_t a, vec_t b, size_t c) { return __riscv_vredmin##IS_U(a, b, c); } \
-        static inline vec_t vredmax(vec_t a, vec_t b, size_t c) { return __riscv_vredmax##IS_U(a, b, c); } \
-    };
-    HAL_RVV_GENERATOR(uchar , 8 , u8 , u, m1, m1 , 8 )
-    HAL_RVV_GENERATOR(schar , 8 , i8 , , m1, m1 , 8 )
-    HAL_RVV_GENERATOR(ushort, 16, u16, u, m1, mf2, 16)
-    HAL_RVV_GENERATOR(short , 16, i16, , m1, mf2, 16)
-    #undef HAL_RVV_GENERATOR
+#define HAL_RVV_GENERATOR(T, EEW, TYPE, IS_U, EMUL, M_EMUL, B_LEN) \
+template<> struct rvv<T> \
+{ \
+    using vec_t = v##IS_U##int##EEW##EMUL##_t; \
+    using bool_t = vbool##B_LEN##_t; \
+    static inline size_t vsetvlmax() { return __riscv_vsetvlmax_e##EEW##EMUL(); } \
+    static inline size_t vsetvl(size_t a) { return __riscv_vsetvl_e##EEW##EMUL(a); } \
+    static inline vec_t vmv_v_x(T a, size_t b) { return __riscv_vmv_v_x_##TYPE##EMUL(a, b); } \
+    static inline vec_t vle(const T* a, size_t b) { return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \
+    static inline vuint8##M_EMUL##_t vle_mask(const uchar* a, size_t b) { return __riscv_vle8_v_u8##M_EMUL(a, b); } \
+    static inline vec_t vmin_tu(vec_t a, vec_t b, vec_t c, size_t d) { return __riscv_vmin##IS_U##_tu(a, b, c, d); } \
+    static inline vec_t vmax_tu(vec_t a, vec_t b, vec_t c, size_t d) { return __riscv_vmax##IS_U##_tu(a, b, c, d); } \
+    static inline vec_t vmin_tumu(bool_t a, vec_t b, vec_t c, vec_t d, size_t e) { return __riscv_vmin##IS_U##_tumu(a, b, c, d, e); } \
+    static inline vec_t vmax_tumu(bool_t a, vec_t b, vec_t c, vec_t d, size_t e) { return __riscv_vmax##IS_U##_tumu(a, b, c, d, e); } \
+    static inline vec_t vredmin(vec_t a, vec_t b, size_t c) { return __riscv_vredmin##IS_U(a, b, c); } \
+    static inline vec_t vredmax(vec_t a, vec_t b, size_t c) { return __riscv_vredmax##IS_U(a, b, c); } \
+};
+HAL_RVV_GENERATOR(uchar , 8 , u8 , u, m1, m1 , 8 )
+HAL_RVV_GENERATOR(schar , 8 , i8 , , m1, m1 , 8 )
+HAL_RVV_GENERATOR(ushort, 16, u16, u, m1, mf2, 16)
+HAL_RVV_GENERATOR(short , 16, i16, , m1, mf2, 16)
+#undef HAL_RVV_GENERATOR
 
-    #define HAL_RVV_GENERATOR(T, NAME, EEW, TYPE, IS_F, F_OR_S, F_OR_X, EMUL, M_EMUL, P_EMUL, B_LEN) \
-    template<> struct rvv<T> \
-    { \
-        using vec_t = v##NAME##EEW##EMUL##_t; \
-        using bool_t = vbool##B_LEN##_t; \
-        static inline size_t vsetvlmax() { return __riscv_vsetvlmax_e##EEW##EMUL(); } \
-        static inline size_t vsetvl(size_t a) { return __riscv_vsetvl_e##EEW##EMUL(a); } \
-        static inline vec_t vmv_v_x(T a, size_t b) { return __riscv_v##IS_F##mv_v_##F_OR_X##_##TYPE##EMUL(a, b); } \
-        static inline vuint32##P_EMUL##_t vid(size_t a) { return __riscv_vid_v_u32##P_EMUL(a); } \
-        static inline vuint32##P_EMUL##_t vundefined() { return __riscv_vundefined_u32##P_EMUL(); } \
-        static inline vec_t vle(const T* a, size_t b) { return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \
-        static inline vuint8##M_EMUL##_t vle_mask(const uchar* a, size_t b) { return __riscv_vle8_v_u8##M_EMUL(a, b); } \
-        static inline bool_t vmlt(vec_t a, vec_t b, size_t c) { return __riscv_vm##F_OR_S##lt(a, b, c); } \
-        static inline bool_t vmgt(vec_t a, vec_t b, size_t c) { return __riscv_vm##F_OR_S##gt(a, b, c); } \
-        static inline bool_t vmlt_mu(bool_t a, bool_t b, vec_t c, vec_t d, size_t e) { return __riscv_vm##F_OR_S##lt##_mu(a, b, c, d, e); } \
-        static inline bool_t vmgt_mu(bool_t a, bool_t b, vec_t c, vec_t d, size_t e) { return __riscv_vm##F_OR_S##gt##_mu(a, b, c, d, e); } \
-        static inline T vmv_x_s(vec_t a) { return __riscv_v##IS_F##mv_##F_OR_X(a); } \
-    };
-    HAL_RVV_GENERATOR(int , int , 32, i32, , s, x, m4, m1 , m4, 8 )
-    HAL_RVV_GENERATOR(float , float, 32, f32, f, f, f, m4, m1 , m4, 8 )
-    HAL_RVV_GENERATOR(double, float, 64, f64, f, f, f, m4, mf2, m2, 16)
-    #undef HAL_RVV_GENERATOR
-}
+#define HAL_RVV_GENERATOR(T, NAME, EEW, TYPE, IS_F, F_OR_S, F_OR_X, EMUL, M_EMUL, P_EMUL, B_LEN) \
+template<> struct rvv<T> \
+{ \
+    using vec_t = v##NAME##EEW##EMUL##_t; \
+    using bool_t = vbool##B_LEN##_t; \
+    static inline size_t vsetvlmax() { return __riscv_vsetvlmax_e##EEW##EMUL(); } \
+    static inline size_t vsetvl(size_t a) { return __riscv_vsetvl_e##EEW##EMUL(a); } \
+    static inline vec_t vmv_v_x(T a, size_t b) { return __riscv_v##IS_F##mv_v_##F_OR_X##_##TYPE##EMUL(a, b); } \
+    static inline vuint32##P_EMUL##_t vid(size_t a) { return __riscv_vid_v_u32##P_EMUL(a); } \
+    static inline vuint32##P_EMUL##_t vundefined() { return __riscv_vundefined_u32##P_EMUL(); } \
+    static inline vec_t vle(const T* a, size_t b) { return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \
+    static inline vuint8##M_EMUL##_t vle_mask(const uchar* a, size_t b) { return __riscv_vle8_v_u8##M_EMUL(a, b); } \
+    static inline bool_t vmlt(vec_t a, vec_t b, size_t c) { return __riscv_vm##F_OR_S##lt(a, b, c); } \
+    static inline bool_t vmgt(vec_t a, vec_t b, size_t c) { return __riscv_vm##F_OR_S##gt(a, b, c); } \
+    static inline bool_t vmlt_mu(bool_t a, bool_t b, vec_t c, vec_t d, size_t e) { return __riscv_vm##F_OR_S##lt##_mu(a, b, c, d, e); } \
+    static inline bool_t vmgt_mu(bool_t a, bool_t b, vec_t c, vec_t d, size_t e) { return __riscv_vm##F_OR_S##gt##_mu(a, b, c, d, e); } \
+    static inline T vmv_x_s(vec_t a) { return __riscv_v##IS_F##mv_##F_OR_X(a); } \
+};
+HAL_RVV_GENERATOR(int , int , 32, i32, , s, x, m4, m1 , m4, 8 )
+HAL_RVV_GENERATOR(float , float, 32, f32, f, f, f, m4, m1 , m4, 8 )
+HAL_RVV_GENERATOR(double, float, 64, f64, f, f, f, m4, mf2, m2, 16)
+#undef HAL_RVV_GENERATOR
 
 template<typename T>
 inline int minMaxIdxReadTwice(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal,
@@ -330,6 +327,6 @@ inline int minMaxIdx(const uchar* src_data, size_t src_step, int width, int heig
     return CV_HAL_ERROR_NOT_IMPLEMENTED;
 }
 
-}}
+}}}
 
 #endif
diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp
index e53b9d4391..19577ab2b8 100644
--- a/3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp
@@ -6,10 +6,10 @@
 
 #include <riscv_vector.h>
 
-namespace cv { namespace cv_hal_rvv {
+namespace cv { namespace cv_hal_rvv { namespace norm {
 
 #undef cv_hal_norm
-#define cv_hal_norm cv::cv_hal_rvv::norm
+#define cv_hal_norm cv::cv_hal_rvv::norm::norm
 
 inline int normInf_8UC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
 {
@@ -512,6 +512,6 @@ inline int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mas
     return CV_HAL_ERROR_NOT_IMPLEMENTED;
 }
 
-}}
+}}}
 
 #endif
diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/norm_diff.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/norm_diff.hpp
index 6e4a9be65d..54125f6beb 100644
--- a/3rdparty/hal_rvv/hal_rvv_1p0/norm_diff.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/norm_diff.hpp
@@ -6,10 +6,10 @@
 
 #include <riscv_vector.h>
 
-namespace cv { namespace cv_hal_rvv {
+namespace cv { namespace cv_hal_rvv { namespace norm_diff {
 
 #undef cv_hal_normDiff
-#define cv_hal_normDiff cv::cv_hal_rvv::normDiff
+#define cv_hal_normDiff cv::cv_hal_rvv::norm_diff::normDiff
 
 inline int normDiffInf_8UC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
 {
@@ -590,7 +590,7 @@ inline int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size
     if(ret == CV_HAL_ERROR_OK && (norm_type & NORM_RELATIVE))
     {
         double result_;
-        ret = cv::cv_hal_rvv::norm(src2, src2_step, mask, mask_step, width, height, type, norm_type & ~NORM_RELATIVE, &result_);
+        ret = cv::cv_hal_rvv::norm::norm(src2, src2_step, mask, mask_step, width, height, type, norm_type & ~NORM_RELATIVE, &result_);
         if(ret == CV_HAL_ERROR_OK)
         {
             *result /= result_ + DBL_EPSILON;
@@ -600,6 +600,6 @@ inline int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size
     return ret;
 }
 
-}}
+}}}
 
 #endif
diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp
index 85966a5a52..5814d0dfd6 100644
--- a/modules/core/src/dxt.cpp
+++ b/modules/core/src/dxt.cpp
@@ -844,6 +844,17 @@ DFT(const OcvDftOptions & c, const Complex<T>* src, Complex<T>* dst)
     Complex<T> t;
     T scale = (T)c.scale;
 
+    if(typeid(T) == typeid(float))
+    {
+        CALL_HAL(dft, cv_hal_dft, reinterpret_cast<const uchar*>(src), reinterpret_cast<uchar*>(dst), CV_32F,
+                 c.nf, c.factors, c.scale, c.itab, c.wave, c.tab_size, c.n, c.isInverse, c.noPermute);
+    }
+    if(typeid(T) == typeid(double))
+    {
+        CALL_HAL(dft, cv_hal_dft, reinterpret_cast<const uchar*>(src), reinterpret_cast<uchar*>(dst), CV_64F,
+                 c.nf, c.factors, c.scale, c.itab, c.wave, c.tab_size, c.n, c.isInverse, c.noPermute);
+    }
+
     if( c.useIpp )
     {
 #ifdef USE_IPP_DFT
diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp
index 07ee647828..517e9b8f0b 100644
--- a/modules/core/src/hal_replacement.hpp
+++ b/modules/core/src/hal_replacement.hpp
@@ -756,10 +756,28 @@ inline int hal_ni_dft1D(cvhalDFT *context, const uchar *src, uchar *dst) { retur
 */
 inline int hal_ni_dftFree1D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
 
+/**
+@param src pointer to the source array of interleaved complex values
+@param dst pointer to the destination array of interleaved complex values
+@param depth depth of the data, CV_32F or CV_64F
+@param nf number of factors in the factorization of the transform size (OcvDftOptions::nf)
+@param factors array of factors of the transform size (OcvDftOptions::factors)
+@param scale scale factor applied to the result (OcvDftOptions::scale)
+@param itab permutation table used to reorder the input (OcvDftOptions::itab)
+@param wave table of complex twiddle factors (OcvDftOptions::wave)
+@param tab_size size of the wave table (OcvDftOptions::tab_size)
+@param n transform size in complex elements (OcvDftOptions::n)
+@param isInverse true for the inverse transform (OcvDftOptions::isInverse)
+@param noPermute true to skip the input permutation step (OcvDftOptions::noPermute)
+ */
+inline int hal_ni_dft(const uchar* src, uchar* dst, int depth, int nf, int *factors, double scale, int* itab, void* wave,
+                      int tab_size, int n, bool isInverse, bool noPermute) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+
 //! @cond IGNORED
 #define cv_hal_dftInit1D hal_ni_dftInit1D
 #define cv_hal_dft1D hal_ni_dft1D
 #define cv_hal_dftFree1D hal_ni_dftFree1D
+#define cv_hal_dft hal_ni_dft
 //! @endcond
 
 /**
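
Note on the new hook (reviewer sketch, not part of the patch): with this change cv::DFT probes cv_hal_dft through CALL_HAL before reaching the IPP or plain C++ paths, handing over the precomputed factorization (nf/factors), permutation table (itab) and twiddle-factor table (wave). A third-party HAL overrides the hook the same way dxt.hpp does, by redefining cv_hal_dft; returning CV_HAL_ERROR_NOT_IMPLEMENTED makes OpenCV fall back to its built-in DFT. A minimal, hypothetical override (my_hal_* names invented for illustration) that claims only the trivial length-1 forward float case and defers everything else could look like this:

// Illustrative sketch only; assumes the usual custom-HAL setup where
// opencv2/core/hal/interface.h provides uchar, CV_32F and the CV_HAL_ERROR_* codes.
#include "opencv2/core/hal/interface.h"

inline int my_hal_dft(const uchar* src, uchar* dst, int depth, int /*nf*/, int* /*factors*/,
                      double scale, int* /*itab*/, void* /*wave*/, int /*tab_size*/, int n,
                      bool isInverse, bool /*noPermute*/)
{
    // Claim only the degenerate case; anything else falls back to OpenCV's own implementation.
    if (n != 1 || depth != CV_32F || isInverse)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
    const float* s = reinterpret_cast<const float*>(src);  // one interleaved complex value: re, im
    float* d = reinterpret_cast<float*>(dst);
    d[0] = static_cast<float>(s[0] * scale);                // length-1 DFT is a scaled copy
    d[1] = static_cast<float>(s[1] * scale);
    return CV_HAL_ERROR_OK;
}

#undef cv_hal_dft
#define cv_hal_dft my_hal_dft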