diff --git a/3rdparty/hal_rvv/hal_rvv.hpp b/3rdparty/hal_rvv/hal_rvv.hpp index 92eea59aae..45c7da446d 100644 --- a/3rdparty/hal_rvv/hal_rvv.hpp +++ b/3rdparty/hal_rvv/hal_rvv.hpp @@ -33,6 +33,7 @@ #include "hal_rvv_1p0/split.hpp" // core #include "hal_rvv_1p0/magnitude.hpp" // core #include "hal_rvv_1p0/cart_to_polar.hpp" // core +#include "hal_rvv_1p0/polar_to_cart.hpp" // core #include "hal_rvv_1p0/flip.hpp" // core #include "hal_rvv_1p0/lut.hpp" // core #include "hal_rvv_1p0/exp.hpp" // core diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/polar_to_cart.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/polar_to_cart.hpp new file mode 100644 index 0000000000..feab2047e5 --- /dev/null +++ b/3rdparty/hal_rvv/hal_rvv_1p0/polar_to_cart.hpp @@ -0,0 +1,53 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences. + +#ifndef OPENCV_HAL_RVV_POLAR_TO_CART_HPP_INCLUDED +#define OPENCV_HAL_RVV_POLAR_TO_CART_HPP_INCLUDED + +#include +#include "hal_rvv_1p0/sincos.hpp" +#include "hal_rvv_1p0/types.hpp" + +namespace cv { namespace cv_hal_rvv { + +#undef cv_hal_polarToCart32f +#define cv_hal_polarToCart32f cv::cv_hal_rvv::polarToCart +#undef cv_hal_polarToCart64f +#define cv_hal_polarToCart64f cv::cv_hal_rvv::polarToCart + +template +inline int + polarToCart(const Elem* mag, const Elem* angle, Elem* x, Elem* y, int len, bool angleInDegrees) +{ + using T = RVV_F32M4; + const auto sincos_scale = angleInDegrees ? detail::sincos_deg_scale : detail::sincos_rad_scale; + + size_t vl; + auto cos_p2 = T::vmv(detail::sincos_cos_p2, T::setvlmax()); + auto cos_p0 = T::vmv(detail::sincos_cos_p0, T::setvlmax()); + for (; len > 0; len -= (int)vl, angle += vl, x += vl, y += vl) + { + vl = RVV_T::setvl(len); + auto vangle = T::cast(RVV_T::vload(angle, vl), vl); + T::VecType vsin, vcos; + detail::SinCos32f(vangle, vsin, vcos, sincos_scale, cos_p2, cos_p0, vl); + if (mag) + { + auto vmag = T::cast(RVV_T::vload(mag, vl), vl); + vsin = __riscv_vfmul(vsin, vmag, vl); + vcos = __riscv_vfmul(vcos, vmag, vl); + mag += vl; + } + RVV_T::vstore(x, RVV_T::cast(vcos, vl), vl); + RVV_T::vstore(y, RVV_T::cast(vsin, vl), vl); + } + + return CV_HAL_ERROR_OK; +} + +}} // namespace cv::cv_hal_rvv + +#endif // OPENCV_HAL_RVV_POLAR_TO_CART_HPP_INCLUDED diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp index 1b07cabfb2..a349d341c5 100644 --- a/3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp +++ b/3rdparty/hal_rvv/hal_rvv_1p0/pyramids.hpp @@ -25,8 +25,8 @@ template<> struct rvv using WT = RVV_SameLen; using MT = RVV_SameLen; - static inline WT::VecType vcvt_T_WT(T::VecType a, size_t b) { return WT::cast(MT::cast(a, b), b); } - static inline T::VecType vcvt_WT_T(WT::VecType a, int b, size_t c) { return T::cast(MT::cast(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c), c), c); } + static inline WT::VecType vcvt_T_WT(T::VecType a, size_t b) { return WT::reinterpret(MT::cast(a, b)); } + static inline T::VecType vcvt_WT_T(WT::VecType a, int b, size_t c) { return T::cast(MT::reinterpret(__riscv_vsra(__riscv_vadd(a, 1 << (b - 1), c), b, c)), c); } static inline WT::VecType down0(WT::VecType vec_src0, WT::VecType vec_src1, WT::VecType vec_src2, WT::VecType vec_src3, WT::VecType vec_src4, size_t vl) { return __riscv_vadd(__riscv_vadd(__riscv_vadd(vec_src0, vec_src4, vl), __riscv_vadd(vec_src2, vec_src2, vl), vl), __riscv_vsll(__riscv_vadd(__riscv_vadd(vec_src1, vec_src2, vl), vec_src3, vl), 2, vl), vl); diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/sincos.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/sincos.hpp new file mode 100644 index 0000000000..776d58f42c --- /dev/null +++ b/3rdparty/hal_rvv/hal_rvv_1p0/sincos.hpp @@ -0,0 +1,72 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level +// directory of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences. + +#ifndef OPENCV_HAL_RVV_SINCOS_HPP_INCLUDED +#define OPENCV_HAL_RVV_SINCOS_HPP_INCLUDED + +#include +#include "hal_rvv_1p0/types.hpp" + +namespace cv { namespace cv_hal_rvv { namespace detail { + +static constexpr size_t sincos_mask = 0x3; + +static constexpr float sincos_rad_scale = 2.f / CV_PI; +static constexpr float sincos_deg_scale = 2.f / 180.f; + +// Taylor expansion coefficients for sin(x*pi/2) and cos(x*pi/2) +static constexpr double sincos_sin_p7 = -0.004681754135319; +static constexpr double sincos_sin_p5 = 0.079692626246167; +static constexpr double sincos_sin_p3 = -0.645964097506246; +static constexpr double sincos_sin_p1 = 1.570796326794897; + +static constexpr double sincos_cos_p8 = 0.000919260274839; +static constexpr double sincos_cos_p6 = -0.020863480763353; +static constexpr double sincos_cos_p4 = 0.253669507901048; +static constexpr double sincos_cos_p2 = -1.233700550136170; +static constexpr double sincos_cos_p0 = 1.000000000000000; + +// Taylor expansion and angle sum identity +// Use 7 LMUL registers (can be reduced to 5 by splitting fmadd to fadd and fmul) +template +static inline void + SinCos32f(T angle, T& sinval, T& cosval, float scale, T cos_p2, T cos_p0, size_t vl) +{ + angle = __riscv_vfmul(angle, scale, vl); + auto round_angle = RVV_ToInt::cast(angle, vl); + auto delta_angle = __riscv_vfsub(angle, RVV_T::cast(round_angle, vl), vl); + auto delta_angle2 = __riscv_vfmul(delta_angle, delta_angle, vl); + + auto sin = __riscv_vfadd(__riscv_vfmul(delta_angle2, sincos_sin_p7, vl), sincos_sin_p5, vl); + sin = __riscv_vfadd(__riscv_vfmul(delta_angle2, sin, vl), sincos_sin_p3, vl); + sin = __riscv_vfadd(__riscv_vfmul(delta_angle2, sin, vl), sincos_sin_p1, vl); + sin = __riscv_vfmul(delta_angle, sin, vl); + + auto cos = __riscv_vfadd(__riscv_vfmul(delta_angle2, sincos_cos_p8, vl), sincos_cos_p6, vl); + cos = __riscv_vfadd(__riscv_vfmul(delta_angle2, cos, vl), sincos_cos_p4, vl); + cos = __riscv_vfmadd(cos, delta_angle2, cos_p2, vl); + cos = __riscv_vfmadd(cos, delta_angle2, cos_p0, vl); + + // idx = 0: sinval = sin, cosval = cos + // idx = 1: sinval = cos, cosval = -sin + // idx = 2: sinval = -sin, cosval = -cos + // idx = 3: sinval = -cos, cosval = sin + auto idx = __riscv_vand(round_angle, sincos_mask, vl); + auto idx1 = __riscv_vmseq(idx, 1, vl); + auto idx2 = __riscv_vmseq(idx, 2, vl); + auto idx3 = __riscv_vmseq(idx, 3, vl); + + auto idx13 = __riscv_vmor(idx1, idx3, vl); + sinval = __riscv_vmerge(sin, cos, idx13, vl); + cosval = __riscv_vmerge(cos, sin, idx13, vl); + + sinval = __riscv_vfneg_mu(__riscv_vmor(idx2, idx3, vl), sinval, sinval, vl); + cosval = __riscv_vfneg_mu(__riscv_vmor(idx1, idx2, vl), cosval, cosval, vl); +} + +}}} // namespace cv::cv_hal_rvv::detail + +#endif // OPENCV_HAL_RVV_SINCOS_HPP_INCLUDED diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/types.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/types.hpp index e6d4bd99fb..71182e0e4f 100644 --- a/3rdparty/hal_rvv/hal_rvv_1p0/types.hpp +++ b/3rdparty/hal_rvv/hal_rvv_1p0/types.hpp @@ -96,6 +96,22 @@ template using RVV_SameLen = RVV; +template struct RVV_ToIntHelper; +template struct RVV_ToUintHelper; +template struct RVV_ToFloatHelper; + +template +using RVV_ToInt = + RVV::type, RVV_T::lmul>; + +template +using RVV_ToUint = + RVV::type, RVV_T::lmul>; + +template +using RVV_ToFloat = + RVV::type, RVV_T::lmul>; + template using RVV_BaseType = RVV; @@ -203,13 +219,15 @@ static inline BaseType vredmax(VecType vs2, BaseType vs1, size_t vl) { using BoolType = HAL_RVV_BOOL_TYPE(__VA_ARGS__); \ using BaseType = v##VEC_TYPE##m1_t; \ \ - static constexpr size_t lmul = LMUL_TYPE; \ + static constexpr RVV_LMUL lmul = LMUL_TYPE; \ \ HAL_RVV_SIZE_RELATED(EEW, TYPE, LMUL, __VA_ARGS__) \ HAL_RVV_SIZE_UNRELATED(__VA_ARGS__) \ \ template \ inline static VecType cast(FROM v, size_t vl); \ + template \ + inline static VecType reinterpret(FROM v); \ }; \ \ template <> \ @@ -304,6 +322,20 @@ HAL_RVV_DEFINE_ONE( float, float32, LMUL_f2, 32, f32, mf2, HAL_RVV_FLOAT_PARAM) // -------------------------------Define cast-------------------------------- +template <> struct RVV_ToIntHelper<1> {using type = int8_t;}; +template <> struct RVV_ToIntHelper<2> {using type = int16_t;}; +template <> struct RVV_ToIntHelper<4> {using type = int32_t;}; +template <> struct RVV_ToIntHelper<8> {using type = int64_t;}; + +template <> struct RVV_ToUintHelper<1> {using type = uint8_t;}; +template <> struct RVV_ToUintHelper<2> {using type = uint16_t;}; +template <> struct RVV_ToUintHelper<4> {using type = uint32_t;}; +template <> struct RVV_ToUintHelper<8> {using type = uint64_t;}; + +template <> struct RVV_ToFloatHelper<2> {using type = _Float16;}; +template <> struct RVV_ToFloatHelper<4> {using type = float;}; +template <> struct RVV_ToFloatHelper<8> {using type = double;}; + #define HAL_RVV_CVT(ONE, TWO) \ template <> \ inline ONE::VecType ONE::cast(TWO::VecType v, size_t vl) { return __riscv_vncvt_x(v, vl); } \ @@ -441,18 +473,52 @@ HAL_RVV_CVT(RVV_F32MF2, RVV_F64M1) #undef HAL_RVV_CVT -#define HAL_RVV_CVT(A, B, A_TYPE, B_TYPE, LMUL_TYPE, LMUL) \ +#define HAL_RVV_CVT(A, B, A_TYPE, B_TYPE, LMUL_TYPE, LMUL, IS_U) \ template <> \ inline RVV::VecType RVV::cast( \ - RVV::VecType v, [[maybe_unused]] size_t vl \ + RVV::VecType v, size_t vl \ ) { \ - return __riscv_vreinterpret_##A_TYPE##LMUL(v); \ + return __riscv_vfcvt_f_x##IS_U##_v_##A_TYPE##LMUL(v, vl); \ } \ template <> \ inline RVV::VecType RVV::cast( \ - RVV::VecType v, [[maybe_unused]] size_t vl \ + RVV::VecType v, size_t vl \ ) { \ - return __riscv_vreinterpret_##B_TYPE##LMUL(v); \ + return __riscv_vfcvt_x##IS_U##_f_v_##B_TYPE##LMUL(v, vl); \ + } + +HAL_RVV_CVT( float, int32_t, f32, i32, LMUL_1, m1, ) +HAL_RVV_CVT( float, int32_t, f32, i32, LMUL_2, m2, ) +HAL_RVV_CVT( float, int32_t, f32, i32, LMUL_4, m4, ) +HAL_RVV_CVT( float, int32_t, f32, i32, LMUL_8, m8, ) +HAL_RVV_CVT( float, int32_t, f32, i32, LMUL_f2, mf2, ) + +HAL_RVV_CVT( float, uint32_t, f32, u32, LMUL_1, m1, u) +HAL_RVV_CVT( float, uint32_t, f32, u32, LMUL_2, m2, u) +HAL_RVV_CVT( float, uint32_t, f32, u32, LMUL_4, m4, u) +HAL_RVV_CVT( float, uint32_t, f32, u32, LMUL_8, m8, u) +HAL_RVV_CVT( float, uint32_t, f32, u32, LMUL_f2, mf2, u) + +HAL_RVV_CVT(double, int64_t, f64, i64, LMUL_1, m1, ) +HAL_RVV_CVT(double, int64_t, f64, i64, LMUL_2, m2, ) +HAL_RVV_CVT(double, int64_t, f64, i64, LMUL_4, m4, ) +HAL_RVV_CVT(double, int64_t, f64, i64, LMUL_8, m8, ) + +HAL_RVV_CVT(double, uint64_t, f64, u64, LMUL_1, m1, u) +HAL_RVV_CVT(double, uint64_t, f64, u64, LMUL_2, m2, u) +HAL_RVV_CVT(double, uint64_t, f64, u64, LMUL_4, m4, u) +HAL_RVV_CVT(double, uint64_t, f64, u64, LMUL_8, m8, u) + +#undef HAL_RVV_CVT + +#define HAL_RVV_CVT(A, B, A_TYPE, B_TYPE, LMUL_TYPE, LMUL) \ + template <> \ + inline RVV::VecType RVV::reinterpret(RVV::VecType v) { \ + return __riscv_vreinterpret_##A_TYPE##LMUL(v); \ + } \ + template <> \ + inline RVV::VecType RVV::reinterpret(RVV::VecType v) { \ + return __riscv_vreinterpret_##B_TYPE##LMUL(v); \ } #define HAL_RVV_CVT2(A, B, A_TYPE, B_TYPE) \ diff --git a/modules/core/perf/perf_math.cpp b/modules/core/perf/perf_math.cpp index 398a3ad651..e06e281592 100644 --- a/modules/core/perf/perf_math.cpp +++ b/modules/core/perf/perf_math.cpp @@ -79,6 +79,28 @@ PERF_TEST_P(CartToPolarFixture, CartToPolar, SANITY_CHECK_NOTHING(); } +///////////// Polar to Cart ///////////// + +typedef Size_MatType PolarToCartFixture; + +PERF_TEST_P(PolarToCartFixture, PolarToCart, + testing::Combine(testing::Values(TYPICAL_MAT_SIZES), testing::Values(CV_32F, CV_64F))) +{ + cv::Size size = std::get<0>(GetParam()); + int type = std::get<1>(GetParam()); + + cv::Mat magnitude(size, type); + cv::Mat angle(size, type); + cv::Mat x(size, type); + cv::Mat y(size, type); + + declare.in(magnitude, angle, WARMUP_RNG).out(x, y); + + TEST_CYCLE() cv::polarToCart(magnitude, angle, x, y); + + SANITY_CHECK_NOTHING(); +} + // generates random vectors, performs Gram-Schmidt orthogonalization on them Mat randomOrtho(int rows, int ftype, RNG& rng) { diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 2ba2a4001e..0403b75452 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -328,149 +328,6 @@ void cartToPolar( InputArray src1, InputArray src2, * Polar -> Cartezian * \****************************************************************************************/ -static void SinCos_32f( const float *angle, float *sinval, float* cosval, - int len, int angle_in_degrees ) -{ - const int N = 64; - - static const double sin_table[] = - { - 0.00000000000000000000, 0.09801714032956060400, - 0.19509032201612825000, 0.29028467725446233000, - 0.38268343236508978000, 0.47139673682599764000, - 0.55557023301960218000, 0.63439328416364549000, - 0.70710678118654746000, 0.77301045336273699000, - 0.83146961230254524000, 0.88192126434835494000, - 0.92387953251128674000, 0.95694033573220894000, - 0.98078528040323043000, 0.99518472667219682000, - 1.00000000000000000000, 0.99518472667219693000, - 0.98078528040323043000, 0.95694033573220894000, - 0.92387953251128674000, 0.88192126434835505000, - 0.83146961230254546000, 0.77301045336273710000, - 0.70710678118654757000, 0.63439328416364549000, - 0.55557023301960218000, 0.47139673682599786000, - 0.38268343236508989000, 0.29028467725446239000, - 0.19509032201612861000, 0.09801714032956082600, - 0.00000000000000012246, -0.09801714032956059000, - -0.19509032201612836000, -0.29028467725446211000, - -0.38268343236508967000, -0.47139673682599764000, - -0.55557023301960196000, -0.63439328416364527000, - -0.70710678118654746000, -0.77301045336273666000, - -0.83146961230254524000, -0.88192126434835494000, - -0.92387953251128652000, -0.95694033573220882000, - -0.98078528040323032000, -0.99518472667219693000, - -1.00000000000000000000, -0.99518472667219693000, - -0.98078528040323043000, -0.95694033573220894000, - -0.92387953251128663000, -0.88192126434835505000, - -0.83146961230254546000, -0.77301045336273688000, - -0.70710678118654768000, -0.63439328416364593000, - -0.55557023301960218000, -0.47139673682599792000, - -0.38268343236509039000, -0.29028467725446250000, - -0.19509032201612872000, -0.09801714032956050600, - }; - - static const double k2 = (2*CV_PI)/N; - - static const double sin_a0 = -0.166630293345647*k2*k2*k2; - static const double sin_a2 = k2; - - static const double cos_a0 = -0.499818138450326*k2*k2; - /*static const double cos_a2 = 1;*/ - - double k1; - int i = 0; - - if( !angle_in_degrees ) - k1 = N/(2*CV_PI); - else - k1 = N/360.; - -#if CV_AVX2 - if (USE_AVX2) - { - __m128d v_k1 = _mm_set1_pd(k1); - __m128d v_1 = _mm_set1_pd(1); - __m128i v_N1 = _mm_set1_epi32(N - 1); - __m128i v_N4 = _mm_set1_epi32(N >> 2); - __m128d v_sin_a0 = _mm_set1_pd(sin_a0); - __m128d v_sin_a2 = _mm_set1_pd(sin_a2); - __m128d v_cos_a0 = _mm_set1_pd(cos_a0); - - for ( ; i <= len - 4; i += 4) - { - __m128 v_angle = _mm_loadu_ps(angle + i); - - // 0-1 - __m128d v_t = _mm_mul_pd(_mm_cvtps_pd(v_angle), v_k1); - __m128i v_it = _mm_cvtpd_epi32(v_t); - v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it)); - - __m128i v_sin_idx = _mm_and_si128(v_it, v_N1); - __m128i v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1); - - __m128d v_t2 = _mm_mul_pd(v_t, v_t); - __m128d v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t); - __m128d v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1); - - __m128d v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8); - __m128d v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8); - - __m128d v_sin_val_0 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b), - _mm_mul_pd(v_cos_a, v_sin_b)); - __m128d v_cos_val_0 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b), - _mm_mul_pd(v_sin_a, v_sin_b)); - - // 2-3 - v_t = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_angle), 8))), v_k1); - v_it = _mm_cvtpd_epi32(v_t); - v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it)); - - v_sin_idx = _mm_and_si128(v_it, v_N1); - v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1); - - v_t2 = _mm_mul_pd(v_t, v_t); - v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t); - v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1); - - v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8); - v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8); - - __m128d v_sin_val_1 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b), - _mm_mul_pd(v_cos_a, v_sin_b)); - __m128d v_cos_val_1 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b), - _mm_mul_pd(v_sin_a, v_sin_b)); - - _mm_storeu_ps(sinval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_sin_val_0), - _mm_cvtpd_ps(v_sin_val_1))); - _mm_storeu_ps(cosval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_cos_val_0), - _mm_cvtpd_ps(v_cos_val_1))); - } - } -#endif - - for( ; i < len; i++ ) - { - double t = angle[i]*k1; - int it = cvRound(t); - t -= it; - int sin_idx = it & (N - 1); - int cos_idx = (N/4 - sin_idx) & (N - 1); - - double sin_b = (sin_a0*t*t + sin_a2)*t; - double cos_b = cos_a0*t*t + 1; - - double sin_a = sin_table[sin_idx]; - double cos_a = sin_table[cos_idx]; - - double sin_val = sin_a*cos_b + cos_a*sin_b; - double cos_val = cos_a*cos_b - sin_a*sin_b; - - sinval[i] = (float)sin_val; - cosval[i] = (float)cos_val; - } -} - - #ifdef HAVE_OPENCL static bool ocl_polarToCart( InputArray _mag, InputArray _angle, @@ -587,11 +444,13 @@ void polarToCart( InputArray src1, InputArray src2, CV_Assert(dst1.getObj() != dst2.getObj()); +#ifdef HAVE_IPP const bool isInPlace = (src1.getObj() == dst1.getObj()) || (src1.getObj() == dst2.getObj()) || (src2.getObj() == dst1.getObj()) || (src2.getObj() == dst2.getObj()); +#endif int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type)); @@ -610,95 +469,25 @@ void polarToCart( InputArray src1, InputArray src2, const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0}; uchar* ptrs[4] = {}; NAryMatIterator it(arrays, ptrs); - cv::AutoBuffer _buf; - float* buf[2] = {0, 0}; - int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn); + int j, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn); size_t esz1 = Angle.elemSize1(); - if (( depth == CV_64F ) || isInPlace) - { - _buf.allocate(blockSize*2); - buf[0] = _buf.data(); - buf[1] = buf[0] + blockSize; - } - for( size_t i = 0; i < it.nplanes; i++, ++it ) { for( j = 0; j < total; j += blockSize ) { int len = std::min(total - j, blockSize); - if (( depth == CV_32F ) && !isInPlace) + if ( depth == CV_32F ) { const float *mag = (const float*)ptrs[0], *angle = (const float*)ptrs[1]; float *x = (float*)ptrs[2], *y = (float*)ptrs[3]; - - SinCos_32f( angle, y, x, len, angleInDegrees ); - if( mag ) - { - k = 0; - -#if (CV_SIMD || CV_SIMD_SCALABLE) - int cWidth = VTraits::vlanes(); - for( ; k <= len - cWidth; k += cWidth ) - { - v_float32 v_m = vx_load(mag + k); - v_store(x + k, v_mul(vx_load(x + k), v_m)); - v_store(y + k, v_mul(vx_load(y + k), v_m)); - } - vx_cleanup(); -#endif - - for( ; k < len; k++ ) - { - float m = mag[k]; - x[k] *= m; y[k] *= m; - } - } - } - else if (( depth == CV_32F ) && isInPlace) - { - const float *mag = (const float*)ptrs[0], *angle = (const float*)ptrs[1]; - float *x = (float*)ptrs[2], *y = (float*)ptrs[3]; - - for( k = 0; k < len; k++ ) - buf[0][k] = (float)angle[k]; - - SinCos_32f( buf[0], buf[1], buf[0], len, angleInDegrees ); - if( mag ) - for( k = 0; k < len; k++ ) - { - float m = mag[k]; - x[k] = buf[0][k]*m; y[k] = buf[1][k]*m; - } - else - { - std::memcpy(x, buf[0], sizeof(float) * len); - std::memcpy(y, buf[1], sizeof(float) * len); - } + hal::polarToCart32f( mag, angle, x, y, len, angleInDegrees ); } else { const double *mag = (const double*)ptrs[0], *angle = (const double*)ptrs[1]; double *x = (double*)ptrs[2], *y = (double*)ptrs[3]; - - for( k = 0; k < len; k++ ) - buf[0][k] = (float)angle[k]; - - SinCos_32f( buf[0], buf[1], buf[0], len, angleInDegrees ); - if( mag ) - for( k = 0; k < len; k++ ) - { - double m = mag[k]; - x[k] = buf[0][k]*m; y[k] = buf[1][k]*m; - } - else - { - for( k = 0; k < len; k++ ) - { - x[k] = buf[0][k]; - y[k] = buf[1][k]; - } - } + hal::polarToCart64f( mag, angle, x, y, len, angleInDegrees ); } if( ptrs[0] ) diff --git a/modules/core/src/mathfuncs_core.dispatch.cpp b/modules/core/src/mathfuncs_core.dispatch.cpp index 485eac27b4..84e4e6a652 100644 --- a/modules/core/src/mathfuncs_core.dispatch.cpp +++ b/modules/core/src/mathfuncs_core.dispatch.cpp @@ -29,6 +29,26 @@ void cartToPolar64f(const double* x, const double* y, double* mag, double* angle CV_CPU_DISPATCH_MODES_ALL); } +void polarToCart32f(const float* mag, const float* angle, float* x, float* y, int len, bool angleInDegrees) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(polarToCart32f, cv_hal_polarToCart32f, mag, angle, x, y, len, angleInDegrees); + + CV_CPU_DISPATCH(polarToCart32f, (mag, angle, x, y, len, angleInDegrees), + CV_CPU_DISPATCH_MODES_ALL); +} + +void polarToCart64f(const double* mag, const double* angle, double* x, double* y, int len, bool angleInDegrees) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(polarToCart64f, cv_hal_polarToCart64f, mag, angle, x, y, len, angleInDegrees); + + CV_CPU_DISPATCH(polarToCart64f, (mag, angle, x, y, len, angleInDegrees), + CV_CPU_DISPATCH_MODES_ALL); +} + void fastAtan32f(const float *Y, const float *X, float *angle, int len, bool angleInDegrees ) { CV_INSTRUMENT_REGION(); diff --git a/modules/core/src/mathfuncs_core.simd.hpp b/modules/core/src/mathfuncs_core.simd.hpp index e9d57a5c4d..d9289ecb4e 100644 --- a/modules/core/src/mathfuncs_core.simd.hpp +++ b/modules/core/src/mathfuncs_core.simd.hpp @@ -11,6 +11,8 @@ CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN // forward declarations void cartToPolar32f(const float *X, const float *Y, float* mag, float *angle, int len, bool angleInDegrees); void cartToPolar64f(const double *X, const double *Y, double* mag, double *angle, int len, bool angleInDegrees); +void polarToCart32f(const float *mag, const float *angle, float *X, float *Y, int len, bool angleInDegrees); +void polarToCart64f(const double *mag, const double *angle, double *X, double *Y, int len, bool angleInDegrees); void fastAtan32f(const float *Y, const float *X, float *angle, int len, bool angleInDegrees); void fastAtan64f(const double *Y, const double *X, double *angle, int len, bool angleInDegrees); void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees); @@ -178,6 +180,167 @@ void cartToPolar64f(const double *X, const double *Y, double *mag, double *angle } } +namespace { + +static inline void SinCos_32f(const float* mag, const float* angle, float* cosval, float* sinval, int len, int angle_in_degrees) +{ + const int N = 64; + + static const double sin_table[] = + { + 0.00000000000000000000, 0.09801714032956060400, + 0.19509032201612825000, 0.29028467725446233000, + 0.38268343236508978000, 0.47139673682599764000, + 0.55557023301960218000, 0.63439328416364549000, + 0.70710678118654746000, 0.77301045336273699000, + 0.83146961230254524000, 0.88192126434835494000, + 0.92387953251128674000, 0.95694033573220894000, + 0.98078528040323043000, 0.99518472667219682000, + 1.00000000000000000000, 0.99518472667219693000, + 0.98078528040323043000, 0.95694033573220894000, + 0.92387953251128674000, 0.88192126434835505000, + 0.83146961230254546000, 0.77301045336273710000, + 0.70710678118654757000, 0.63439328416364549000, + 0.55557023301960218000, 0.47139673682599786000, + 0.38268343236508989000, 0.29028467725446239000, + 0.19509032201612861000, 0.09801714032956082600, + 0.00000000000000012246, -0.09801714032956059000, + -0.19509032201612836000, -0.29028467725446211000, + -0.38268343236508967000, -0.47139673682599764000, + -0.55557023301960196000, -0.63439328416364527000, + -0.70710678118654746000, -0.77301045336273666000, + -0.83146961230254524000, -0.88192126434835494000, + -0.92387953251128652000, -0.95694033573220882000, + -0.98078528040323032000, -0.99518472667219693000, + -1.00000000000000000000, -0.99518472667219693000, + -0.98078528040323043000, -0.95694033573220894000, + -0.92387953251128663000, -0.88192126434835505000, + -0.83146961230254546000, -0.77301045336273688000, + -0.70710678118654768000, -0.63439328416364593000, + -0.55557023301960218000, -0.47139673682599792000, + -0.38268343236509039000, -0.29028467725446250000, + -0.19509032201612872000, -0.09801714032956050600, + }; + + static const double k2 = (2*CV_PI)/N; + + static const double sin_a0 = -0.166630293345647*k2*k2*k2; + static const double sin_a2 = k2; + + static const double cos_a0 = -0.499818138450326*k2*k2; + /*static const double cos_a2 = 1;*/ + + double k1; + int i = 0; + + if( !angle_in_degrees ) + k1 = N/(2*CV_PI); + else + k1 = N/360.; + +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits::vlanes(); + const v_float32 scale = vx_setall_f32(angle_in_degrees ? (float)CV_PI / 180.f : 1.f); + + for( ; i < len; i += VECSZ*2 ) + { + if( i + VECSZ*2 > len ) + { + // if it's inplace operation, we cannot repeatedly process + // the tail for the second time, so we have to use the + // scalar code + if( i == 0 || angle == cosval || angle == sinval || mag == cosval || mag == sinval ) + break; + i = len - VECSZ*2; + } + + v_float32 r0 = v_mul(vx_load(angle + i), scale); + v_float32 r1 = v_mul(vx_load(angle + i + VECSZ), scale); + + v_float32 c0, c1, s0, s1; + v_sincos(r0, s0, c0); + v_sincos(r1, s1, c1); + + if( mag ) + { + v_float32 m0 = vx_load(mag + i); + v_float32 m1 = vx_load(mag + i + VECSZ); + c0 = v_mul(c0, m0); + c1 = v_mul(c1, m1); + s0 = v_mul(s0, m0); + s1 = v_mul(s1, m1); + } + + v_store(cosval + i, c0); + v_store(cosval + i + VECSZ, c1); + + v_store(sinval + i, s0); + v_store(sinval + i + VECSZ, s1); + } + vx_cleanup(); +#endif + + for( ; i < len; i++ ) + { + double t = angle[i]*k1; + int it = cvRound(t); + t -= it; + int sin_idx = it & (N - 1); + int cos_idx = (N/4 - sin_idx) & (N - 1); + + double sin_b = (sin_a0*t*t + sin_a2)*t; + double cos_b = cos_a0*t*t + 1; + + double sin_a = sin_table[sin_idx]; + double cos_a = sin_table[cos_idx]; + + double sin_val = sin_a*cos_b + cos_a*sin_b; + double cos_val = cos_a*cos_b - sin_a*sin_b; + + if (mag) + { + double mag_val = mag[i]; + sin_val *= mag_val; + cos_val *= mag_val; + } + + sinval[i] = (float)sin_val; + cosval[i] = (float)cos_val; + } +} + +} // anonymous:: + +void polarToCart32f(const float *mag, const float *angle, float *X, float *Y, int len, bool angleInDegrees) +{ + CV_INSTRUMENT_REGION(); + SinCos_32f(mag, angle, X, Y, len, angleInDegrees); +} + +void polarToCart64f(const double *mag, const double *angle, double *X, double *Y, int len, bool angleInDegrees) +{ + CV_INSTRUMENT_REGION(); + + const int BLKSZ = 128; + float ybuf[BLKSZ], xbuf[BLKSZ], _mbuf[BLKSZ], abuf[BLKSZ]; + float* mbuf = mag ? _mbuf : nullptr; + for( int i = 0; i < len; i += BLKSZ ) + { + int j, blksz = std::min(BLKSZ, len - i); + for( j = 0; j < blksz; j++ ) + { + if (mbuf) + mbuf[j] = (float)mag[i + j]; + abuf[j] = (float)angle[i + j]; + } + SinCos_32f(mbuf, abuf, xbuf, ybuf, blksz, angleInDegrees); + for( j = 0; j < blksz; j++ ) + X[i + j] = xbuf[j]; + for( j = 0; j < blksz; j++ ) + Y[i + j] = ybuf[j]; + } +} + static void fastAtan32f_(const float *Y, const float *X, float *angle, int len, bool angleInDegrees ) { float scale = angleInDegrees ? 1.f : (float)(CV_PI/180);