diff --git a/3rdparty/hal_rvv/hal_rvv.hpp b/3rdparty/hal_rvv/hal_rvv.hpp
index 57d2ccfee5..83b1ea272c 100644
--- a/3rdparty/hal_rvv/hal_rvv.hpp
+++ b/3rdparty/hal_rvv/hal_rvv.hpp
@@ -30,6 +30,7 @@
 #include "hal_rvv_1p0/minmax.hpp" // core
 #include "hal_rvv_1p0/atan.hpp" // core
 #include "hal_rvv_1p0/split.hpp" // core
+#include "hal_rvv_1p0/magnitude.hpp" // core
 #include "hal_rvv_1p0/flip.hpp" // core
 #include "hal_rvv_1p0/lut.hpp" // core
 #include "hal_rvv_1p0/exp.hpp" // core
diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/magnitude.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/magnitude.hpp
new file mode 100644
index 0000000000..eb814c1b77
--- /dev/null
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/magnitude.hpp
@@ -0,0 +1,47 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
+
+#ifndef OPENCV_HAL_RVV_MAGNITUDE_HPP_INCLUDED
+#define OPENCV_HAL_RVV_MAGNITUDE_HPP_INCLUDED
+
+#include <riscv_vector.h>
+
+#include "hal_rvv_1p0/sqrt.hpp"
+#include "hal_rvv_1p0/types.hpp"
+
+namespace cv { namespace cv_hal_rvv {
+
+#undef cv_hal_magnitude32f
+#define cv_hal_magnitude32f cv::cv_hal_rvv::magnitude<cv::cv_hal_rvv::Sqrt32f<cv::cv_hal_rvv::RVV_F32M8>>
+#undef cv_hal_magnitude64f
+#define cv_hal_magnitude64f cv::cv_hal_rvv::magnitude<cv::cv_hal_rvv::Sqrt64f<cv::cv_hal_rvv::RVV_F64M8>>
+
+// Element-wise magnitude: dst[i] = sqrt(x[i]*x[i] + y[i]*y[i]) for i in [0, len).
+// SQRT_T selects the RVV vector type (SQRT_T::T) and the iteration count handed to
+// detail::sqrt; T is the element type of the input and output arrays.
+// Always returns CV_HAL_ERROR_OK; a non-positive len leaves dst untouched.
+template <typename SQRT_T, typename T = typename SQRT_T::T::ElemType>
+inline int magnitude(const T* x, const T* y, T* dst, int len)
+{
+    size_t vl;
+    for (; len > 0; len -= (int)vl, x += vl, y += vl, dst += vl)
+    {
+        vl = SQRT_T::T::setvl(len);
+
+        auto vx = SQRT_T::T::vload(x, vl);
+        auto vy = SQRT_T::T::vload(y, vl);
+
+        // vfmadd(a, b, c) yields a * b + c, so the sqrt argument is vx * vx + vy * vy.
+        auto vmag = detail::sqrt<SQRT_T::iter_times>(__riscv_vfmadd(vx, vx, __riscv_vfmul(vy, vy, vl), vl), vl);
+        SQRT_T::T::vstore(dst, vmag, vl);
+    }
+
+    return CV_HAL_ERROR_OK;
+}
+
+}} // namespace cv::cv_hal_rvv
+
+#endif // OPENCV_HAL_RVV_MAGNITUDE_HPP_INCLUDED
diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/sqrt.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/sqrt.hpp
index 9a2e5d6bfe..9ed72f6818 100644
--- a/3rdparty/hal_rvv/hal_rvv_1p0/sqrt.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/sqrt.hpp
@@ -45,11 +45,12 @@ inline VEC_T sqrt(VEC_T x, size_t vl)
         t = __riscv_vfrsub(t, 1.5, vl);
         y = __riscv_vfmul(t, y, vl);
     }
-    // just to prevent the compiler from calculating mask before the invSqrt, which will run out
+    // just to prevent the compiler from calculating mask before the iteration, which will run out
     // of registers and cause memory access.
     asm volatile("" ::: "memory");
-    auto mask = __riscv_vmfne(x, 0.0, vl);
-    mask = __riscv_vmfne_mu(mask, mask, x, INFINITY, vl);
+    auto classified = __riscv_vfclass(x, vl);
+    // block -0, +0, positive subnormal number, +inf
+    auto mask = __riscv_vmseq(__riscv_vand(classified, 0b10111000, vl), 0, vl);
     return __riscv_vfmul_mu(mask, x, x, y, vl);
 }
 
@@ -58,8 +59,9 @@ inline VEC_T sqrt(VEC_T x, size_t vl)
 template <size_t iter_times, typename VEC_T>
 inline VEC_T invSqrt(VEC_T x, size_t vl)
 {
-    auto mask = __riscv_vmfne(x, 0.0, vl);
-    mask = __riscv_vmfne_mu(mask, mask, x, INFINITY, vl);
+    auto classified = __riscv_vfclass(x, vl);
+    // block -0, +0, positive subnormal number, +inf
+    auto mask = __riscv_vmseq(__riscv_vand(classified, 0b10111000, vl), 0, vl);
     auto x2 = __riscv_vfmul(x, 0.5, vl);
     auto y = __riscv_vfrsqrt7(x, vl);
 #pragma unroll
diff --git a/modules/core/perf/perf_math.cpp b/modules/core/perf/perf_math.cpp
index fe947aec1a..c06fda44da 100644
--- a/modules/core/perf/perf_math.cpp
+++ b/modules/core/perf/perf_math.cpp
@@ -36,6 +36,28 @@ PERF_TEST_P(VectorLength, phase64f, testing::Values(128, 1000, 128*1024, 512*102
     SANITY_CHECK(angle, 5e-5);
 }
 
+///////////// Magnitude /////////////
+
+typedef Size_MatType MagnitudeFixture;
+
+// Times cv::magnitude over TYPICAL_MAT_SIZES for CV_32F and CV_64F; results are not value-checked.
+PERF_TEST_P(MagnitudeFixture, Magnitude,
+    testing::Combine(testing::Values(TYPICAL_MAT_SIZES), testing::Values(CV_32F, CV_64F)))
+{
+    cv::Size size = std::get<0>(GetParam());
+    int type = std::get<1>(GetParam());
+
+    cv::Mat x(size, type);
+    cv::Mat y(size, type);
+    cv::Mat magnitude(size, type);
+
+    declare.in(x, y, WARMUP_RNG).out(magnitude);
+
+    TEST_CYCLE() cv::magnitude(x, y, magnitude);
+
+    SANITY_CHECK_NOTHING();
+}
+
 // generates random vectors, performs Gram-Schmidt orthogonalization on them
 Mat randomOrtho(int rows, int ftype, RNG& rng)
 {
diff --git a/modules/core/src/mathfuncs_core.simd.hpp b/modules/core/src/mathfuncs_core.simd.hpp
index 41a3261c64..0d9d9272e6 100644
--- a/modules/core/src/mathfuncs_core.simd.hpp
+++ b/modules/core/src/mathfuncs_core.simd.hpp
@@ -273,7 +273,7 @@ void magnitude32f(const float* x, const float* y, float* mag, int len)
 
     int i = 0;
 
-#if CV_SIMD
+#if (CV_SIMD || CV_SIMD_SCALABLE)
     const int VECSZ = VTraits<v_float32>::vlanes();
     for( ; i < len; i += VECSZ*2 )
     {
@@ -306,7 +306,7 @@ void magnitude64f(const double* x, const double* y, double* mag, int len)
 
     int i = 0;
 
-#if CV_SIMD_64F
+#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
     const int VECSZ = VTraits<v_float64>::vlanes();
     for( ; i < len; i += VECSZ*2 )
     {