mirror of
https://github.com/opencv/opencv.git
synced 2025-08-05 22:19:14 +08:00
Merge pull request #27002 from GenshinImpactStarts:magnitude
[HAL RVV] impl magnitude | add perf test #27002 Implemented through the existing `cv_hal_magnitude32f` and `cv_hal_magnitude64f` interfaces. **UPDATE**: UI is enabled. The only difference between UI and HAL now is that HAL uses an approximate `sqrt`. Perf tests were run on MUSE-PI. ```sh $ opencv_test_core --gtest_filter="*Magnitude*" $ opencv_perf_core --gtest_filter="*Magnitude*" --perf_min_samples=300 --perf_force_samples=300 ``` Test results with UI enabled vs. HAL: ``` Name of Test ui rvv rvv vs ui (x-factor) Magnitude::MagnitudeFixture::(127x61, 32FC1) 0.029 0.016 1.75 Magnitude::MagnitudeFixture::(127x61, 64FC1) 0.057 0.036 1.57 Magnitude::MagnitudeFixture::(640x480, 32FC1) 1.063 0.648 1.64 Magnitude::MagnitudeFixture::(640x480, 64FC1) 2.261 1.530 1.48 Magnitude::MagnitudeFixture::(1280x720, 32FC1) 3.261 2.118 1.54 Magnitude::MagnitudeFixture::(1280x720, 64FC1) 6.802 4.682 1.45 Magnitude::MagnitudeFixture::(1920x1080, 32FC1) 7.287 4.738 1.54 Magnitude::MagnitudeFixture::(1920x1080, 64FC1) 15.226 10.334 1.47 ``` Test results before and after enabling UI: ``` Name of Test orig pr pr vs orig (x-factor) Magnitude::MagnitudeFixture::(127x61, 32FC1) 0.032 0.029 1.11 Magnitude::MagnitudeFixture::(127x61, 64FC1) 0.067 0.057 1.17 Magnitude::MagnitudeFixture::(640x480, 32FC1) 1.228 1.063 1.16 Magnitude::MagnitudeFixture::(640x480, 64FC1) 2.786 2.261 1.23 Magnitude::MagnitudeFixture::(1280x720, 32FC1) 3.762 3.261 1.15 Magnitude::MagnitudeFixture::(1280x720, 64FC1) 8.549 6.802 1.26 Magnitude::MagnitudeFixture::(1920x1080, 32FC1) 8.408 7.287 1.15 Magnitude::MagnitudeFixture::(1920x1080, 64FC1) 18.884 15.226 1.24 ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. 
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is an accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
71fe903121
commit
e30697fd42
1
3rdparty/hal_rvv/hal_rvv.hpp
vendored
1
3rdparty/hal_rvv/hal_rvv.hpp
vendored
@ -30,6 +30,7 @@
|
||||
#include "hal_rvv_1p0/minmax.hpp" // core
|
||||
#include "hal_rvv_1p0/atan.hpp" // core
|
||||
#include "hal_rvv_1p0/split.hpp" // core
|
||||
#include "hal_rvv_1p0/magnitude.hpp" // core
|
||||
#include "hal_rvv_1p0/flip.hpp" // core
|
||||
#include "hal_rvv_1p0/lut.hpp" // core
|
||||
#include "hal_rvv_1p0/exp.hpp" // core
|
||||
|
42
3rdparty/hal_rvv/hal_rvv_1p0/magnitude.hpp
vendored
Normal file
42
3rdparty/hal_rvv/hal_rvv_1p0/magnitude.hpp
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||
|
||||
#ifndef OPENCV_HAL_RVV_MAGNITUDE_HPP_INCLUDED
|
||||
#define OPENCV_HAL_RVV_MAGNITUDE_HPP_INCLUDED
|
||||
|
||||
#include <riscv_vector.h>
|
||||
|
||||
#include "hal_rvv_1p0/sqrt.hpp"
|
||||
#include "hal_rvv_1p0/types.hpp"
|
||||
|
||||
namespace cv { namespace cv_hal_rvv {
|
||||
|
||||
#undef cv_hal_magnitude32f
|
||||
#define cv_hal_magnitude32f cv::cv_hal_rvv::magnitude<cv::cv_hal_rvv::Sqrt32f<cv::cv_hal_rvv::RVV_F32M8>>
|
||||
#undef cv_hal_magnitude64f
|
||||
#define cv_hal_magnitude64f cv::cv_hal_rvv::magnitude<cv::cv_hal_rvv::Sqrt64f<cv::cv_hal_rvv::RVV_F64M8>>
|
||||
|
||||
// Element-wise magnitude: for each of the `len` elements, stores
// sqrt(x[i]*x[i] + y[i]*y[i]) into dst[i].
//
// SQRT_T is a traits type that provides:
//   - SQRT_T::T          : vector-type wrapper exposing ElemType, setvl, vload and vstore
//   - SQRT_T::iter_times : iteration count forwarded to detail::sqrt
// T defaults to the element type of that wrapper.
//
// Always returns CV_HAL_ERROR_OK.
// NOTE(review): detail::sqrt is defined in sqrt.hpp and is presumably an iterative
// approximation, so results may differ slightly from an exact sqrt - confirm there.
template <typename SQRT_T, typename T = typename SQRT_T::T::ElemType>
|
||||
inline int magnitude(const T* x, const T* y, T* dst, int len)
|
||||
{
|
||||
// Number of elements handled in the current iteration; assigned by setvl() below
// before the loop's increment expression reads it.
size_t vl;
|
||||
// Strip-mined loop: each pass consumes `vl` elements and advances all three
// pointers and the remaining length by that amount.
for (; len > 0; len -= (int)vl, x += vl, y += vl, dst += vl)
|
||||
{
|
||||
vl = SQRT_T::T::setvl(len);
|
||||
|
||||
auto vx = SQRT_T::T::vload(x, vl);
|
||||
auto vy = SQRT_T::T::vload(y, vl);
|
||||
|
||||
// vfmadd(vx, vx, vy*vy) evaluates vx*vx + vy*vy, which is then passed to detail::sqrt.
auto vmag = detail::sqrt<SQRT_T::iter_times>(__riscv_vfmadd(vx, vx, __riscv_vfmul(vy, vy, vl), vl), vl);
|
||||
SQRT_T::T::vstore(dst, vmag, vl);
|
||||
}
|
||||
|
||||
return CV_HAL_ERROR_OK;
|
||||
}
|
||||
|
||||
}} // namespace cv::cv_hal_rvv
|
||||
|
||||
#endif // OPENCV_HAL_RVV_MAGNITUDE_HPP_INCLUDED
|
12
3rdparty/hal_rvv/hal_rvv_1p0/sqrt.hpp
vendored
12
3rdparty/hal_rvv/hal_rvv_1p0/sqrt.hpp
vendored
@ -45,11 +45,12 @@ inline VEC_T sqrt(VEC_T x, size_t vl)
|
||||
t = __riscv_vfrsub(t, 1.5, vl);
|
||||
y = __riscv_vfmul(t, y, vl);
|
||||
}
|
||||
// just to prevent the compiler from calculating mask before the invSqrt, which will run out
|
||||
// just to prevent the compiler from calculating mask before the iteration, which will run out
|
||||
// of registers and cause memory access.
|
||||
asm volatile("" ::: "memory");
|
||||
auto mask = __riscv_vmfne(x, 0.0, vl);
|
||||
mask = __riscv_vmfne_mu(mask, mask, x, INFINITY, vl);
|
||||
auto classified = __riscv_vfclass(x, vl);
|
||||
// block -0, +0, positive subnormal number, +inf
|
||||
auto mask = __riscv_vmseq(__riscv_vand(classified, 0b10111000, vl), 0, vl);
|
||||
return __riscv_vfmul_mu(mask, x, x, y, vl);
|
||||
}
|
||||
|
||||
@ -58,8 +59,9 @@ inline VEC_T sqrt(VEC_T x, size_t vl)
|
||||
template <size_t iter_times, typename VEC_T>
|
||||
inline VEC_T invSqrt(VEC_T x, size_t vl)
|
||||
{
|
||||
auto mask = __riscv_vmfne(x, 0.0, vl);
|
||||
mask = __riscv_vmfne_mu(mask, mask, x, INFINITY, vl);
|
||||
auto classified = __riscv_vfclass(x, vl);
|
||||
// block -0, +0, positive subnormal number, +inf
|
||||
auto mask = __riscv_vmseq(__riscv_vand(classified, 0b10111000, vl), 0, vl);
|
||||
auto x2 = __riscv_vfmul(x, 0.5, vl);
|
||||
auto y = __riscv_vfrsqrt7(x, vl);
|
||||
#pragma unroll
|
||||
|
@ -36,6 +36,27 @@ PERF_TEST_P(VectorLength, phase64f, testing::Values(128, 1000, 128*1024, 512*102
|
||||
SANITY_CHECK(angle, 5e-5);
|
||||
}
|
||||
|
||||
///////////// Magnitude /////////////
|
||||
|
||||
typedef Size_MatType MagnitudeFixture;
|
||||
|
||||
// Performance test for cv::magnitude, parameterized over TYPICAL_MAT_SIZES and
// the CV_32F / CV_64F element types.
PERF_TEST_P(MagnitudeFixture, Magnitude,
|
||||
testing::Combine(testing::Values(TYPICAL_MAT_SIZES), testing::Values(CV_32F, CV_64F)))
|
||||
{
|
||||
// Unpack the (size, type) test parameter.
cv::Size size = std::get<0>(GetParam());
|
||||
int type = std::get<1>(GetParam());
|
||||
|
||||
// Two inputs and one output, all with the same size and type.
cv::Mat x(size, type);
|
||||
cv::Mat y(size, type);
|
||||
cv::Mat magnitude(size, type);
|
||||
|
||||
// Register x and y as inputs and magnitude as the output.
// NOTE(review): WARMUP_RNG presumably fills the inputs with random values - confirm.
declare.in(x, y, WARMUP_RNG).out(magnitude);
|
||||
|
||||
// Only the cv::magnitude call is inside the timed cycle.
TEST_CYCLE() cv::magnitude(x, y, magnitude);
|
||||
|
||||
// No check of the output values is requested here.
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
|
||||
// generates random vectors, performs Gram-Schmidt orthogonalization on them
|
||||
Mat randomOrtho(int rows, int ftype, RNG& rng)
|
||||
{
|
||||
|
@ -273,7 +273,7 @@ void magnitude32f(const float* x, const float* y, float* mag, int len)
|
||||
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD
|
||||
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
||||
const int VECSZ = VTraits<v_float32>::vlanes();
|
||||
for( ; i < len; i += VECSZ*2 )
|
||||
{
|
||||
@ -306,7 +306,7 @@ void magnitude64f(const double* x, const double* y, double* mag, int len)
|
||||
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD_64F
|
||||
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
|
||||
const int VECSZ = VTraits<v_float64>::vlanes();
|
||||
for( ; i < len; i += VECSZ*2 )
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user