From b5f5540e8ab5f94afc43e30b77f42afb90635fe4 Mon Sep 17 00:00:00 2001 From: Daniil Anufriev Date: Fri, 21 Feb 2025 17:36:54 +0300 Subject: [PATCH] Merge pull request #26886 from sk1er52:feature/exp64f MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable SIMD_SCALABLE for exp and sqrt #26886 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake ``` CPU - Banana Pi k1, compiler - clang 18.1.4 ``` ``` Geometric mean (ms) Name of Test baseline hal ui hal ui vs vs baseline baseline (x-factor) (x-factor) Exp::ExpFixture::(127x61, 32FC1) 0.358 -- 0.033 -- 10.70 Exp::ExpFixture::(640x480, 32FC1) 14.304 -- 1.167 -- 12.26 Exp::ExpFixture::(1280x720, 32FC1) 42.785 -- 3.538 -- 12.09 Exp::ExpFixture::(1920x1080, 32FC1) 96.206 -- 7.927 -- 12.14 Exp::ExpFixture::(127x61, 64FC1) 0.433 0.050 0.098 8.59 4.40 Exp::ExpFixture::(640x480, 64FC1) 17.315 1.935 3.813 8.95 4.54 Exp::ExpFixture::(1280x720, 64FC1) 52.181 5.877 11.519 8.88 4.53 Exp::ExpFixture::(1920x1080, 64FC1) 117.082 13.157 25.854 8.90 4.53 ``` Additionally, this PR brings Sqrt optimization with UI: ``` Geometric mean (ms) Name of Test baseline ui ui vs baseline (x-factor) Sqrt::SqrtFixture::(127x61, 5, false) 0.111 0.027 4.11 Sqrt::SqrtFixture::(127x61, 6, false) 0.149 0.053 2.82 Sqrt::SqrtFixture::(640x480, 5, false) 4.374 0.967 4.52 
Sqrt::SqrtFixture::(640x480, 6, false) 5.885 2.046 2.88 Sqrt::SqrtFixture::(1280x720, 5, false) 12.960 2.915 4.45 Sqrt::SqrtFixture::(1280x720, 6, false) 17.648 6.107 2.89 Sqrt::SqrtFixture::(1920x1080, 5, false) 29.178 6.524 4.47 Sqrt::SqrtFixture::(1920x1080, 6, false) 39.709 13.670 2.90 ``` Reference Muller, J.-M. Elementary Functions: Algorithms and Implementation. 2nd ed. Boston: Birkhäuser, 2006. https://www.springer.com/gp/book/9780817643720 --- modules/core/perf/perf_arithm.cpp | 42 ++++++++++++++++++++---- modules/core/src/mathfuncs_core.simd.hpp | 8 ++--- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/modules/core/perf/perf_arithm.cpp b/modules/core/perf/perf_arithm.cpp index 6cc25a3476..b9decbfd4e 100644 --- a/modules/core/perf/perf_arithm.cpp +++ b/modules/core/perf/perf_arithm.cpp @@ -706,22 +706,27 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/ , ArithmMixedTest, ) ); -typedef Size_MatType InvSqrtFixture; -PERF_TEST_P(InvSqrtFixture, InvSqrt, testing::Combine( - testing::Values(TYPICAL_MAT_SIZES), - testing::Values(CV_32FC1, CV_64FC1))) -{ +typedef perf::TestBaseWithParam<std::tuple<cv::Size, int, bool>> SqrtFixture; +PERF_TEST_P_(SqrtFixture, Sqrt) { Size sz = get<0>(GetParam()); int type = get<1>(GetParam()); + bool inverse = get<2>(GetParam()); Mat src(sz, type), dst(sz, type); randu(src, FLT_EPSILON, 1000); declare.in(src).out(dst); - TEST_CYCLE() cv::pow(src, -0.5, dst); + TEST_CYCLE() cv::pow(src, inverse ? 
-0.5 : 0.5, dst); SANITY_CHECK_NOTHING(); } +INSTANTIATE_TEST_CASE_P(/*nothing*/ , SqrtFixture, + testing::Combine( + testing::Values(TYPICAL_MAT_SIZES), + testing::Values(CV_32FC1, CV_64FC1), + testing::Bool() + ) +); ///////////// Rotate //////////////////////// @@ -815,4 +820,29 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/ , PatchNaNsFixture, ) ); +//////////////EXP//////////// + +typedef Size_MatType ExpFixture; + +PERF_TEST_P(ExpFixture, Exp, + testing::Combine(testing::Values(TYPICAL_MAT_SIZES), testing::Values(CV_32F, CV_64F))) +{ + cv::Size size = std::get<0>(GetParam()); + int type = std::get<1>(GetParam()); + + cv::Mat src(size, type); + cv::Mat dst(size, type); + + declare.in(src).out(dst); + + cv::randu(src, -5.0, 5.0); + + TEST_CYCLE() + { + cv::exp(src, dst); + } + + SANITY_CHECK_NOTHING(); +} + } // namespace diff --git a/modules/core/src/mathfuncs_core.simd.hpp b/modules/core/src/mathfuncs_core.simd.hpp index 3fa3cba1b8..f92234f140 100644 --- a/modules/core/src/mathfuncs_core.simd.hpp +++ b/modules/core/src/mathfuncs_core.simd.hpp @@ -396,7 +396,7 @@ void sqrt32f(const float* src, float* dst, int len) int i = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) const int VECSZ = VTraits<v_float32>::vlanes(); for( ; i < len; i += VECSZ*2 ) { @@ -425,7 +425,7 @@ void sqrt64f(const double* src, double* dst, int len) int i = 0; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) const int VECSZ = VTraits<v_float64>::vlanes(); for( ; i < len; i += VECSZ*2 ) { @@ -527,7 +527,7 @@ void exp32f( const float *_x, float *y, int n ) float maxval = (float)(exp_max_val/exp_prescale); float postscale = (float)exp_postscale; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) const int VECSZ = VTraits<v_float32>::vlanes(); const v_float32 vprescale = vx_setall_f32((float)exp_prescale); const v_float32 vpostscale = vx_setall_f32((float)exp_postscale); @@ -641,7 +641,7 @@ void exp64f( const double *_x, double *y, int n ) double minval = (-exp_max_val/exp_prescale); double maxval = 
(exp_max_val/exp_prescale); -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) const int VECSZ = VTraits<v_float64>::vlanes(); const v_float64 vprescale = vx_setall_f64(exp_prescale); const v_float64 vpostscale = vx_setall_f64(exp_postscale);