mirror of
https://github.com/opencv/opencv.git
synced 2025-07-24 05:39:22 +08:00
Merge pull request #26886 from sk1er52:feature/exp64f
Enable SIMD_SCALABLE for exp and sqrt #26886

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake

```
CPU - Banana Pi k1, compiler - clang 18.1.4
```

```
Geometric mean (ms)

            Name of Test              baseline     hal      ui        hal         ui
                                                                      vs          vs
                                                                   baseline    baseline
                                                                  (x-factor)  (x-factor)
Exp::ExpFixture::(127x61, 32FC1)         0.358     --      0.033      --        10.70
Exp::ExpFixture::(640x480, 32FC1)       14.304     --      1.167      --        12.26
Exp::ExpFixture::(1280x720, 32FC1)      42.785     --      3.538      --        12.09
Exp::ExpFixture::(1920x1080, 32FC1)     96.206     --      7.927      --        12.14
Exp::ExpFixture::(127x61, 64FC1)         0.433    0.050    0.098     8.59        4.40
Exp::ExpFixture::(640x480, 64FC1)       17.315    1.935    3.813     8.95        4.54
Exp::ExpFixture::(1280x720, 64FC1)      52.181    5.877   11.519     8.88        4.53
Exp::ExpFixture::(1920x1080, 64FC1)    117.082   13.157   25.854     8.90        4.53
```

Additionally, this PR brings Sqrt optimization with UI:

```
Geometric mean (ms)

              Name of Test                 baseline     ui         ui
                                                                   vs
                                                                baseline
                                                               (x-factor)
Sqrt::SqrtFixture::(127x61, 5, false)         0.111    0.027      4.11
Sqrt::SqrtFixture::(127x61, 6, false)         0.149    0.053      2.82
Sqrt::SqrtFixture::(640x480, 5, false)        4.374    0.967      4.52
Sqrt::SqrtFixture::(640x480, 6, false)        5.885    2.046      2.88
Sqrt::SqrtFixture::(1280x720, 5, false)      12.960    2.915      4.45
Sqrt::SqrtFixture::(1280x720, 6, false)      17.648    6.107      2.89
Sqrt::SqrtFixture::(1920x1080, 5, false)     29.178    6.524      4.47
Sqrt::SqrtFixture::(1920x1080, 6, false)     39.709   13.670      2.90
```
Reference: Muller, J.-M. "Elementary Functions: Algorithms and Implementation." 2nd ed. Boston: Birkhäuser, 2006. https://www.springer.com/gp/book/9780817643720
This commit is contained in:
parent
a256886838
commit
b5f5540e8a
@ -706,22 +706,27 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/ , ArithmMixedTest,
|
||||
)
|
||||
);
|
||||
|
||||
typedef Size_MatType InvSqrtFixture;
|
||||
PERF_TEST_P(InvSqrtFixture, InvSqrt, testing::Combine(
|
||||
testing::Values(TYPICAL_MAT_SIZES),
|
||||
testing::Values(CV_32FC1, CV_64FC1)))
|
||||
{
|
||||
typedef perf::TestBaseWithParam<std::tuple<cv::Size, int, bool>> SqrtFixture;
|
||||
PERF_TEST_P_(SqrtFixture, Sqrt) {
|
||||
Size sz = get<0>(GetParam());
|
||||
int type = get<1>(GetParam());
|
||||
bool inverse = get<2>(GetParam());
|
||||
|
||||
Mat src(sz, type), dst(sz, type);
|
||||
randu(src, FLT_EPSILON, 1000);
|
||||
declare.in(src).out(dst);
|
||||
|
||||
TEST_CYCLE() cv::pow(src, -0.5, dst);
|
||||
TEST_CYCLE() cv::pow(src, inverse ? -0.5 : 0.5, dst);
|
||||
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(/*nothing*/ , SqrtFixture,
|
||||
testing::Combine(
|
||||
testing::Values(TYPICAL_MAT_SIZES),
|
||||
testing::Values(CV_32FC1, CV_64FC1),
|
||||
testing::Bool()
|
||||
)
|
||||
);
|
||||
|
||||
///////////// Rotate ////////////////////////
|
||||
|
||||
@ -815,4 +820,29 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/ , PatchNaNsFixture,
|
||||
)
|
||||
);
|
||||
|
||||
//////////////EXP////////////
|
||||
|
||||
typedef Size_MatType ExpFixture;
|
||||
|
||||
PERF_TEST_P(ExpFixture, Exp,
|
||||
testing::Combine(testing::Values(TYPICAL_MAT_SIZES), testing::Values(CV_32F, CV_64F)))
|
||||
{
|
||||
cv::Size size = std::get<0>(GetParam());
|
||||
int type = std::get<1>(GetParam());
|
||||
|
||||
cv::Mat src(size, type);
|
||||
cv::Mat dst(size, type);
|
||||
|
||||
declare.in(src).out(dst);
|
||||
|
||||
cv::randu(src, -5.0, 5.0);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::exp(src, dst);
|
||||
}
|
||||
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -396,7 +396,7 @@ void sqrt32f(const float* src, float* dst, int len)
|
||||
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD
|
||||
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
||||
const int VECSZ = VTraits<v_float32>::vlanes();
|
||||
for( ; i < len; i += VECSZ*2 )
|
||||
{
|
||||
@ -425,7 +425,7 @@ void sqrt64f(const double* src, double* dst, int len)
|
||||
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD_64F
|
||||
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
|
||||
const int VECSZ = VTraits<v_float64>::vlanes();
|
||||
for( ; i < len; i += VECSZ*2 )
|
||||
{
|
||||
@ -527,7 +527,7 @@ void exp32f( const float *_x, float *y, int n )
|
||||
float maxval = (float)(exp_max_val/exp_prescale);
|
||||
float postscale = (float)exp_postscale;
|
||||
|
||||
#if CV_SIMD
|
||||
#if (CV_SIMD || CV_SIMD_SCALABLE)
|
||||
const int VECSZ = VTraits<v_float32>::vlanes();
|
||||
const v_float32 vprescale = vx_setall_f32((float)exp_prescale);
|
||||
const v_float32 vpostscale = vx_setall_f32((float)exp_postscale);
|
||||
@ -641,7 +641,7 @@ void exp64f( const double *_x, double *y, int n )
|
||||
double minval = (-exp_max_val/exp_prescale);
|
||||
double maxval = (exp_max_val/exp_prescale);
|
||||
|
||||
#if CV_SIMD_64F
|
||||
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
|
||||
const int VECSZ = VTraits<v_float64>::vlanes();
|
||||
const v_float64 vprescale = vx_setall_f64(exp_prescale);
|
||||
const v_float64 vpostscale = vx_setall_f64(exp_postscale);
|
||||
|
Loading…
Reference in New Issue
Block a user