Reworked separable filter to use wide universal intrinsics

2025-06-08 01:53:19 +08:00 · 2018-12-03 22:58:31 +03:00 · 2018-12-03 22:58:31 +03:00 · 06f32e3b3e
commit 06f32e3b3e
parent b0a08cced9
2 changed files with 785 additions and 1835 deletions
--- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp
@@ -1278,6 +1278,16 @@ OPENCV_HAL_IMPL_AVX_CHECK_FLT(v_float64x4, 15)
 OPENCV_HAL_IMPL_AVX_MULADD(v_float32x8, ps)
 OPENCV_HAL_IMPL_AVX_MULADD(v_float64x4, pd)
 // Multiply-add overload for v_int32x8: returns a * b + c, evaluated with the
 // type's own operator* and operator+ (no dedicated fused instruction is used here).
 inline v_int32x8 v_fma(const v_int32x8& a, const v_int32x8& b, const v_int32x8& c)
 {
    const v_int32x8 product = a * b;
    return product + c;
 }
 // v_muladd overload for v_int32x8: forwards its three operands, in order,
 // to v_fma and returns that result unchanged.
 inline v_int32x8 v_muladd(const v_int32x8& a, const v_int32x8& b, const v_int32x8& c)
 {
    v_int32x8 result = v_fma(a, b, c);
    return result;
 }
 inline v_float32x8 v_invsqrt(const v_float32x8& x)
 {
    v_float32x8 half = x * v256_setall_f32(0.5);
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp