diff --git a/modules/imgproc/src/phasecorr.cpp b/modules/imgproc/src/phasecorr.cpp index d2f88420be..9befd0be98 100644 --- a/modules/imgproc/src/phasecorr.cpp +++ b/modules/imgproc/src/phasecorr.cpp @@ -38,6 +38,7 @@ #include "precomp.hpp" #include +#include "opencv2/core/hal/intrin.hpp" namespace cv { @@ -614,8 +615,27 @@ void cv::createHanningWindow(OutputArray _dst, cv::Size winSize, int type) double* const wc = _wc.data(); double coeff0 = 2.0 * CV_PI / (double)(cols - 1), coeff1 = 2.0 * CV_PI / (double)(rows - 1); - for(int j = 0; j < cols; j++) - wc[j] = 0.5 * (1.0 - cos(coeff0 * j)); + int c = 0; +#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F + const int nlanes32 = VTraits::vlanes(); + const int nlanes64 = VTraits::vlanes(); + const int max_nlanes = VTraits::max_nlanes; + std::array index; + std::iota(index.data(), index.data()+max_nlanes, 0.f); + v_float64 vindex = vx_load(index.data()); + v_float64 delta = vx_setall_f64(VTraits::vlanes()); + v_float64 vcoeff0 = vx_setall_f64(coeff0); + v_float64 one = vx_setall_f64(1.f); + v_float64 half = vx_setall_f64(0.5f); + for (; c <= cols - nlanes64; c += nlanes64) + { + v_float64 v = v_mul(half, v_sub(one, v_cos(v_mul(vcoeff0, vindex)))); + vx_store(wc + c, v); + vindex = v_add(vindex, delta); + } +#endif + for(; c < cols; c++) + wc[c] = 0.5 * (1.0 - cos(coeff0 * c)); if(dst.depth() == CV_32F) { @@ -623,7 +643,24 @@ void cv::createHanningWindow(OutputArray _dst, cv::Size winSize, int type) { float* dstData = dst.ptr(i); double wr = 0.5 * (1.0 - cos(coeff1 * i)); - for(int j = 0; j < cols; j++) + int j = 0; +#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F + v_float64 vwr = vx_setall_f64(wr); + for (; j < cols; j += nlanes32) + { + if (j > cols - nlanes32) + { + if (j == 0) + break; + j = cols - nlanes32; + } + + v_float64 v0 = v_mul(vwr, vx_load(wc + j)); + v_float64 v1 = v_mul(vwr, vx_load(wc + j + nlanes64)); + vx_store(dstData + j, v_cvt_f32(v0, v1)); + } +#endif + for(; j < cols; j++) dstData[j] = (float)(wr * wc[j]); } } @@ -633,7 +670,23 @@ void cv::createHanningWindow(OutputArray _dst, cv::Size winSize, int type) { double* dstData = dst.ptr(i); double wr = 0.5 * (1.0 - cos(coeff1 * i)); - for(int j = 0; j < cols; j++) + int j = 0; +#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F + v_float64 vwr = vx_setall_f64(wr); + for (; j < cols; j += nlanes64) + { + if (j > cols - nlanes64) + { + if (j == 0) + break; + j = cols - nlanes64; + } + + v_float64 v = v_mul(vwr, vx_load(wc + j)); + vx_store(dstData + j, v); + } +#endif + for(; j < cols; j++) dstData[j] = wr * wc[j]; } }