From aa53541235bd56946b8242924e01a5fff9b24eff Mon Sep 17 00:00:00 2001
From: Aleksei Trutnev
Date: Mon, 7 Feb 2022 12:54:50 +0300
Subject: [PATCH] enable dynamic dispatching for merge4

Replace the inline CV_SIMD128 loop in the GFluidMerge4 kernel with a call
to merge4_simd(), following the same layout already used for merge3_simd():

  * gfluidcore_func.hpp          - declaration visible to the kernel
  * gfluidcore_func.dispatch.cpp - CV_CPU_DISPATCH wrapper
  * gfluidcore_func.simd.hpp     - implementation on v_uint8 / vx_load

merge4_simd() returns the number of pixels it has written. It returns 0
when width < v_uint8::nlanes, leaving the whole row to the scalar loop
that follows the call in the kernel. Otherwise it processes full vectors
and, if a tail remains, steps back to width - nlanes and re-processes the
last full vector, so it returns width.
---
 .../gapi/src/backends/fluid/gfluidcore.cpp  | 12 +-----
 .../fluid/gfluidcore_func.dispatch.cpp      |  7 ++++
 .../src/backends/fluid/gfluidcore_func.hpp  |  3 ++
 .../backends/fluid/gfluidcore_func.simd.hpp | 38 +++++++++++++++++++
 4 files changed, 50 insertions(+), 10 deletions(-)

diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp b/modules/gapi/src/backends/fluid/gfluidcore.cpp
index 23d9822bcc..22f73e553c 100644
--- a/modules/gapi/src/backends/fluid/gfluidcore.cpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp
@@ -2686,16 +2686,8 @@ GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false)
 
         int w = 0; // cycle counter
 
-    #if CV_SIMD128
-        for (; w <= width-16; w+=16)
-        {
-            v_uint8x16 a, b, c, d;
-            a = v_load(&in1[w]);
-            b = v_load(&in2[w]);
-            c = v_load(&in3[w]);
-            d = v_load(&in4[w]);
-            v_store_interleave(&out[4*w], a, b, c, d);
-        }
+    #if CV_SIMD
+        w = merge4_simd(in1, in2, in3, in4, out, width);
     #endif
 
         for (; w < width; w++)
diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
index eb7a2e91d3..9afac9ceb4 100644
--- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
@@ -256,6 +256,13 @@ int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
         CV_CPU_DISPATCH_MODES_ALL);
 }
 
+int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+                const uchar in4[], uchar out[], const int width)
+{
+    CV_CPU_DISPATCH(merge4_simd, (in1, in2, in3, in4, out, width),
+        CV_CPU_DISPATCH_MODES_ALL);
+}
+
 } // namespace fluid
 } // namespace gapi
 } // namespace cv
diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp
index f61d7d40b3..868923932d 100644
--- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp
@@ -196,6 +196,9 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
 int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
                 uchar out[], const int width);
 
+int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+                const uchar in4[], uchar out[], const int width);
+
 } // namespace fluid
 } // namespace gapi
 } // namespace cv
diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp
index b5c7cae431..2424a57677 100644
--- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp
@@ -217,6 +217,9 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
 int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
                 uchar out[], const int width);
 
+int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+                const uchar in4[], uchar out[], const int width);
+
 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
 
 struct scale_tag {};
@@ -2076,6 +2079,41 @@ int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
     return x;
 }
 
+//-------------------------
+//
+// Fluid kernels: Merge4
+//
+//-------------------------
+
+int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+                const uchar in4[], uchar out[], const int width)
+{
+    constexpr int nlanes = v_uint8::nlanes;
+    if (width < nlanes)
+        return 0;
+
+    int x = 0;
+    for (;;)
+    {
+        for (; x <= width - nlanes; x += nlanes)
+        {
+            v_uint8 a, b, c, d;
+            a = vx_load(&in1[x]);
+            b = vx_load(&in2[x]);
+            c = vx_load(&in3[x]);
+            d = vx_load(&in4[x]);
+            v_store_interleave(&out[4 * x], a, b, c, d);
+        }
+        if (x < width)
+        {
+            x = width - nlanes;
+            continue;
+        }
+        break;
+    }
+    return x;
+}
+
 #endif  // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
 
 CV_CPU_OPTIMIZATION_NAMESPACE_END