Merge pull request #21797 from anna-khakimova:ak/merge3_extend_supported_types

GAPI Fluid SIMD: Add support for several new types in Merge3 - Support for several new types was added. - Fixes for the Split/Merge and ConvertTo issues.
2025-06-10 19:24:07 +08:00 · 2023-05-31 14:59:39 +03:00 · 2023-05-31 14:59:39 +03:00 · 6d3dd24622
commit 6d3dd24622
parent fc5d412ba7
10 changed files with 143 additions and 60 deletions
--- a/modules/gapi/perf/common/gapi_core_perf_tests.hpp
+++ b/modules/gapi/perf/common/gapi_core_perf_tests.hpp
@ -62,7 +62,7 @@ namespace opencv_test
    class InRangePerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, cv::GCompileArgs>> {};
    class Split3PerfTest : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
    class Split4PerfTest : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
-    class Merge3PerfTest : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
+    class Merge3PerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, cv::GCompileArgs>> {};
    class Merge4PerfTest : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
    class RemapPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, cv::GCompileArgs>> {};
    class FlipPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, int, cv::GCompileArgs>> {};
--- a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
+++ b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
@ -1577,11 +1577,12 @@ PERF_TEST_P_(Merge3PerfTest, TestPerformance)
 {
    compare_f cmpF;
    cv::Size sz;
    MatType type = -1;
    cv::GCompileArgs compile_args;
-    std::tie(cmpF, sz, compile_args) = GetParam();
+    std::tie(cmpF, sz, type, compile_args) = GetParam();
-    initMatsRandU(CV_8UC1, sz, CV_8UC3);
+    initMatsRandU(type, sz, CV_MAKETYPE(type, 3));
-    cv::Mat in_mat3(sz, CV_8UC1);
+    cv::Mat in_mat3(sz, type);
    cv::Scalar mean = cv::Scalar::all(127);
    cv::Scalar stddev = cv::Scalar::all(40.f);
    cv::randn(in_mat3, mean, stddev);
--- a/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp
+++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp
@ -252,6 +252,7 @@ INSTANTIATE_TEST_CASE_P(Split4PerfTestCPU, Split4PerfTest,
 // Merge3 perf cases built with CORE_CPU compile args: AbsExact comparator,
 // four frame sizes, and CV_8U as the only input depth.
 INSTANTIATE_TEST_CASE_P(Merge3PerfTestCPU, Merge3PerfTest,
    Combine(Values(AbsExact().to_compare_f()),
            Values(szSmall128, szVGA, sz720p, sz1080p),
            Values(CV_8U),
            Values(cv::compile_args(CORE_CPU))));
 INSTANTIATE_TEST_CASE_P(Merge4PerfTestCPU, Merge4PerfTest,
--- a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp
+++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp
@ -253,6 +253,7 @@ INSTANTIATE_TEST_CASE_P(Split4PerfTestFluid, Split4PerfTest,
 // Merge3 perf cases built with CORE_FLUID compile args: AbsExact comparator,
 // four frame sizes, and input depths CV_8U, CV_16S, CV_16U and CV_32F.
 INSTANTIATE_TEST_CASE_P(Merge3PerfTestFluid, Merge3PerfTest,
    Combine(Values(AbsExact().to_compare_f()),
            Values(szSmall128, szVGA, sz720p, sz1080p),
            Values(CV_8U, CV_16S, CV_16U, CV_32F),
            Values(cv::compile_args(CORE_FLUID))));
 INSTANTIATE_TEST_CASE_P(Merge4PerfTestFluid, Merge4PerfTest,
--- a/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp
+++ b/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp
@ -242,6 +242,7 @@ INSTANTIATE_TEST_CASE_P(Split4PerfTestGPU, Split4PerfTest,
 // Merge3 perf cases built with CORE_GPU compile args: AbsExact comparator,
 // four frame sizes, and CV_8U as the only input depth.
 INSTANTIATE_TEST_CASE_P(Merge3PerfTestGPU, Merge3PerfTest,
    Combine(Values(AbsExact().to_compare_f()),
            Values( szSmall128, szVGA, sz720p, sz1080p ),
            Values(CV_8U),
            Values(cv::compile_args(CORE_GPU))));
 INSTANTIATE_TEST_CASE_P(Merge4PerfTestGPU, Merge4PerfTest,
--- a/modules/gapi/src/backends/fluid/gfluidcore.cpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp
@ -2320,12 +2320,15 @@ GAPI_FLUID_KERNEL(GFluidSplit3, cv::gapi::core::GSplit3, false)
    static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3)
    {
        GAPI_Assert((src.meta().depth == CV_8U) && (dst1.meta().depth == CV_8U) &&
                    (dst2.meta().depth == CV_8U) && (dst3.meta().depth == CV_8U) &&
                    (3 == src.meta().chan));
        const auto *in   = src.InLine<uchar>(0);
              auto *out1 = dst1.OutLine<uchar>();
              auto *out2 = dst2.OutLine<uchar>();
              auto *out3 = dst3.OutLine<uchar>();
        GAPI_Assert(3 == src.meta().chan);
        int width = src.length();
        int w = 0;
@ -2348,13 +2351,16 @@ GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false)
    static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3, Buffer &dst4)
    {
        GAPI_Assert((src.meta().depth == CV_8U) && (dst1.meta().depth == CV_8U) &&
                    (dst2.meta().depth == CV_8U) && (dst3.meta().depth == CV_8U) &&
                    (dst4.meta().depth == CV_8U) && (4 == src.meta().chan));
        const auto *in   = src.InLine<uchar>(0);
              auto *out1 = dst1.OutLine<uchar>();
              auto *out2 = dst2.OutLine<uchar>();
              auto *out3 = dst3.OutLine<uchar>();
              auto *out4 = dst4.OutLine<uchar>();
        GAPI_Assert(4 == src.meta().chan);
        int width = src.length();
        int w = 0;
@ -2372,31 +2378,46 @@ GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false)
    }
 };
 // Interleaves one line from each of the three inputs into the output line:
 // output element 3*i + k is taken from element i of the k-th source (k = 0..2).
 // When CV_SIMD is enabled, merge3_simd() runs first and its return value is
 // the index from which the scalar loop below continues.
 template<typename T>
 CV_ALWAYS_INLINE void run_merge3(Buffer& dst, const View& src1, const View& src2, const View& src3)
 {
    const auto* chan0 = src1.InLine<T>(0);
    const auto* chan1 = src2.InLine<T>(0);
    const auto* chan2 = src3.InLine<T>(0);
    auto* packed = dst.OutLine<T>();
    const int length = dst.length();

    int x = 0;
 #if CV_SIMD
    x = merge3_simd(chan0, chan1, chan2, packed, length);
 #endif
    // Scalar path: covers every element the SIMD routine did not report as done
    // (all of them when CV_SIMD is off).
    while (x < length)
    {
        auto* pixel = packed + 3 * x;
        pixel[0] = chan0[x];
        pixel[1] = chan1[x];
        pixel[2] = chan2[x];
        ++x;
    }
 }
 GAPI_FLUID_KERNEL(GFluidMerge3, cv::gapi::core::GMerge3, false)
 {
    static const int Window = 1;
-    static void run(const View &src1, const View &src2, const View &src3, Buffer &dst)
+    static void run(const View& src1, const View& src2, const View& src3, Buffer& dst)
    {
-        const auto *in1 = src1.InLine<uchar>(0);
+        GAPI_Assert((src1.meta().depth == dst.meta().depth) &&
-        const auto *in2 = src2.InLine<uchar>(0);
+                    (src1.meta().depth == src2.meta().depth) &&
-        const auto *in3 = src3.InLine<uchar>(0);
+                    (src1.meta().depth == src3.meta().depth));
              auto *out = dst.OutLine<uchar>();
-        GAPI_Assert(3 == dst.meta().chan);
+        // SRC/DST TYPE      OP          __VA_ARGS__
-        int width = dst.length();
+        MERGE3_(uchar,  run_merge3, dst, src1, src2, src3);
-        int w = 0;
+        MERGE3_(ushort, run_merge3, dst, src1, src2, src3);
        MERGE3_(short,  run_merge3, dst, src1, src2, src3);
        MERGE3_(float,  run_merge3, dst, src1, src2, src3);
-    #if CV_SIMD
+        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
        w = merge3_simd(in1, in2, in3, out, width);
    #endif
        for (; w < width; w++)
        {
            out[3*w    ] = in1[w];
            out[3*w + 1] = in2[w];
            out[3*w + 2] = in3[w];
        }
    }
 };
@ -2407,13 +2428,16 @@ GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false)
    static void run(const View &src1, const View &src2, const View &src3, const View &src4,
                    Buffer &dst)
    {
        GAPI_Assert((dst.meta().depth == CV_8U) && (src1.meta().depth == CV_8U) &&
                    (src2.meta().depth == CV_8U) && (src3.meta().depth == CV_8U) &&
                    (4 == dst.meta().chan));
        const auto *in1 = src1.InLine<uchar>(0);
        const auto *in2 = src2.InLine<uchar>(0);
        const auto *in3 = src3.InLine<uchar>(0);
        const auto *in4 = src4.InLine<uchar>(0);
              auto *out = dst.OutLine<uchar>();
        GAPI_Assert(4 == dst.meta().chan);
        int width = dst.length();
        int w = 0; // cycle counter
--- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
@ -277,13 +277,21 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
                    CV_CPU_DISPATCH_MODES_ALL);
 }
-int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+#define MERGE3_SIMD(T)                                              \
-                uchar out[], const int width)
+int merge3_simd(const T in1[], const T in2[], const T in3[],        \
-{
+                T out[], const int width)                           \
-    CV_CPU_DISPATCH(merge3_simd, (in1, in2, in3, out, width),
+{                                                                   \
-                    CV_CPU_DISPATCH_MODES_ALL);
+    CV_CPU_DISPATCH(merge3_simd, (in1, in2, in3, out, width),       \
                    CV_CPU_DISPATCH_MODES_ALL);                     \
 }
 MERGE3_SIMD(uchar)
 MERGE3_SIMD(short)
 MERGE3_SIMD(ushort)
 MERGE3_SIMD(float)
 #undef MERGE3_SIMD
 int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
                const uchar in4[], uchar out[], const int width)
 {
--- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp
@ -216,8 +216,16 @@ int split3_simd(const uchar in[], uchar out1[], uchar out2[],
 int split4_simd(const uchar in[], uchar out1[], uchar out2[],
                uchar out3[], uchar out4[], const int width);
-int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+#define MERGE3_SIMD(T)                                          \
-               uchar out[], const int width);
+int merge3_simd(const T in1[], const T in2[], const T in3[],    \
                T out[], const int width);
 MERGE3_SIMD(uchar)
 MERGE3_SIMD(short)
 MERGE3_SIMD(ushort)
 MERGE3_SIMD(float)
 #undef MERGE3_SIMD
 int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
                const uchar in4[], uchar out[], const int width);
--- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp
@ -322,12 +322,21 @@ int split3_simd(const uchar in[], uchar out1[], uchar out2[],
 int split4_simd(const uchar in[], uchar out1[], uchar out2[],
                uchar out3[], uchar out4[], const int width);
-int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+#define MERGE3_SIMD(T)                                          \
-                uchar out[], const int width);
+int merge3_simd(const T in1[], const T in2[], const T in3[],    \
                T out[], const int width);
 MERGE3_SIMD(uchar)
 MERGE3_SIMD(short)
 MERGE3_SIMD(ushort)
 MERGE3_SIMD(float)
 #undef MERGE3_SIMD
 int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
                const uchar in4[], uchar out[], const int width);
 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
 #define SRC_SHORT_OR_USHORT std::is_same<SRC, short>::value || std::is_same<SRC, ushort>::value
@ -2530,34 +2539,42 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
 //
 //-------------------------
-int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+#define MERGE3_SIMD(T)                                              \
-                uchar out[], const int width)
+int merge3_simd(const T in1[], const T in2[], const T in3[],        \
-{
+                T out[], const int width)                           \
-    constexpr int nlanes = v_uint8::nlanes;
+{                                                                   \
-    if (width < nlanes)
+    constexpr int nlanes = vector_type_of_t<T>::nlanes;             \
-        return 0;
+    if (width < nlanes)                                             \
-
+        return 0;                                                   \
-    int x = 0;
+                                                                    \
-    for (;;)
+    int x = 0;                                                      \
-    {
+    for (;;)                                                        \
-        for (; x <= width - nlanes; x += nlanes)
+    {                                                               \
-        {
+        for (; x <= width - nlanes; x += nlanes)                    \
-            v_uint8 a, b, c;
+        {                                                           \
-            a = vx_load(&in1[x]);
+            vector_type_of_t<T> a, b, c;                            \
-            b = vx_load(&in2[x]);
+            a = vx_load(&in1[x]);                                   \
-            c = vx_load(&in3[x]);
+            b = vx_load(&in2[x]);                                   \
-            v_store_interleave(&out[3 * x], a, b, c);
+            c = vx_load(&in3[x]);                                   \
-        }
+            v_store_interleave(&out[3 * x], a, b, c);               \
-        if (x < width)
+        }                                                           \
-        {
+        if (x < width)                                              \
-            x = width - nlanes;
+        {                                                           \
-            continue;
+            x = width - nlanes;                                     \
-        }
+            continue;                                               \
-        break;
+        }                                                           \
-    }
+        break;                                                      \
-    return x;
+    }                                                               \
    return x;                                                       \
 }
 MERGE3_SIMD(uchar)
 MERGE3_SIMD(short)
 MERGE3_SIMD(ushort)
 MERGE3_SIMD(float)
 #undef MERGE3_SIMD
 //-------------------------
 //
 // Fluid kernels: Merge4
@ -2926,6 +2943,8 @@ CV_ALWAYS_INLINE void convertto_simd_nocoeff_impl(const SRC* inx, float* outx)
 int convertto_simd(const SRC in[], DST out[], const int length)    \
 {                                                                  \
    constexpr int nlanes = vector_type_of_t<DST>::nlanes;          \
    if (length < nlanes)                                           \
        return 0;                                                  \
                                                                   \
    int x = 0;                                                     \
    for (;;)                                                       \
@ -3093,6 +3112,9 @@ int convertto_scaled_simd(const SRC in[], DST out[], const float alpha,     \
                          const float beta, const int length)               \
 {                                                                           \
    constexpr int nlanes = vector_type_of_t<DST>::nlanes;                   \
    if (length < nlanes)                                                    \
        return 0;                                                           \
                                                                            \
    v_float32 v_alpha = vx_setall_f32(alpha);                               \
    v_float32 v_beta = vx_setall_f32(beta);                                 \
                                                                            \
--- a/modules/gapi/src/backends/fluid/gfluidutils.hpp
+++ b/modules/gapi/src/backends/fluid/gfluidutils.hpp
@ -86,6 +86,23 @@ using cv::gapi::own::rintd;
        return;                                            \
    }
 // MERGE3_(T, OP, ...): one arm of a depth-dispatch chain for 3-input merge.
 //
 // If cv::DataType<T>::depth equals the depth of both `dst` and `src1`, the
 // macro debug-asserts that all three sources have the same length as `dst`,
 // that each source has 1 channel and `dst` has 3, then calls
 // OP<T>(__VA_ARGS__) and returns from the enclosing function.
 // If the depths do not match, nothing happens and control falls through to
 // the next statement.
 //
 // Requirements on the expansion site (the macro is not hygienic):
 //   - objects named `dst`, `src1`, `src2` and `src3` must be in scope;
 //   - the enclosing function must return void, because of the bare `return;`.
 // Only `src1` is depth-checked here; `src2`/`src3` are not.
 #define MERGE3_(T, OP, ...)                                \
    if (cv::DataType<T>::depth == dst.meta().depth &&      \
        cv::DataType<T>::depth == src1.meta().depth)       \
    {                                                      \
        GAPI_DbgAssert(dst.length() == src1.length());     \
        GAPI_DbgAssert(dst.length() == src2.length());     \
        GAPI_DbgAssert(dst.length() == src3.length());     \
                                                           \
        GAPI_DbgAssert(1 == src1.meta().chan);             \
        GAPI_DbgAssert(1 == src2.meta().chan);             \
        GAPI_DbgAssert(1 == src3.meta().chan);             \
        GAPI_DbgAssert(3 == dst.meta().chan);              \
                                                           \
        OP<T>(__VA_ARGS__);                                \
        return;                                            \
    }
 } // namespace fluid
 } // namespace gapi
 } // namespace cv