Enable dynamic dispatching for Merge3

This commit is contained in:
Aleksei Trutnev 2022-01-27 16:12:53 +03:00
parent f4a7754cc0
commit c62367612d
4 changed files with 59 additions and 20 deletions

View File

@ -2509,10 +2509,10 @@ GAPI_FLUID_KERNEL(GFluidSplit3, cv::gapi::core::GSplit3, false)
static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3) static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3)
{ {
const auto *in = src.InLine<uchar>(0); const auto *in = src.InLine<uchar>(0);
auto *out1 = dst1.OutLine<uchar>(); auto *out1 = dst1.OutLine<uchar>();
auto *out2 = dst2.OutLine<uchar>(); auto *out2 = dst2.OutLine<uchar>();
auto *out3 = dst3.OutLine<uchar>(); auto *out3 = dst3.OutLine<uchar>();
GAPI_Assert(3 == src.meta().chan); GAPI_Assert(3 == src.meta().chan);
int width = src.length(); int width = src.length();
@ -2537,11 +2537,11 @@ GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false)
static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3, Buffer &dst4) static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3, Buffer &dst4)
{ {
const auto *in = src.InLine<uchar>(0); const auto *in = src.InLine<uchar>(0);
auto *out1 = dst1.OutLine<uchar>(); auto *out1 = dst1.OutLine<uchar>();
auto *out2 = dst2.OutLine<uchar>(); auto *out2 = dst2.OutLine<uchar>();
auto *out3 = dst3.OutLine<uchar>(); auto *out3 = dst3.OutLine<uchar>();
auto *out4 = dst4.OutLine<uchar>(); auto *out4 = dst4.OutLine<uchar>();
GAPI_Assert(4 == src.meta().chan); GAPI_Assert(4 == src.meta().chan);
int width = src.length(); int width = src.length();
@ -2574,18 +2574,10 @@ GAPI_FLUID_KERNEL(GFluidMerge3, cv::gapi::core::GMerge3, false)
GAPI_Assert(3 == dst.meta().chan); GAPI_Assert(3 == dst.meta().chan);
int width = dst.length(); int width = dst.length();
int w = 0;
int w = 0; // cycle counter #if CV_SIMD
w = merge3_simd(in1, in2, in3, out, width);
#if CV_SIMD128
for (; w <= width-16; w+=16)
{
v_uint8x16 a, b, c;
a = v_load(&in1[w]);
b = v_load(&in2[w]);
c = v_load(&in3[w]);
v_store_interleave(&out[3*w], a, b, c);
}
#endif #endif
for (; w < width; w++) for (; w < width; w++)

View File

@ -221,6 +221,13 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
CV_CPU_DISPATCH_MODES_ALL); CV_CPU_DISPATCH_MODES_ALL);
} }
// Dispatching entry point for the Merge3 SIMD kernel: forwards the three input
// rows, the output row and the row width to a merge3_simd implementation
// selected by CV_CPU_DISPATCH among CV_CPU_DISPATCH_MODES_ALL.
// NOTE(review): no explicit return statement here, so the macro is presumably
// expected to return the selected implementation's result - confirm against
// the CV_CPU_DISPATCH definition.
int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
uchar out[], const int width)
{
CV_CPU_DISPATCH(merge3_simd, (in1, in2, in3, out, width),
CV_CPU_DISPATCH_MODES_ALL);
}
} // namespace fluid } // namespace fluid
} // namespace gapi } // namespace gapi
} // namespace cv } // namespace cv

View File

@ -169,6 +169,9 @@ int split3_simd(const uchar in[], uchar out1[], uchar out2[],
int split4_simd(const uchar in[], uchar out1[], uchar out2[], int split4_simd(const uchar in[], uchar out1[], uchar out2[],
uchar out3[], uchar out4[], const int width); uchar out3[], uchar out4[], const int width);
// Merge3: interleaves `width` elements from in1/in2/in3 into `out`
// (3 * width elements, written as in1[i], in2[i], in3[i] triples).
// Returns the number of elements handled by the vectorized path so the caller
// can finish the remainder with scalar code.
int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
uchar out[], const int width);
} // namespace fluid } // namespace fluid
} // namespace gapi } // namespace gapi
} // namespace cv } // namespace cv

View File

@ -190,6 +190,9 @@ int split3_simd(const uchar in[], uchar out1[], uchar out2[],
int split4_simd(const uchar in[], uchar out1[], uchar out2[], int split4_simd(const uchar in[], uchar out1[], uchar out2[],
uchar out3[], uchar out4[], const int width); uchar out3[], uchar out4[], const int width);
// Merge3 SIMD kernel: interleaves `width` elements from in1/in2/in3 into `out`
// (3 * width elements). Returns how many elements were processed; 0 when the
// row is shorter than one vector, otherwise `width`.
int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
uchar out[], const int width);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
struct scale_tag {}; struct scale_tag {};
@ -1643,6 +1646,40 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
return x; return x;
} }
//-------------------------
//
// Fluid kernels: Merge3
//
//-------------------------
int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
uchar out[], const int width)
{
constexpr int nlanes = v_uint8::nlanes;
if (width < nlanes)
return 0;
int x = 0;
for (;;)
{
for (; x <= width - nlanes; x += nlanes)
{
v_uint8 a, b, c;
a = vx_load(&in1[x]);
b = vx_load(&in2[x]);
c = vx_load(&in3[x]);
v_store_interleave(&out[3 * x], a, b, c);
}
if (x < width)
{
x = width - nlanes;
continue;
}
break;
}
return x;
}
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
CV_CPU_OPTIMIZATION_NAMESPACE_END CV_CPU_OPTIMIZATION_NAMESPACE_END