Merge pull request #21441 from alexgiving:atrutnev/split3_simd_fluid

This commit is contained in:
Alexander Alekhin 2022-01-25 15:00:54 +00:00
commit 9238316cf1
4 changed files with 43 additions and 16 deletions

View File

@ -2509,26 +2509,18 @@ GAPI_FLUID_KERNEL(GFluidSplit3, cv::gapi::core::GSplit3, false)
static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3)
{
const auto *in = src.InLine<uchar>(0);
auto *out1 = dst1.OutLine<uchar>();
auto *out2 = dst2.OutLine<uchar>();
auto *out3 = dst3.OutLine<uchar>();
const auto *in = src.InLine<uchar>(0);
auto *out1 = dst1.OutLine<uchar>();
auto *out2 = dst2.OutLine<uchar>();
auto *out3 = dst3.OutLine<uchar>();
GAPI_Assert(3 == src.meta().chan);
int width = src.length();
int w = 0;
int w = 0; // cycle counter
#if CV_SIMD128
for (; w <= width-16; w+=16)
{
v_uint8x16 a, b, c;
v_load_deinterleave(&in[3*w], a, b, c);
v_store(&out1[w], a);
v_store(&out2[w], b);
v_store(&out3[w], c);
}
#endif
#if CV_SIMD
w = split3_simd(in, out1, out2, out3, width);
#endif
for (; w < width; w++)
{

View File

@ -207,6 +207,13 @@ ABSDIFFC_SIMD(float)
#undef ABSDIFFC_SIMD
// Entry point for the SIMD part of the Split3 fluid kernel.
//
// Forwards all arguments unchanged to an implementation of split3_simd chosen
// by the CV_CPU_DISPATCH macro from the CV_CPU_DISPATCH_MODES_ALL list.
// NOTE(review): the macro is presumably what produces the return statement
// (this body has no explicit `return`) -- confirm against the macro definition.
//
// in    - interleaved 3-channel source row
// out1, out2, out3 - destination rows, one per channel
// width - row length in pixels
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
uchar out3[], const int width)
{
CV_CPU_DISPATCH(split3_simd, (in, out1, out2, out3, width),
CV_CPU_DISPATCH_MODES_ALL);
}
} // namespace fluid
} // namespace gapi
} // namespace cv

View File

@ -163,6 +163,9 @@ ABSDIFFC_SIMD(float)
#undef ABSDIFFC_SIMD
// SIMD helper for the Split3 fluid kernel: de-interleaves a 3-channel uchar
// row `in` of `width` pixels into the three single-channel rows out1/out2/out3.
// Returns an int that the caller uses as the index from which its scalar loop
// resumes, so pixels not covered by the vector code must be handled there.
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
uchar out3[], const int width);
} // namespace fluid
} // namespace gapi
} // namespace cv

View File

@ -184,6 +184,9 @@ ABSDIFFC_SIMD(float)
#undef ABSDIFFC_SIMD
// CPU-optimized Split3 helper: de-interleaves a 3-channel uchar row `in` of
// `width` pixels into out1/out2/out3 using full vector registers only.
// Returns the number of pixels processed, i.e. the index of the first pixel
// that was left untouched and still has to be handled by the caller.
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
uchar out3[], const int width);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
struct scale_tag {};
@ -1568,6 +1571,28 @@ ABSDIFFC_SIMD(float)
#undef ABSDIFFC_SIMD
//-------------------------
//
// Fluid kernels: Split3
//
//-------------------------
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
uchar out3[], const int width)
{
constexpr int nlanes = v_uint8::nlanes;
int x = 0;
for (; x <= width - nlanes; x += nlanes)
{
v_uint8 a, b, c;
v_load_deinterleave(&in[3 * x], a, b, c);
vx_store(&out1[x], a);
vx_store(&out2[x], b);
vx_store(&out3[x], c);
}
return x;
}
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
CV_CPU_OPTIMIZATION_NAMESPACE_END