Merge pull request #21534 from alexgiving:atrutnev/simd_for_merge4

This commit is contained in:
Alexander Alekhin 2022-02-21 22:17:16 +00:00
commit 2efcaa9e8e
4 changed files with 50 additions and 10 deletions

View File

@ -2686,16 +2686,8 @@ GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false)
int w = 0; // cycle counter
#if CV_SIMD128
for (; w <= width-16; w+=16)
{
v_uint8x16 a, b, c, d;
a = v_load(&in1[w]);
b = v_load(&in2[w]);
c = v_load(&in3[w]);
d = v_load(&in4[w]);
v_store_interleave(&out[4*w], a, b, c, d);
}
#if CV_SIMD
w = merge4_simd(in1, in2, in3, in4, out, width);
#endif
for (; w < width; w++)

View File

@ -256,6 +256,13 @@ int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
CV_CPU_DISPATCH_MODES_ALL);
}
// Entry point for the Merge4 SIMD path: hands all six arguments to the
// merge4_simd implementation through CV_CPU_DISPATCH, using the
// CV_CPU_DISPATCH_MODES_ALL mode list.
// NOTE(review): no explicit return statement is visible here; presumably the
// macro expands to a return of the selected CPU-specific variant's result --
// confirm against the macro definition.
int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
const uchar in4[], uchar out[], const int width)
{
CV_CPU_DISPATCH(merge4_simd, (in1, in2, in3, in4, out, width),
CV_CPU_DISPATCH_MODES_ALL);
}
} // namespace fluid
} // namespace gapi
} // namespace cv

View File

@ -196,6 +196,9 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
uchar out[], const int width);
// SIMD helper for the Merge4 fluid kernel: takes four input uchar arrays, one
// output uchar array and the row width.
// NOTE(review): the int result is presumably the index from which the caller
// continues with scalar code (as with merge3_simd) -- confirm against the
// implementation.
int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
const uchar in4[], uchar out[], const int width);
} // namespace fluid
} // namespace gapi
} // namespace cv

View File

@ -217,6 +217,9 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
uchar out[], const int width);
// Declaration of the per-CPU-target Merge4 implementation: four input uchar
// arrays, one output uchar array and the row width.
// NOTE(review): the int result is presumably the number of leading elements
// already written to out -- confirm against the definition.
int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
const uchar in4[], uchar out[], const int width);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
struct scale_tag {};
@ -2076,6 +2079,41 @@ int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
return x;
}
//-------------------------
//
// Fluid kernels: Merge4
//
//-------------------------
int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
const uchar in4[], uchar out[], const int width)
{
constexpr int nlanes = v_uint8::nlanes;
if (width < nlanes)
return 0;
int x = 0;
for (;;)
{
for (; x <= width - nlanes; x += nlanes)
{
v_uint8 a, b, c, d;
a = vx_load(&in1[x]);
b = vx_load(&in2[x]);
c = vx_load(&in3[x]);
d = vx_load(&in4[x]);
v_store_interleave(&out[4 * x], a, b, c, d);
}
if (x < width)
{
x = width - nlanes;
continue;
}
break;
}
return x;
}
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
CV_CPU_OPTIMIZATION_NAMESPACE_END