mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 11:10:21 +08:00
Merge pull request #21534 from alexgiving:atrutnev/simd_for_merge4
This commit is contained in:
commit
2efcaa9e8e
@ -2686,16 +2686,8 @@ GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false)
|
||||
|
||||
int w = 0; // cycle counter
|
||||
|
||||
#if CV_SIMD128
|
||||
for (; w <= width-16; w+=16)
|
||||
{
|
||||
v_uint8x16 a, b, c, d;
|
||||
a = v_load(&in1[w]);
|
||||
b = v_load(&in2[w]);
|
||||
c = v_load(&in3[w]);
|
||||
d = v_load(&in4[w]);
|
||||
v_store_interleave(&out[4*w], a, b, c, d);
|
||||
}
|
||||
#if CV_SIMD
|
||||
w = merge4_simd(in1, in2, in3, in4, out, width);
|
||||
#endif
|
||||
|
||||
for (; w < width; w++)
|
||||
|
@ -256,6 +256,13 @@ int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
// Dispatch wrapper for the Merge4 SIMD routine.
// Forwards (in1, in2, in3, in4, out, width) unchanged to merge4_simd through
// CV_CPU_DISPATCH, using CV_CPU_DISPATCH_MODES_ALL as the candidate list.
// NOTE(review): there is no explicit return here, so the macro is presumably
// responsible for returning the int produced by the selected implementation.
int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
|
||||
const uchar in4[], uchar out[], const int width)
|
||||
{
|
||||
CV_CPU_DISPATCH(merge4_simd, (in1, in2, in3, in4, out, width),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
} // namespace fluid
|
||||
} // namespace gapi
|
||||
} // namespace cv
|
||||
|
@ -196,6 +196,9 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
|
||||
// merge3_simd: takes three const uchar input arrays, one uchar output array
// and a width; returns an int.
int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
|
||||
uchar out[], const int width);
|
||||
|
||||
// merge4_simd: four-input counterpart of merge3_simd (same parameter layout
// with an extra in4 array); returns an int.
// NOTE(review): the result is presumably the number of columns already
// handled by the vector code - confirm against the implementation.
int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
|
||||
const uchar in4[], uchar out[], const int width);
|
||||
|
||||
} // namespace fluid
|
||||
} // namespace gapi
|
||||
} // namespace cv
|
||||
|
@ -217,6 +217,9 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[],
|
||||
// merge3_simd: takes three const uchar input arrays, one uchar output array
// and a width; returns an int.
int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
|
||||
uchar out[], const int width);
|
||||
|
||||
// merge4_simd: four-input counterpart of merge3_simd (same parameter layout
// with an extra in4 array); returns an int. This declaration sits above the
// CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY guard, so it is visible even when
// the definitions below that guard are compiled out.
int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
|
||||
const uchar in4[], uchar out[], const int width);
|
||||
|
||||
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
// Empty tag type (no members).
// NOTE(review): presumably used to select an overload elsewhere in this
// file - its usage is not visible in this excerpt.
struct scale_tag {};
|
||||
@ -2076,6 +2079,41 @@ int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
|
||||
return x;
|
||||
}
|
||||
|
||||
//-------------------------
|
||||
//
|
||||
// Fluid kernels: Merge4
|
||||
//
|
||||
//-------------------------
|
||||
|
||||
int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
|
||||
const uchar in4[], uchar out[], const int width)
|
||||
{
|
||||
constexpr int nlanes = v_uint8::nlanes;
|
||||
if (width < nlanes)
|
||||
return 0;
|
||||
|
||||
int x = 0;
|
||||
for (;;)
|
||||
{
|
||||
for (; x <= width - nlanes; x += nlanes)
|
||||
{
|
||||
v_uint8 a, b, c, d;
|
||||
a = vx_load(&in1[x]);
|
||||
b = vx_load(&in2[x]);
|
||||
c = vx_load(&in3[x]);
|
||||
d = vx_load(&in4[x]);
|
||||
v_store_interleave(&out[4 * x], a, b, c, d);
|
||||
}
|
||||
if (x < width)
|
||||
{
|
||||
x = width - nlanes;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||
|
Loading…
Reference in New Issue
Block a user