mirror of
https://github.com/opencv/opencv.git
synced 2024-12-18 11:28:02 +08:00
GAPI Fluid: Enable dynamic dispatching for AbsDiffC kernel.
This commit is contained in:
parent
c3910807c5
commit
a4d6bcba09
@ -50,7 +50,7 @@ namespace opencv_test
|
|||||||
class MinPerfTest : public TestPerfParams<tuple<cv::Size, MatType, cv::GCompileArgs>> {};
|
class MinPerfTest : public TestPerfParams<tuple<cv::Size, MatType, cv::GCompileArgs>> {};
|
||||||
class MaxPerfTest : public TestPerfParams<tuple<cv::Size, MatType, cv::GCompileArgs>> {};
|
class MaxPerfTest : public TestPerfParams<tuple<cv::Size, MatType, cv::GCompileArgs>> {};
|
||||||
class AbsDiffPerfTest : public TestPerfParams<tuple<cv::Size, MatType, cv::GCompileArgs>> {};
|
class AbsDiffPerfTest : public TestPerfParams<tuple<cv::Size, MatType, cv::GCompileArgs>> {};
|
||||||
class AbsDiffCPerfTest : public TestPerfParams<tuple<cv::Size, MatType, cv::GCompileArgs>> {};
|
class AbsDiffCPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, cv::GCompileArgs>> {};
|
||||||
class SumPerfTest : public TestPerfParams<tuple<compare_scalar_f, cv::Size, MatType, cv::GCompileArgs>> {};
|
class SumPerfTest : public TestPerfParams<tuple<compare_scalar_f, cv::Size, MatType, cv::GCompileArgs>> {};
|
||||||
class CountNonZeroPerfTest : public TestPerfParams<tuple<compare_scalar_f, cv::Size, MatType, cv::GCompileArgs>> {};
|
class CountNonZeroPerfTest : public TestPerfParams<tuple<compare_scalar_f, cv::Size, MatType, cv::GCompileArgs>> {};
|
||||||
class AddWeightedPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, int, cv::GCompileArgs>> {};
|
class AddWeightedPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, int, cv::GCompileArgs>> {};
|
||||||
|
@ -970,9 +970,10 @@ PERF_TEST_P_(AbsDiffPerfTest, TestPerformance)
|
|||||||
|
|
||||||
PERF_TEST_P_(AbsDiffCPerfTest, TestPerformance)
|
PERF_TEST_P_(AbsDiffCPerfTest, TestPerformance)
|
||||||
{
|
{
|
||||||
cv::Size sz_in = get<0>(GetParam());
|
compare_f cmpF = get<0>(GetParam());
|
||||||
MatType type = get<1>(GetParam());
|
cv::Size sz_in = get<1>(GetParam());
|
||||||
cv::GCompileArgs compile_args = get<2>(GetParam());
|
MatType type = get<2>(GetParam());
|
||||||
|
cv::GCompileArgs compile_args = get<3>(GetParam());
|
||||||
|
|
||||||
|
|
||||||
initMatsRandU(type, sz_in, type, false);
|
initMatsRandU(type, sz_in, type, false);
|
||||||
@ -997,8 +998,9 @@ PERF_TEST_P_(AbsDiffCPerfTest, TestPerformance)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Comparison ////////////////////////////////////////////////////////////
|
// Comparison ////////////////////////////////////////////////////////////
|
||||||
// FIXIT unrealiable check: EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
|
{
|
||||||
EXPECT_EQ(out_mat_gapi.size(), sz_in);
|
EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
|
||||||
|
}
|
||||||
|
|
||||||
SANITY_CHECK_NOTHING();
|
SANITY_CHECK_NOTHING();
|
||||||
}
|
}
|
||||||
|
@ -156,7 +156,8 @@ INSTANTIATE_TEST_CASE_P(AbsDiffPerfTestCPU, AbsDiffPerfTest,
|
|||||||
Values(cv::compile_args(CORE_CPU))));
|
Values(cv::compile_args(CORE_CPU))));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(AbsDiffCPerfTestCPU, AbsDiffCPerfTest,
|
INSTANTIATE_TEST_CASE_P(AbsDiffCPerfTestCPU, AbsDiffCPerfTest,
|
||||||
Combine(Values(szSmall128, szVGA, sz720p, sz1080p),
|
Combine(Values(AbsExact().to_compare_f()),
|
||||||
|
Values(szSmall128, szVGA, sz720p, sz1080p),
|
||||||
Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
|
Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
|
||||||
Values(cv::compile_args(CORE_CPU))));
|
Values(cv::compile_args(CORE_CPU))));
|
||||||
|
|
||||||
|
@ -153,10 +153,9 @@ INSTANTIATE_TEST_CASE_P(AbsDiffPerfTestFluid, AbsDiffPerfTest,
|
|||||||
Values(cv::compile_args(CORE_FLUID))));
|
Values(cv::compile_args(CORE_FLUID))));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(AbsDiffCPerfTestFluid, AbsDiffCPerfTest,
|
INSTANTIATE_TEST_CASE_P(AbsDiffCPerfTestFluid, AbsDiffCPerfTest,
|
||||||
Combine(Values(szSmall128, szVGA, sz720p, sz1080p),
|
Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()),
|
||||||
Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_8UC2,
|
Values(szSmall128, szVGA, sz720p, sz1080p),
|
||||||
CV_16UC2, CV_16SC2, CV_8UC3, CV_16UC3,
|
Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
|
||||||
CV_16SC3, CV_8UC4, CV_16UC4, CV_16SC4),
|
|
||||||
Values(cv::compile_args(CORE_FLUID))));
|
Values(cv::compile_args(CORE_FLUID))));
|
||||||
|
|
||||||
// INSTANTIATE_TEST_CASE_P(SumPerfTestFluid, SumPerfTest,
|
// INSTANTIATE_TEST_CASE_P(SumPerfTestFluid, SumPerfTest,
|
||||||
|
@ -154,7 +154,8 @@ INSTANTIATE_TEST_CASE_P(AbsDiffPerfTestGPU, AbsDiffPerfTest,
|
|||||||
Values(cv::compile_args(CORE_GPU))));
|
Values(cv::compile_args(CORE_GPU))));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(AbsDiffCPerfTestGPU, AbsDiffCPerfTest,
|
INSTANTIATE_TEST_CASE_P(AbsDiffCPerfTestGPU, AbsDiffCPerfTest,
|
||||||
Combine(Values( szSmall128, szVGA, sz720p, sz1080p ),
|
Combine(Values(AbsExact().to_compare_f()),
|
||||||
|
Values( szSmall128, szVGA, sz720p, sz1080p ),
|
||||||
Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
|
Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
|
||||||
Values(cv::compile_args(CORE_GPU))));
|
Values(cv::compile_args(CORE_GPU))));
|
||||||
|
|
||||||
|
@ -994,244 +994,6 @@ static void run_arithm_s(DST out[], const SRC in[], int width, int chan,
|
|||||||
CV_Error(cv::Error::StsBadArg, "unsupported number of channels");
|
CV_Error(cv::Error::StsBadArg, "unsupported number of channels");
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CV_SIMD
|
|
||||||
CV_ALWAYS_INLINE void absdiffc_short_store_c1c2c4(short* out_ptr, const v_int32& c1, const v_int32& c2)
|
|
||||||
{
|
|
||||||
vx_store(out_ptr, v_pack(c1, c2));
|
|
||||||
}
|
|
||||||
|
|
||||||
CV_ALWAYS_INLINE void absdiffc_short_store_c1c2c4(ushort* out_ptr, const v_int32& c1, const v_int32& c2)
|
|
||||||
{
|
|
||||||
vx_store(out_ptr, v_pack_u(c1, c2));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
CV_ALWAYS_INLINE int absdiffc_simd_c1c2c4(const T in[], T out[],
|
|
||||||
const v_float32& s, const int length)
|
|
||||||
{
|
|
||||||
static_assert((std::is_same<T, ushort>::value) || (std::is_same<T, short>::value),
|
|
||||||
"This templated overload is only for short or ushort type combinations.");
|
|
||||||
|
|
||||||
constexpr int nlanes = (std::is_same<T, ushort>::value) ? static_cast<int>(v_uint16::nlanes) :
|
|
||||||
static_cast<int>(v_int16::nlanes);
|
|
||||||
if (length < nlanes)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
int x = 0;
|
|
||||||
for (;;)
|
|
||||||
{
|
|
||||||
for (; x <= length - nlanes; x += nlanes)
|
|
||||||
{
|
|
||||||
v_float32 a1 = v_load_f32(in + x);
|
|
||||||
v_float32 a2 = v_load_f32(in + x + nlanes / 2);
|
|
||||||
|
|
||||||
absdiffc_short_store_c1c2c4(&out[x], v_round(v_absdiff(a1, s)),
|
|
||||||
v_round(v_absdiff(a2, s)));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (x < length && (in != out))
|
|
||||||
{
|
|
||||||
x = length - nlanes;
|
|
||||||
continue; // process unaligned tail
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<>
|
|
||||||
CV_ALWAYS_INLINE int absdiffc_simd_c1c2c4<uchar>(const uchar in[], uchar out[],
|
|
||||||
const v_float32& s, const int length)
|
|
||||||
{
|
|
||||||
constexpr int nlanes = static_cast<int>(v_uint8::nlanes);
|
|
||||||
|
|
||||||
if (length < nlanes)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
int x = 0;
|
|
||||||
for (;;)
|
|
||||||
{
|
|
||||||
for (; x <= length - nlanes; x += nlanes)
|
|
||||||
{
|
|
||||||
v_float32 a1 = v_load_f32(in + x);
|
|
||||||
v_float32 a2 = v_load_f32(in + x + nlanes / 4);
|
|
||||||
v_float32 a3 = v_load_f32(in + x + nlanes / 2);
|
|
||||||
v_float32 a4 = v_load_f32(in + x + 3 * nlanes / 4);
|
|
||||||
|
|
||||||
vx_store(&out[x], v_pack_u(v_pack(v_round(v_absdiff(a1, s)),
|
|
||||||
v_round(v_absdiff(a2, s))),
|
|
||||||
v_pack(v_round(v_absdiff(a3, s)),
|
|
||||||
v_round(v_absdiff(a4, s)))));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (x < length && (in != out))
|
|
||||||
{
|
|
||||||
x = length - nlanes;
|
|
||||||
continue; // process unaligned tail
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
CV_ALWAYS_INLINE void absdiffc_short_store_c3(short* out_ptr, const v_int32& c1,
|
|
||||||
const v_int32& c2, const v_int32& c3,
|
|
||||||
const v_int32& c4, const v_int32& c5,
|
|
||||||
const v_int32& c6)
|
|
||||||
{
|
|
||||||
constexpr int nlanes = static_cast<int>(v_int16::nlanes);
|
|
||||||
vx_store(out_ptr, v_pack(c1, c2));
|
|
||||||
vx_store(out_ptr + nlanes, v_pack(c3, c4));
|
|
||||||
vx_store(out_ptr + 2*nlanes, v_pack(c5, c6));
|
|
||||||
}
|
|
||||||
|
|
||||||
CV_ALWAYS_INLINE void absdiffc_short_store_c3(ushort* out_ptr, const v_int32& c1,
|
|
||||||
const v_int32& c2, const v_int32& c3,
|
|
||||||
const v_int32& c4, const v_int32& c5,
|
|
||||||
const v_int32& c6)
|
|
||||||
{
|
|
||||||
constexpr int nlanes = static_cast<int>(v_uint16::nlanes);
|
|
||||||
vx_store(out_ptr, v_pack_u(c1, c2));
|
|
||||||
vx_store(out_ptr + nlanes, v_pack_u(c3, c4));
|
|
||||||
vx_store(out_ptr + 2*nlanes, v_pack_u(c5, c6));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
CV_ALWAYS_INLINE int absdiffc_simd_c3_impl(const T in[], T out[],
|
|
||||||
const v_float32& s1, const v_float32& s2,
|
|
||||||
const v_float32& s3, const int length)
|
|
||||||
{
|
|
||||||
static_assert((std::is_same<T, ushort>::value) || (std::is_same<T, short>::value),
|
|
||||||
"This templated overload is only for short or ushort type combinations.");
|
|
||||||
|
|
||||||
constexpr int nlanes = (std::is_same<T, ushort>::value) ? static_cast<int>(v_uint16::nlanes):
|
|
||||||
static_cast<int>(v_int16::nlanes);
|
|
||||||
|
|
||||||
if (length < 3 * nlanes)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
int x = 0;
|
|
||||||
for (;;)
|
|
||||||
{
|
|
||||||
for (; x <= length - 3 * nlanes; x += 3 * nlanes)
|
|
||||||
{
|
|
||||||
v_float32 a1 = v_load_f32(in + x);
|
|
||||||
v_float32 a2 = v_load_f32(in + x + nlanes / 2);
|
|
||||||
v_float32 a3 = v_load_f32(in + x + nlanes);
|
|
||||||
v_float32 a4 = v_load_f32(in + x + 3 * nlanes / 2);
|
|
||||||
v_float32 a5 = v_load_f32(in + x + 2 * nlanes);
|
|
||||||
v_float32 a6 = v_load_f32(in + x + 5 * nlanes / 2);
|
|
||||||
|
|
||||||
absdiffc_short_store_c3(&out[x], v_round(v_absdiff(a1, s1)),
|
|
||||||
v_round(v_absdiff(a2, s2)),
|
|
||||||
v_round(v_absdiff(a3, s3)),
|
|
||||||
v_round(v_absdiff(a4, s1)),
|
|
||||||
v_round(v_absdiff(a5, s2)),
|
|
||||||
v_round(v_absdiff(a6, s3)));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (x < length && (in != out))
|
|
||||||
{
|
|
||||||
x = length - 3 * nlanes;
|
|
||||||
continue; // process unaligned tail
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<>
|
|
||||||
CV_ALWAYS_INLINE int absdiffc_simd_c3_impl<uchar>(const uchar in[], uchar out[],
|
|
||||||
const v_float32& s1, const v_float32& s2,
|
|
||||||
const v_float32& s3, const int length)
|
|
||||||
{
|
|
||||||
constexpr int nlanes = static_cast<int>(v_uint8::nlanes);
|
|
||||||
|
|
||||||
if (length < 3 * nlanes)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
int x = 0;
|
|
||||||
|
|
||||||
for (;;)
|
|
||||||
{
|
|
||||||
for (; x <= length - 3 * nlanes; x += 3 * nlanes)
|
|
||||||
{
|
|
||||||
vx_store(&out[x],
|
|
||||||
v_pack_u(v_pack(v_round(v_absdiff(v_load_f32(in + x), s1)),
|
|
||||||
v_round(v_absdiff(v_load_f32(in + x + nlanes/4), s2))),
|
|
||||||
v_pack(v_round(v_absdiff(v_load_f32(in + x + nlanes/2), s3)),
|
|
||||||
v_round(v_absdiff(v_load_f32(in + x + 3*nlanes/4), s1)))));
|
|
||||||
|
|
||||||
vx_store(&out[x + nlanes],
|
|
||||||
v_pack_u(v_pack(v_round(v_absdiff(v_load_f32(in + x + nlanes), s2)),
|
|
||||||
v_round(v_absdiff(v_load_f32(in + x + 5*nlanes/4), s3))),
|
|
||||||
v_pack(v_round(v_absdiff(v_load_f32(in + x + 3*nlanes/2), s1)),
|
|
||||||
v_round(v_absdiff(v_load_f32(in + x + 7*nlanes/4), s2)))));
|
|
||||||
|
|
||||||
vx_store(&out[x + 2 * nlanes],
|
|
||||||
v_pack_u(v_pack(v_round(v_absdiff(v_load_f32(in + x + 2*nlanes), s3)),
|
|
||||||
v_round(v_absdiff(v_load_f32(in + x + 9*nlanes/4), s1))),
|
|
||||||
v_pack(v_round(v_absdiff(v_load_f32(in + x + 5*nlanes/2), s2)),
|
|
||||||
v_round(v_absdiff(v_load_f32(in + x + 11*nlanes/4), s3)))));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (x < length && (in != out))
|
|
||||||
{
|
|
||||||
x = length - 3 * nlanes;
|
|
||||||
continue; // process unaligned tail
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
CV_ALWAYS_INLINE int absdiffc_simd_channels(const T in[], const float scalar[], T out[],
|
|
||||||
const int width, int chan)
|
|
||||||
{
|
|
||||||
int length = width * chan;
|
|
||||||
v_float32 s = vx_load(scalar);
|
|
||||||
|
|
||||||
return absdiffc_simd_c1c2c4(in, out, s, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
CV_ALWAYS_INLINE int absdiffc_simd_c3(const T in[], const float scalar[], T out[], int width)
|
|
||||||
{
|
|
||||||
constexpr int chan = 3;
|
|
||||||
int length = width * chan;
|
|
||||||
|
|
||||||
v_float32 s1 = vx_load(scalar);
|
|
||||||
#if CV_SIMD_WIDTH == 32
|
|
||||||
v_float32 s2 = vx_load(scalar + 2);
|
|
||||||
v_float32 s3 = vx_load(scalar + 1);
|
|
||||||
#else
|
|
||||||
v_float32 s2 = vx_load(scalar + 1);
|
|
||||||
v_float32 s3 = vx_load(scalar + 2);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return absdiffc_simd_c3_impl(in, out, s1, s2, s3, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
CV_ALWAYS_INLINE int absdiffc_simd(const T in[], const float scalar[], T out[], int width, int chan)
|
|
||||||
{
|
|
||||||
switch (chan)
|
|
||||||
{
|
|
||||||
case 1:
|
|
||||||
case 2:
|
|
||||||
case 4:
|
|
||||||
return absdiffc_simd_channels(in, scalar, out, width, chan);
|
|
||||||
case 3:
|
|
||||||
return absdiffc_simd_c3(in, scalar, out, width);
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
#endif // CV_SIMD
|
|
||||||
|
|
||||||
template<typename DST, typename SRC>
|
template<typename DST, typename SRC>
|
||||||
static void run_absdiffc(Buffer &dst, const View &src, const float scalar[])
|
static void run_absdiffc(Buffer &dst, const View &src, const float scalar[])
|
||||||
{
|
{
|
||||||
@ -1240,13 +1002,14 @@ static void run_absdiffc(Buffer &dst, const View &src, const float scalar[])
|
|||||||
|
|
||||||
int width = dst.length();
|
int width = dst.length();
|
||||||
int chan = dst.meta().chan;
|
int chan = dst.meta().chan;
|
||||||
|
const int length = width * chan;
|
||||||
|
|
||||||
int w = 0;
|
int w = 0;
|
||||||
#if CV_SIMD
|
#if CV_SIMD
|
||||||
w = absdiffc_simd(in, scalar, out, width, chan);
|
w = absdiffc_simd(in, scalar, out, length, chan);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (; w < width*chan; ++w)
|
for (; w < length; ++w)
|
||||||
out[w] = absdiff<DST>(in[w], scalar[w%chan]);
|
out[w] = absdiff<DST>(in[w], scalar[w%chan]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1349,49 +1112,6 @@ static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], A
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
GAPI_FLUID_KERNEL(GFluidAbsDiffC, cv::gapi::core::GAbsDiffC, true)
|
|
||||||
{
|
|
||||||
static const int Window = 1;
|
|
||||||
|
|
||||||
static void run(const View &src, const cv::Scalar& _scalar, Buffer &dst, Buffer& scratch)
|
|
||||||
{
|
|
||||||
if (dst.y() == 0)
|
|
||||||
{
|
|
||||||
const int chan = src.meta().chan;
|
|
||||||
float* sc = scratch.OutLine<float>();
|
|
||||||
|
|
||||||
for (int i = 0; i < scratch.length(); ++i)
|
|
||||||
sc[i] = static_cast<float>(_scalar[i % chan]);
|
|
||||||
}
|
|
||||||
|
|
||||||
const float* scalar = scratch.OutLine<float>();
|
|
||||||
|
|
||||||
// DST SRC OP __VA_ARGS__
|
|
||||||
UNARY_(uchar, uchar, run_absdiffc, dst, src, scalar);
|
|
||||||
UNARY_(ushort, ushort, run_absdiffc, dst, src, scalar);
|
|
||||||
UNARY_(short, short, run_absdiffc, dst, src, scalar);
|
|
||||||
|
|
||||||
CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void initScratch(const GMatDesc&, const GScalarDesc&, Buffer& scratch)
|
|
||||||
{
|
|
||||||
#if CV_SIMD
|
|
||||||
constexpr int buflen = static_cast<int>(v_float32::nlanes) + 2; // buffer size
|
|
||||||
#else
|
|
||||||
constexpr int buflen = 4;
|
|
||||||
#endif
|
|
||||||
cv::Size bufsize(buflen, 1);
|
|
||||||
GMatDesc bufdesc = { CV_32F, 1, bufsize };
|
|
||||||
Buffer buffer(bufdesc);
|
|
||||||
scratch = std::move(buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void resetScratch(Buffer& /* scratch */)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
CV_ALWAYS_INLINE void initScratchBuffer(Buffer& scratch)
|
CV_ALWAYS_INLINE void initScratchBuffer(Buffer& scratch)
|
||||||
{
|
{
|
||||||
#if CV_SIMD
|
#if CV_SIMD
|
||||||
@ -1418,6 +1138,42 @@ CV_ALWAYS_INLINE void initScratchBuffer(Buffer& scratch)
|
|||||||
scratch = std::move(buffer);
|
scratch = std::move(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GAPI_FLUID_KERNEL(GFluidAbsDiffC, cv::gapi::core::GAbsDiffC, true)
|
||||||
|
{
|
||||||
|
static const int Window = 1;
|
||||||
|
|
||||||
|
static void run(const View &src, const cv::Scalar& _scalar, Buffer &dst, Buffer& scratch)
|
||||||
|
{
|
||||||
|
if (dst.y() == 0)
|
||||||
|
{
|
||||||
|
const int chan = src.meta().chan;
|
||||||
|
float* sc = scratch.OutLine<float>();
|
||||||
|
|
||||||
|
for (int i = 0; i < scratch.length(); ++i)
|
||||||
|
sc[i] = static_cast<float>(_scalar[i % chan]);
|
||||||
|
}
|
||||||
|
|
||||||
|
const float* scalar = scratch.OutLine<float>();
|
||||||
|
|
||||||
|
// DST SRC OP __VA_ARGS__
|
||||||
|
UNARY_(uchar, uchar, run_absdiffc, dst, src, scalar);
|
||||||
|
UNARY_(ushort, ushort, run_absdiffc, dst, src, scalar);
|
||||||
|
UNARY_(short, short, run_absdiffc, dst, src, scalar);
|
||||||
|
UNARY_(float, float, run_absdiffc, dst, src, scalar);
|
||||||
|
|
||||||
|
CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void initScratch(const GMatDesc&, const GScalarDesc&, Buffer& scratch)
|
||||||
|
{
|
||||||
|
initScratchBuffer(scratch);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void resetScratch(Buffer& /* scratch */)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
GAPI_FLUID_KERNEL(GFluidAddC, cv::gapi::core::GAddC, true)
|
GAPI_FLUID_KERNEL(GFluidAddC, cv::gapi::core::GAddC, true)
|
||||||
{
|
{
|
||||||
static const int Window = 1;
|
static const int Window = 1;
|
||||||
|
@ -165,6 +165,21 @@ MULC_SIMD(float, float)
|
|||||||
|
|
||||||
#undef MULC_SIMD
|
#undef MULC_SIMD
|
||||||
|
|
||||||
|
#define ABSDIFFC_SIMD(SRC) \
|
||||||
|
int absdiffc_simd(const SRC in[], const float scalar[], SRC out[], \
|
||||||
|
const int length, const int chan) \
|
||||||
|
{ \
|
||||||
|
CV_CPU_DISPATCH(absdiffc_simd, (in, scalar, out, length, chan), \
|
||||||
|
CV_CPU_DISPATCH_MODES_ALL); \
|
||||||
|
}
|
||||||
|
|
||||||
|
ABSDIFFC_SIMD(uchar)
|
||||||
|
ABSDIFFC_SIMD(short)
|
||||||
|
ABSDIFFC_SIMD(ushort)
|
||||||
|
ABSDIFFC_SIMD(float)
|
||||||
|
|
||||||
|
#undef ABSDIFFC_SIMD
|
||||||
|
|
||||||
} // namespace fluid
|
} // namespace fluid
|
||||||
} // namespace gapi
|
} // namespace gapi
|
||||||
} // namespace cv
|
} // namespace cv
|
||||||
|
@ -129,6 +129,17 @@ MULC_SIMD(float, float)
|
|||||||
|
|
||||||
#undef MULC_SIMD
|
#undef MULC_SIMD
|
||||||
|
|
||||||
|
#define ABSDIFFC_SIMD(T) \
|
||||||
|
int absdiffc_simd(const T in[], const float scalar[], T out[], \
|
||||||
|
const int length, const int chan);
|
||||||
|
|
||||||
|
ABSDIFFC_SIMD(uchar)
|
||||||
|
ABSDIFFC_SIMD(short)
|
||||||
|
ABSDIFFC_SIMD(ushort)
|
||||||
|
ABSDIFFC_SIMD(float)
|
||||||
|
|
||||||
|
#undef ABSDIFFC_SIMD
|
||||||
|
|
||||||
} // namespace fluid
|
} // namespace fluid
|
||||||
} // namespace gapi
|
} // namespace gapi
|
||||||
} // namespace cv
|
} // namespace cv
|
||||||
|
@ -151,6 +151,17 @@ MULC_SIMD(float, float)
|
|||||||
|
|
||||||
#undef MULC_SIMD
|
#undef MULC_SIMD
|
||||||
|
|
||||||
|
#define ABSDIFFC_SIMD(T) \
|
||||||
|
int absdiffc_simd(const T in[], const float scalar[], T out[], \
|
||||||
|
const int length, const int chan);
|
||||||
|
|
||||||
|
ABSDIFFC_SIMD(uchar)
|
||||||
|
ABSDIFFC_SIMD(short)
|
||||||
|
ABSDIFFC_SIMD(ushort)
|
||||||
|
ABSDIFFC_SIMD(float)
|
||||||
|
|
||||||
|
#undef ABSDIFFC_SIMD
|
||||||
|
|
||||||
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||||
|
|
||||||
struct scale_tag {};
|
struct scale_tag {};
|
||||||
@ -901,6 +912,7 @@ MUL_SIMD(float, float)
|
|||||||
struct add_tag {};
|
struct add_tag {};
|
||||||
struct sub_tag {};
|
struct sub_tag {};
|
||||||
struct mul_tag {};
|
struct mul_tag {};
|
||||||
|
struct absdiff_tag {};
|
||||||
|
|
||||||
CV_ALWAYS_INLINE void arithmOpScalar_pack_store_c3(short* outx, const v_int32& c1,
|
CV_ALWAYS_INLINE void arithmOpScalar_pack_store_c3(short* outx, const v_int32& c1,
|
||||||
const v_int32& c2, const v_int32& c3,
|
const v_int32& c2, const v_int32& c3,
|
||||||
@ -938,6 +950,12 @@ CV_ALWAYS_INLINE v_float32 oper(mul_tag, const v_float32& a, const v_float32& sc
|
|||||||
{
|
{
|
||||||
return a * sc;
|
return a * sc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CV_ALWAYS_INLINE v_float32 oper(absdiff_tag, const v_float32& a, const v_float32& sc)
|
||||||
|
{
|
||||||
|
return v_absdiff(a, sc);
|
||||||
|
}
|
||||||
|
|
||||||
//-------------------------------------------------------------------------------------------------
|
//-------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
template<typename oper_tag, typename SRC, typename DST>
|
template<typename oper_tag, typename SRC, typename DST>
|
||||||
@ -1450,6 +1468,38 @@ MULC_SIMD(float, float)
|
|||||||
|
|
||||||
#undef MULC_SIMD
|
#undef MULC_SIMD
|
||||||
|
|
||||||
|
//-------------------------
|
||||||
|
//
|
||||||
|
// Fluid kernels: AbsDiffC
|
||||||
|
//
|
||||||
|
//-------------------------
|
||||||
|
|
||||||
|
#define ABSDIFFC_SIMD(SRC) \
|
||||||
|
int absdiffc_simd(const SRC in[], const float scalar[], SRC out[], \
|
||||||
|
const int length, const int chan) \
|
||||||
|
{ \
|
||||||
|
switch (chan) \
|
||||||
|
{ \
|
||||||
|
case 1: \
|
||||||
|
case 2: \
|
||||||
|
case 4: \
|
||||||
|
return arithmOpScalar_simd_common(absdiff_tag{}, in, scalar, out, length); \
|
||||||
|
case 3: \
|
||||||
|
return arithmOpScalar_simd_c3(absdiff_tag{}, in, scalar, out, length); \
|
||||||
|
default: \
|
||||||
|
GAPI_Assert(chan <= 4); \
|
||||||
|
break; \
|
||||||
|
} \
|
||||||
|
return 0; \
|
||||||
|
}
|
||||||
|
|
||||||
|
ABSDIFFC_SIMD(uchar)
|
||||||
|
ABSDIFFC_SIMD(short)
|
||||||
|
ABSDIFFC_SIMD(ushort)
|
||||||
|
ABSDIFFC_SIMD(float)
|
||||||
|
|
||||||
|
#undef ABSDIFFC_SIMD
|
||||||
|
|
||||||
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||||
|
|
||||||
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||||
|
Loading…
Reference in New Issue
Block a user