mirror of
https://github.com/opencv/opencv.git
synced 2025-06-11 11:45:30 +08:00
Merge pull request #21530 from anna-khakimova:ak/simd_divrc
* GAPI Fluid: SIMD for DivRC kernel. * Fluid: Div kernel's SIMD refactoring * SIMD for DivRC 3 channel case * Applied comments
This commit is contained in:
parent
ebb6915e58
commit
9c7adb7248
@ -528,6 +528,10 @@ PERF_TEST_P_(DivRCPerfTest, TestPerformance)
|
||||
|
||||
// FIXIT Unstable input data for divide
|
||||
initMatsRandU(type, sz, dtype, false);
|
||||
// This condition is needed as a workaround for a bug in OpenCV.
|
||||
// It reinitializes the divisor matrix without zero values when the DST type is CV_16S.
|
||||
if (dtype == CV_16S || (type == CV_16S && dtype == -1))
|
||||
cv::randu(in_mat1, cv::Scalar::all(1), cv::Scalar::all(255));
|
||||
|
||||
// OpenCV code ///////////////////////////////////////////////////////////
|
||||
cv::divide(sc, in_mat1, out_mat_ocv, scale, dtype);
|
||||
|
@ -101,8 +101,8 @@ INSTANTIATE_TEST_CASE_P(DivCPerfTestFluid, DivCPerfTest,
|
||||
INSTANTIATE_TEST_CASE_P(DivRCPerfTestFluid, DivRCPerfTest,
|
||||
Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()),
|
||||
Values(szSmall128, szVGA, sz720p, sz1080p),
|
||||
Values(CV_8UC1, CV_8UC3, CV_32FC1),
|
||||
Values(-1, CV_8U, CV_32F),
|
||||
Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
|
||||
Values(-1, CV_8U, CV_16U, CV_16S, CV_32F),
|
||||
Values(1.0),
|
||||
Values(cv::compile_args(CORE_FLUID))));
|
||||
|
||||
|
@ -936,8 +936,8 @@ CV_ALWAYS_INLINE void run_arithm_s(Buffer &dst, const View &src, const float sca
|
||||
}
|
||||
|
||||
template<typename DST, typename SRC>
|
||||
static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], Arithm arithm,
|
||||
float scale=1)
|
||||
CV_ALWAYS_INLINE void run_arithm_rs(Buffer &dst, const View &src, const float scalar[],
|
||||
Arithm arithm, float scale=1)
|
||||
{
|
||||
const auto *in = src.InLine<SRC>(0);
|
||||
auto *out = dst.OutLine<DST>();
|
||||
@ -955,15 +955,23 @@ static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], A
|
||||
w = subrc_simd(scalar, in, out, length, chan);
|
||||
#endif
|
||||
for (; w < length; ++w)
|
||||
{
|
||||
out[w] = subr<DST>(in[w], scalar[w % chan]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
// TODO: optimize division
|
||||
case ARITHM_DIVIDE:
|
||||
for (int w=0; w < width; w++)
|
||||
for (int c=0; c < chan; c++)
|
||||
out[chan*w + c] = div<DST>(scalar[c], in[chan*w + c], scale);
|
||||
{
|
||||
int w = 0;
|
||||
#if CV_SIMD
|
||||
w = divrc_simd(scalar, in, out, length, chan, scale);
|
||||
#endif
|
||||
for (; w < length; ++w)
|
||||
{
|
||||
out[w] = div<DST>(scalar[w % chan], in[w], scale);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");
|
||||
}
|
||||
}
|
||||
@ -1319,7 +1327,9 @@ CV_ALWAYS_INLINE void run_divc(Buffer& dst, const View& src, Buffer& scratch,
|
||||
#endif
|
||||
|
||||
for (; w < length; ++w)
|
||||
{
|
||||
out[w] = div<DST>(in[w], scalar[w % chan], scale);
|
||||
}
|
||||
}
|
||||
|
||||
GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true)
|
||||
@ -1402,32 +1412,55 @@ GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true)
|
||||
}
|
||||
};
|
||||
|
||||
GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, false)
|
||||
GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, true)
|
||||
{
|
||||
static const int Window = 1;
|
||||
|
||||
static void run(const cv::Scalar &_scalar, const View &src, double _scale, int /*dtype*/,
|
||||
Buffer &dst)
|
||||
static void run(const cv::Scalar& _scalar, const View& src, double _scale, int /*dtype*/,
|
||||
Buffer& dst, Buffer& scratch)
|
||||
{
|
||||
const float scalar[4] = {
|
||||
static_cast<float>(_scalar[0]),
|
||||
static_cast<float>(_scalar[1]),
|
||||
static_cast<float>(_scalar[2]),
|
||||
static_cast<float>(_scalar[3])
|
||||
};
|
||||
GAPI_Assert(src.meta().chan <= 4);
|
||||
|
||||
if (dst.y() == 0)
|
||||
{
|
||||
const int chan = src.meta().chan;
|
||||
float* _scratch = scratch.OutLine<float>();
|
||||
|
||||
scalar_to_scratch(_scalar, _scratch, scratch.length(), chan);
|
||||
}
|
||||
|
||||
const float* scalar = scratch.OutLine<float>();
|
||||
const float scale = static_cast<float>(_scale);
|
||||
|
||||
// DST SRC OP __VA_ARGS__
|
||||
UNARY_(uchar , uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(uchar , short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(uchar , float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_( short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_( float, uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_( float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_( float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(uchar, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(uchar, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(uchar, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(uchar, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(ushort, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(ushort, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(ushort, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(ushort, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(short, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(short, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(short, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(float, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(float, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
UNARY_(float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
|
||||
|
||||
CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
|
||||
}
|
||||
|
||||
// Scratch-buffer setup hook of the kernel: the descriptor, scale and dtype
// arguments are unnamed and unused; the only action is to pass `scratch`
// to initScratchBuffer().
// NOTE(review): presumably initScratchBuffer() sizes the buffer that run()
// later fills via scalar_to_scratch() - confirm against its definition.
static void initScratch(const GScalarDesc&, const GMatDesc&, double, int, Buffer& scratch)
|
||||
{
|
||||
initScratchBuffer(scratch);
|
||||
}
|
||||
|
||||
// Scratch-buffer reset hook of the kernel. The body is empty, so the
// scratch buffer is left untouched here.
static void resetScratch(Buffer& /*scratch*/)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
//-------------------
|
||||
|
@ -235,6 +235,33 @@ ABSDIFFC_SIMD(float)
|
||||
|
||||
#undef ABSDIFFC_SIMD
|
||||
|
||||
// Generates one divrc_simd overload for a (SRC, DST) element-type pair.
// The generated function does no work of its own: it forwards
// (scalar, in, out, length, chan, scale) to CV_CPU_DISPATCH with
// CV_CPU_DISPATCH_MODES_ALL. The per-type instantiations follow below.
// NOTE(review): presumably CV_CPU_DISPATCH picks a CPU-specific build of
// divrc_simd and returns its result - confirm against the OpenCV dispatch macros.
#define DIVRC_SIMD(SRC, DST) \
|
||||
int divrc_simd(const float scalar[], const SRC in[], DST out[], \
|
||||
const int length, const int chan, const float scale) \
|
||||
{ \
|
||||
CV_CPU_DISPATCH(divrc_simd, (scalar, in, out, length, chan, scale), \
|
||||
CV_CPU_DISPATCH_MODES_ALL); \
|
||||
}
|
||||
|
||||
DIVRC_SIMD(uchar, uchar)
|
||||
DIVRC_SIMD(ushort, uchar)
|
||||
DIVRC_SIMD(short, uchar)
|
||||
DIVRC_SIMD(float, uchar)
|
||||
DIVRC_SIMD(short, short)
|
||||
DIVRC_SIMD(ushort, short)
|
||||
DIVRC_SIMD(uchar, short)
|
||||
DIVRC_SIMD(float, short)
|
||||
DIVRC_SIMD(ushort, ushort)
|
||||
DIVRC_SIMD(uchar, ushort)
|
||||
DIVRC_SIMD(short, ushort)
|
||||
DIVRC_SIMD(float, ushort)
|
||||
DIVRC_SIMD(uchar, float)
|
||||
DIVRC_SIMD(ushort, float)
|
||||
DIVRC_SIMD(short, float)
|
||||
DIVRC_SIMD(float, float)
|
||||
|
||||
#undef DIVRC_SIMD
|
||||
|
||||
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
|
||||
uchar out3[], const int width)
|
||||
{
|
||||
|
@ -187,6 +187,29 @@ ABSDIFFC_SIMD(float)
|
||||
|
||||
#undef ABSDIFFC_SIMD
|
||||
|
||||
// Declares one divrc_simd overload for a (SRC, DST) element-type pair.
// Arguments: the scalar values, the input line, the output line, `length`,
// the channel count `chan`, and the `scale` factor.
// The caller in run_arithm_rs assigns the int result to its loop index and
// continues the element-by-element division from there.
#define DIVRC_SIMD(SRC, DST) \
|
||||
int divrc_simd(const float scalar[], const SRC in[], DST out[], \
|
||||
const int length, const int chan, const float scale);
|
||||
|
||||
DIVRC_SIMD(uchar, uchar)
|
||||
DIVRC_SIMD(ushort, uchar)
|
||||
DIVRC_SIMD(short, uchar)
|
||||
DIVRC_SIMD(float, uchar)
|
||||
DIVRC_SIMD(short, short)
|
||||
DIVRC_SIMD(ushort, short)
|
||||
DIVRC_SIMD(uchar, short)
|
||||
DIVRC_SIMD(float, short)
|
||||
DIVRC_SIMD(ushort, ushort)
|
||||
DIVRC_SIMD(uchar, ushort)
|
||||
DIVRC_SIMD(short, ushort)
|
||||
DIVRC_SIMD(float, ushort)
|
||||
DIVRC_SIMD(uchar, float)
|
||||
DIVRC_SIMD(ushort, float)
|
||||
DIVRC_SIMD(short, float)
|
||||
DIVRC_SIMD(float, float)
|
||||
|
||||
#undef DIVRC_SIMD
|
||||
|
||||
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
|
||||
uchar out3[], const int width);
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user