mirror of
https://github.com/opencv/opencv.git
synced 2025-06-16 14:50:52 +08:00
imgproc:simd Enable VSX and wide universal intrinsics for accumulate operations
- improve cpu dispatching calls to allow more SIMD extensions (SSE4.1, AVX2, VSX) - wide universal intrinsics - replace dummy v_expand with v_expand_low - replace v_expand + v_mul_wrap with v_mul_expand for product accumulate operations - use FMA for accumulate operations - add mask and more types to accumulate's performance tests
This commit is contained in:
parent
5771fd693d
commit
8965f3ae06
@ -1,3 +1,3 @@
|
|||||||
set(the_description "Image Processing")
|
set(the_description "Image Processing")
|
||||||
ocv_add_dispatched_file(accum SSE2 AVX NEON)
|
ocv_add_dispatched_file(accum SSE4_1 AVX AVX2)
|
||||||
ocv_define_module(imgproc opencv_core WRAP java python js)
|
ocv_define_module(imgproc opencv_core WRAP java python js)
|
||||||
|
@ -5,94 +5,102 @@
|
|||||||
|
|
||||||
namespace opencv_test {
|
namespace opencv_test {
|
||||||
|
|
||||||
#ifdef HAVE_OPENVX
|
typedef Size_MatType Accumulate;
|
||||||
PERF_TEST_P(Size_MatType, Accumulate,
|
|
||||||
testing::Combine(
|
|
||||||
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
|
|
||||||
testing::Values(CV_16SC1, CV_32FC1)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
#else
|
|
||||||
PERF_TEST_P( Size_MatType, Accumulate,
|
|
||||||
testing::Combine(
|
|
||||||
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
|
|
||||||
testing::Values(CV_32FC1)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
Size sz = get<0>(GetParam());
|
|
||||||
int dstType = get<1>(GetParam());
|
|
||||||
|
|
||||||
Mat src(sz, CV_8UC1);
|
#define MAT_TYPES_ACCUMLATE CV_8UC1, CV_16UC1, CV_32FC1
|
||||||
Mat dst(sz, dstType);
|
#define MAT_TYPES_ACCUMLATE_C MAT_TYPES_ACCUMLATE, CV_8UC3, CV_16UC3, CV_32FC3
|
||||||
|
#define MAT_TYPES_ACCUMLATE_D MAT_TYPES_ACCUMLATE, CV_64FC1
|
||||||
|
// Source types for the masked double-precision accumulate tests: everything in
// MAT_TYPES_ACCUMLATE_C plus the 1- and 3-channel 64-bit float types.
// NOTE(review): the list previously ended in "CV_64FC1, CV_64FC1" (a duplicate);
// CV_64FC3 is assumed to be the intended second entry, mirroring the 1-/3-channel
// pairs in MAT_TYPES_ACCUMLATE_C - confirm against upstream.
#define MAT_TYPES_ACCUMLATE_D_C MAT_TYPES_ACCUMLATE_C, CV_64FC1, CV_64FC3
|
||||||
|
|
||||||
declare.time(100);
|
#define PERF_ACCUMULATE_INIT(_FLTC) \
|
||||||
declare.in(src, WARMUP_RNG).out(dst);
|
const Size srcSize = get<0>(GetParam()); \
|
||||||
|
const int srcType = get<1>(GetParam()); \
|
||||||
|
const int dstType = _FLTC(CV_MAT_CN(srcType)); \
|
||||||
|
Mat src1(srcSize, srcType), dst(srcSize, dstType); \
|
||||||
|
declare.in(src1, dst, WARMUP_RNG).out(dst);
|
||||||
|
|
||||||
TEST_CYCLE() accumulate(src, dst);
|
#define PERF_ACCUMULATE_MASK_INIT(_FLTC) \
|
||||||
|
PERF_ACCUMULATE_INIT(_FLTC) \
|
||||||
|
Mat mask(srcSize, CV_8UC1); \
|
||||||
|
declare.in(mask, WARMUP_RNG);
|
||||||
|
|
||||||
SANITY_CHECK_NOTHING();
|
#define PERF_TEST_P_ACCUMULATE(_NAME, _TYPES, _INIT, _FUN) \
|
||||||
|
PERF_TEST_P(Accumulate, _NAME, \
|
||||||
|
testing::Combine( \
|
||||||
|
testing::Values(sz1080p, sz720p, szVGA, szQVGA, szODD), \
|
||||||
|
testing::Values(_TYPES) \
|
||||||
|
) \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
_INIT \
|
||||||
|
TEST_CYCLE() _FUN; \
|
||||||
|
SANITY_CHECK_NOTHING(); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENVX
|
/////////////////////////////////// Accumulate ///////////////////////////////////
|
||||||
PERF_TEST_P(Size_MatType, AccumulateSquare,
|
|
||||||
testing::Combine(
|
|
||||||
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
|
|
||||||
testing::Values(CV_16SC1, CV_32FC1)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
#else
|
|
||||||
PERF_TEST_P( Size_MatType, AccumulateSquare,
|
|
||||||
testing::Combine(
|
|
||||||
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
|
|
||||||
testing::Values(CV_32FC1)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
Size sz = get<0>(GetParam());
|
|
||||||
int dstType = get<1>(GetParam());
|
|
||||||
|
|
||||||
Mat src(sz, CV_8UC1);
|
PERF_TEST_P_ACCUMULATE(Accumulate, MAT_TYPES_ACCUMLATE,
|
||||||
Mat dst(sz, dstType);
|
PERF_ACCUMULATE_INIT(CV_32FC), accumulate(src1, dst))
|
||||||
|
|
||||||
declare.time(100);
|
PERF_TEST_P_ACCUMULATE(AccumulateMask, MAT_TYPES_ACCUMLATE_C,
|
||||||
declare.in(src, WARMUP_RNG).out(dst);
|
PERF_ACCUMULATE_MASK_INIT(CV_32FC), accumulate(src1, dst, mask))
|
||||||
|
|
||||||
TEST_CYCLE() accumulateSquare(src, dst);
|
PERF_TEST_P_ACCUMULATE(AccumulateDouble, MAT_TYPES_ACCUMLATE_D,
|
||||||
|
PERF_ACCUMULATE_INIT(CV_64FC), accumulate(src1, dst))
|
||||||
|
|
||||||
SANITY_CHECK_NOTHING();
|
PERF_TEST_P_ACCUMULATE(AccumulateDoubleMask, MAT_TYPES_ACCUMLATE_D_C,
|
||||||
}
|
PERF_ACCUMULATE_MASK_INIT(CV_64FC), accumulate(src1, dst, mask))
|
||||||
|
|
||||||
#ifdef HAVE_OPENVX
|
///////////////////////////// AccumulateSquare ///////////////////////////////////
|
||||||
PERF_TEST_P(Size_MatType, AccumulateWeighted,
|
|
||||||
testing::Combine(
|
|
||||||
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
|
|
||||||
testing::Values(CV_8UC1, CV_32FC1)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
#else
|
|
||||||
PERF_TEST_P( Size_MatType, AccumulateWeighted,
|
|
||||||
testing::Combine(
|
|
||||||
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
|
|
||||||
testing::Values(CV_32FC1)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
Size sz = get<0>(GetParam());
|
|
||||||
int dstType = get<1>(GetParam());
|
|
||||||
|
|
||||||
Mat src(sz, CV_8UC1);
|
PERF_TEST_P_ACCUMULATE(Square, MAT_TYPES_ACCUMLATE,
|
||||||
Mat dst(sz, dstType);
|
PERF_ACCUMULATE_INIT(CV_32FC), accumulateSquare(src1, dst))
|
||||||
|
|
||||||
declare.time(100);
|
PERF_TEST_P_ACCUMULATE(SquareMask, MAT_TYPES_ACCUMLATE_C,
|
||||||
declare.in(src, WARMUP_RNG).out(dst);
|
PERF_ACCUMULATE_MASK_INIT(CV_32FC), accumulateSquare(src1, dst, mask))
|
||||||
|
|
||||||
TEST_CYCLE() accumulateWeighted(src, dst, 0.314);
|
PERF_TEST_P_ACCUMULATE(SquareDouble, MAT_TYPES_ACCUMLATE_D,
|
||||||
|
PERF_ACCUMULATE_INIT(CV_64FC), accumulateSquare(src1, dst))
|
||||||
|
|
||||||
SANITY_CHECK_NOTHING();
|
PERF_TEST_P_ACCUMULATE(SquareDoubleMask, MAT_TYPES_ACCUMLATE_D_C,
|
||||||
}
|
PERF_ACCUMULATE_MASK_INIT(CV_64FC), accumulateSquare(src1, dst, mask))
|
||||||
|
|
||||||
|
///////////////////////////// AccumulateProduct ///////////////////////////////////
|
||||||
|
|
||||||
|
#define PERF_ACCUMULATE_INIT_2(_FLTC) \
|
||||||
|
PERF_ACCUMULATE_INIT(_FLTC) \
|
||||||
|
Mat src2(srcSize, srcType); \
|
||||||
|
declare.in(src2);
|
||||||
|
|
||||||
|
#define PERF_ACCUMULATE_MASK_INIT_2(_FLTC) \
|
||||||
|
PERF_ACCUMULATE_MASK_INIT(_FLTC) \
|
||||||
|
Mat src2(srcSize, srcType); \
|
||||||
|
declare.in(src2);
|
||||||
|
|
||||||
|
PERF_TEST_P_ACCUMULATE(Product, MAT_TYPES_ACCUMLATE,
|
||||||
|
PERF_ACCUMULATE_INIT_2(CV_32FC), accumulateProduct(src1, src2, dst))
|
||||||
|
|
||||||
|
PERF_TEST_P_ACCUMULATE(ProductMask, MAT_TYPES_ACCUMLATE_C,
|
||||||
|
PERF_ACCUMULATE_MASK_INIT_2(CV_32FC), accumulateProduct(src1, src2, dst, mask))
|
||||||
|
|
||||||
|
PERF_TEST_P_ACCUMULATE(ProductDouble, MAT_TYPES_ACCUMLATE_D,
|
||||||
|
PERF_ACCUMULATE_INIT_2(CV_64FC), accumulateProduct(src1, src2, dst))
|
||||||
|
|
||||||
|
PERF_TEST_P_ACCUMULATE(ProductDoubleMask, MAT_TYPES_ACCUMLATE_D_C,
|
||||||
|
PERF_ACCUMULATE_MASK_INIT_2(CV_64FC), accumulateProduct(src1, src2, dst, mask))
|
||||||
|
|
||||||
|
///////////////////////////// AccumulateWeighted ///////////////////////////////////
|
||||||
|
|
||||||
|
PERF_TEST_P_ACCUMULATE(Weighted, MAT_TYPES_ACCUMLATE,
|
||||||
|
PERF_ACCUMULATE_INIT(CV_32FC), accumulateWeighted(src1, dst, 0.123))
|
||||||
|
|
||||||
|
PERF_TEST_P_ACCUMULATE(WeightedMask, MAT_TYPES_ACCUMLATE_C,
|
||||||
|
PERF_ACCUMULATE_MASK_INIT(CV_32FC), accumulateWeighted(src1, dst, 0.123, mask))
|
||||||
|
|
||||||
|
PERF_TEST_P_ACCUMULATE(WeightedDouble, MAT_TYPES_ACCUMLATE_D,
|
||||||
|
PERF_ACCUMULATE_INIT(CV_64FC), accumulateWeighted(src1, dst, 0.123456))
|
||||||
|
|
||||||
|
PERF_TEST_P_ACCUMULATE(WeightedDoubleMask, MAT_TYPES_ACCUMLATE_D_C,
|
||||||
|
PERF_ACCUMULATE_MASK_INIT(CV_64FC), accumulateWeighted(src1, dst, 0.123456, mask))
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user