mirror of
https://github.com/opencv/opencv.git
synced 2025-01-12 15:49:32 +08:00
ea47cb3ffe
Backport to 4.x: patchNaNs() SIMD acceleration #24480 backport from #23098 connected PR in extra: [#1118@extra](https://github.com/opencv/opencv_extra/pull/1118) ### This PR contains: * new SIMD code for `patchNaNs()` * CPU perf test <details> <summary>Performance comparison</summary> Geometric mean (ms) |Name of Test|noopt|sse2|avx2|sse2 vs noopt (x-factor)|avx2 vs noopt (x-factor)| |---|:-:|:-:|:-:|:-:|:-:| |PatchNaNs::OCL_PatchNaNsFixture::(640x480, 32FC1)|0.019|0.017|0.018|1.11|1.07| |PatchNaNs::OCL_PatchNaNsFixture::(640x480, 32FC4)|0.037|0.037|0.033|1.00|1.10| |PatchNaNs::OCL_PatchNaNsFixture::(1280x720, 32FC1)|0.032|0.032|0.033|0.99|0.98| |PatchNaNs::OCL_PatchNaNsFixture::(1280x720, 32FC4)|0.072|0.072|0.070|1.00|1.03| |PatchNaNs::OCL_PatchNaNsFixture::(1920x1080, 32FC1)|0.051|0.051|0.050|1.00|1.01| |PatchNaNs::OCL_PatchNaNsFixture::(1920x1080, 32FC4)|0.137|0.138|0.128|0.99|1.06| |PatchNaNs::OCL_PatchNaNsFixture::(3840x2160, 32FC1)|0.137|0.128|0.129|1.07|1.06| |PatchNaNs::OCL_PatchNaNsFixture::(3840x2160, 32FC4)|0.450|0.450|0.448|1.00|1.01| |PatchNaNs::PatchNaNsFixture::(640x480, 32FC1)|0.149|0.029|0.020|5.13|7.44| |PatchNaNs::PatchNaNsFixture::(640x480, 32FC2)|0.304|0.058|0.040|5.25|7.65| |PatchNaNs::PatchNaNsFixture::(640x480, 32FC3)|0.448|0.086|0.059|5.22|7.55| |PatchNaNs::PatchNaNsFixture::(640x480, 32FC4)|0.601|0.133|0.083|4.51|7.23| |PatchNaNs::PatchNaNsFixture::(1280x720, 32FC1)|0.451|0.093|0.060|4.83|7.52| |PatchNaNs::PatchNaNsFixture::(1280x720, 32FC2)|0.892|0.184|0.126|4.85|7.06| |PatchNaNs::PatchNaNsFixture::(1280x720, 32FC3)|1.345|0.311|0.230|4.32|5.84| |PatchNaNs::PatchNaNsFixture::(1280x720, 32FC4)|1.831|0.546|0.436|3.35|4.20| |PatchNaNs::PatchNaNsFixture::(1920x1080, 32FC1)|1.017|0.250|0.160|4.06|6.35| |PatchNaNs::PatchNaNsFixture::(1920x1080, 32FC2)|2.077|0.646|0.605|3.21|3.43| |PatchNaNs::PatchNaNsFixture::(1920x1080, 32FC3)|3.134|1.053|0.961|2.97|3.26| |PatchNaNs::PatchNaNsFixture::(1920x1080, 32FC4)|4.222|1.436|1.288|2.94|3.28| 
|PatchNaNs::PatchNaNsFixture::(3840x2160, 32FC1)|4.225|1.401|1.277|3.01|3.31| |PatchNaNs::PatchNaNsFixture::(3840x2160, 32FC2)|8.310|2.953|2.635|2.81|3.15| |PatchNaNs::PatchNaNsFixture::(3840x2160, 32FC3)|12.396|4.455|4.252|2.78|2.92| |PatchNaNs::PatchNaNsFixture::(3840x2160, 32FC4)|17.174|5.831|5.824|2.95|2.95| </details> ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
521 lines
12 KiB
C++
521 lines
12 KiB
C++
#include "perf_precomp.hpp"
|
|
#include <numeric>
|
|
#include "opencv2/core/softfloat.hpp"
|
|
|
|
namespace opencv_test
|
|
{
|
|
using namespace perf;
|
|
|
|
using BroadcastTest = perf::TestBaseWithParam<std::tuple<std::vector<int>, perf::MatType, std::vector<int>>>;
|
|
typedef Size_MatType BinaryOpTest;
|
|
|
|
// Benchmarks cv::broadcast() from the parameterised source shape to the
// parameterised destination shape. No output sanity check is performed.
PERF_TEST_P_(BroadcastTest, basic)
{
    const auto& params = GetParam();
    std::vector<int> srcShape = get<0>(params);
    const int elemType = get<1>(params);
    std::vector<int> dstShape = get<2>(params);

    // N-dimensional matrices built from the shape vectors.
    cv::Mat src(static_cast<int>(srcShape.size()), srcShape.data(), elemType);
    cv::Mat dst(static_cast<int>(dstShape.size()), dstShape.data(), elemType);

    // Random source content, requested between -1 and 1.
    cv::randu(src, -1.f, 1.f);

    TEST_CYCLE()
    {
        cv::broadcast(src, dstShape, dst);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(/*nothing*/ , BroadcastTest,
|
|
testing::Combine(
|
|
testing::Values(std::vector<int>{1, 100, 800},
|
|
std::vector<int>{10, 1, 800},
|
|
std::vector<int>{10, 100, 1}),
|
|
testing::Values(CV_32FC1),
|
|
testing::Values(std::vector<int>{10, 100, 800})
|
|
)
|
|
);
|
|
|
|
// Benchmarks element-wise cv::min() of two matrices of the parameterised
// size and type.
PERF_TEST_P_(BinaryOpTest, min)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());

    cv::Mat a(frameSize, type);
    cv::Mat b(frameSize, type);
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    TEST_CYCLE()
    {
        cv::min(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::min() of a matrix against a cv::Scalar whose contents are
// whatever declare.in() leaves in it.
PERF_TEST_P_(BinaryOpTest, minScalarDouble)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());

    cv::Mat a(frameSize, type);
    cv::Scalar b;
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    TEST_CYCLE()
    {
        cv::min(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::min() of a matrix against a scalar that, for integer
// depths, is forced to hold integer values only.
PERF_TEST_P_(BinaryOpTest, minScalarSameType)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());
    const int depth = CV_MAT_DEPTH(type);

    cv::Mat a(frameSize, type);
    cv::Scalar b;
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    if (depth < CV_32S)
    {
        // 8U/8S/16U/16S processing must not receive non-integer values
        b = Scalar(1, 0, 3, 4);
    }
    else if (depth == CV_32S)
    {
        // 32S processing must not receive non-integer values
        b = Scalar(1, 0, -3, 4);
    }
    // For any other depth b is left as declare.in() set it up.

    TEST_CYCLE()
    {
        cv::min(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks element-wise cv::max() of two matrices of the parameterised
// size and type.
PERF_TEST_P_(BinaryOpTest, max)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());

    cv::Mat a(frameSize, type);
    cv::Mat b(frameSize, type);
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    TEST_CYCLE()
    {
        cv::max(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::max() of a matrix against a cv::Scalar whose contents are
// whatever declare.in() leaves in it.
PERF_TEST_P_(BinaryOpTest, maxScalarDouble)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());

    cv::Mat a(frameSize, type);
    cv::Scalar b;
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    TEST_CYCLE()
    {
        cv::max(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::max() of a matrix against a scalar that, for integer
// depths, is forced to hold integer values only.
PERF_TEST_P_(BinaryOpTest, maxScalarSameType)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());
    const int depth = CV_MAT_DEPTH(type);

    cv::Mat a(frameSize, type);
    cv::Scalar b;
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    if (depth < CV_32S)
    {
        // 8U/8S/16U/16S processing must not receive non-integer values
        b = Scalar(1, 0, 3, 4);
    }
    else if (depth == CV_32S)
    {
        // 32S processing must not receive non-integer values
        b = Scalar(1, 0, -3, 4);
    }
    // For any other depth b is left as declare.in() set it up.

    TEST_CYCLE()
    {
        cv::max(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::absdiff() of two matrices of the parameterised size and type.
PERF_TEST_P_(BinaryOpTest, absdiff)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());

    cv::Mat a(frameSize, type);
    cv::Mat b(frameSize, type);
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    const bool is32S = (CV_MAT_DEPTH(type) == CV_32S);
    if (is32S)
    {
        // Ticket 1529: absdiff may run without saturation on 32S, so both
        // operands are halved first.
        a /= 2;
        b /= 2;
    }

    TEST_CYCLE()
    {
        cv::absdiff(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::absdiff() of a matrix against a cv::Scalar.
PERF_TEST_P_(BinaryOpTest, absdiffScalarDouble)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());

    cv::Mat a(frameSize, type);
    cv::Scalar b;
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    const bool is32S = (CV_MAT_DEPTH(type) == CV_32S);
    if (is32S)
    {
        // Ticket 1529: absdiff may run without saturation on 32S, so both
        // the matrix and the scalar are halved first.
        a /= 2;
        b /= 2;
    }

    TEST_CYCLE()
    {
        cv::absdiff(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::absdiff() of a matrix against a scalar that, for integer
// depths, is forced to hold integer values only.
PERF_TEST_P_(BinaryOpTest, absdiffScalarSameType)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());
    const int depth = CV_MAT_DEPTH(type);

    cv::Mat a(frameSize, type);
    cv::Scalar b;
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    if (depth < CV_32S)
    {
        // 8U/8S/16U/16S processing must not receive non-integer values
        b = Scalar(1, 0, 3, 4);
    }
    else if (depth == CV_32S)
    {
        // Ticket 1529: absdiff may run without saturation on 32S, so the
        // matrix operand is halved first.
        a /= 2;
        // 32S processing must not receive non-integer values
        b = Scalar(1, 0, -3, 4);
    }
    // For any other depth b is left as declare.in() set it up.

    TEST_CYCLE()
    {
        cv::absdiff(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::add() of two matrices of the parameterised size and type.
PERF_TEST_P_(BinaryOpTest, add)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());

    cv::Mat a(frameSize, type);
    cv::Mat b(frameSize, type);
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);
    declare.time(50);

    const bool is32S = (CV_MAT_DEPTH(type) == CV_32S);
    if (is32S)
    {
        // Ticket 1529: add may run without saturation on 32S, so both
        // operands are halved first.
        a /= 2;
        b /= 2;
    }

    TEST_CYCLE()
    {
        cv::add(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::add() of a matrix and a cv::Scalar.
PERF_TEST_P_(BinaryOpTest, addScalarDouble)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());

    cv::Mat a(frameSize, type);
    cv::Scalar b;
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    const bool is32S = (CV_MAT_DEPTH(type) == CV_32S);
    if (is32S)
    {
        // Ticket 1529: add may run without saturation on 32S, so both the
        // matrix and the scalar are halved first.
        a /= 2;
        b /= 2;
    }

    TEST_CYCLE()
    {
        cv::add(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::add() of a matrix and a scalar with the output type passed
// explicitly; for integer depths the scalar holds integer values only.
PERF_TEST_P_(BinaryOpTest, addScalarSameType)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());
    const int depth = CV_MAT_DEPTH(type);

    cv::Mat a(frameSize, type);
    cv::Scalar b;
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    if (depth < CV_32S)
    {
        // 8U/8S/16U/16S processing must not receive non-integer values
        b = Scalar(1, 0, 3, 4);
    }
    else if (depth == CV_32S)
    {
        // Ticket 1529: add may run without saturation on 32S, so the
        // matrix operand is halved first.
        a /= 2;
        // 32S processing must not receive non-integer values
        b = Scalar(1, 0, -3, 4);
    }
    // For any other depth b is left as declare.in() set it up.

    TEST_CYCLE()
    {
        cv::add(a, b, c, noArray(), type);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::subtract() of two matrices of the parameterised size and type.
PERF_TEST_P_(BinaryOpTest, subtract)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());

    cv::Mat a(frameSize, type);
    cv::Mat b(frameSize, type);
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    const bool is32S = (CV_MAT_DEPTH(type) == CV_32S);
    if (is32S)
    {
        // Ticket 1529: subtract may run without saturation on 32S, so both
        // operands are halved first.
        a /= 2;
        b /= 2;
    }

    TEST_CYCLE()
    {
        cv::subtract(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::subtract() of a cv::Scalar from a matrix.
PERF_TEST_P_(BinaryOpTest, subtractScalarDouble)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());

    cv::Mat a(frameSize, type);
    cv::Scalar b;
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    const bool is32S = (CV_MAT_DEPTH(type) == CV_32S);
    if (is32S)
    {
        // Ticket 1529: subtract may run without saturation on 32S, so both
        // the matrix and the scalar are halved first.
        a /= 2;
        b /= 2;
    }

    TEST_CYCLE()
    {
        cv::subtract(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks cv::subtract() of a scalar from a matrix with the output type
// passed explicitly; for integer depths the scalar holds integer values only.
PERF_TEST_P_(BinaryOpTest, subtractScalarSameType)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());
    const int depth = CV_MAT_DEPTH(type);

    cv::Mat a(frameSize, type);
    cv::Scalar b;
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    if (depth < CV_32S)
    {
        // 8U/8S/16U/16S processing must not receive non-integer values
        b = Scalar(1, 0, 3, 4);
    }
    else if (depth == CV_32S)
    {
        // Ticket 1529: subtract may run without saturation on 32S, so the
        // matrix operand is halved first.
        a /= 2;
        // 32S processing must not receive non-integer values
        b = Scalar(1, 0, -3, 4);
    }
    // For any other depth b is left as declare.in() set it up.

    TEST_CYCLE()
    {
        cv::subtract(a, b, c, noArray(), type);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks element-wise cv::multiply() of two matrices.
PERF_TEST_P_(BinaryOpTest, multiply)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());

    cv::Mat a(frameSize, type);
    cv::Mat b(frameSize, type);
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    if (CV_MAT_DEPTH(type) == CV_32S)
    {
        // The docs state that no saturation is applied to 32-bit integer
        // results, so both operands are scaled down before multiplying.
        const int shrink = (2 << 16);
        a /= shrink;
        b /= shrink;
    }

    TEST_CYCLE()
    {
        cv::multiply(a, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks element-wise cv::multiply() with an explicit scale factor of 0.5.
PERF_TEST_P_(BinaryOpTest, multiplyScale)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());
    const double scale = 0.5;

    cv::Mat a(frameSize, type);
    cv::Mat b(frameSize, type);
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    if (CV_MAT_DEPTH(type) == CV_32S)
    {
        // The docs state that no saturation is applied to 32-bit integer
        // results, so both operands are scaled down before multiplying.
        const int shrink = (2 << 16);
        a /= shrink;
        b /= shrink;
    }

    TEST_CYCLE()
    {
        cv::multiply(a, b, c, scale);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks element-wise cv::divide() of two matrices with a scale of 0.5.
PERF_TEST_P_(BinaryOpTest, divide)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());
    const double scale = 0.5;

    cv::Mat a(frameSize, type);
    cv::Mat b(frameSize, type);
    cv::Mat c(frameSize, type);

    declare.in(a, b, WARMUP_RNG).out(c);

    TEST_CYCLE()
    {
        cv::divide(a, b, c, scale);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Benchmarks the scalar-over-matrix form of cv::divide() with a numerator
// of 0.5.
PERF_TEST_P_(BinaryOpTest, reciprocal)
{
    const Size frameSize = get<0>(GetParam());
    const int type = get<1>(GetParam());
    const double scale = 0.5;

    cv::Mat b(frameSize, type);
    cv::Mat c(frameSize, type);

    declare.in(b, WARMUP_RNG).out(c);

    TEST_CYCLE()
    {
        cv::divide(scale, b, c);
    }

    SANITY_CHECK_NOTHING();
}
|
|
|
|
|
|
// Benchmarks cv::transposeND() with the axis order fully reversed.
//
// The input is reshaped to a single channel before the call, so its
// dimensions are those of the reshaped matrix rather than of the
// parameterised size.
PERF_TEST_P_(BinaryOpTest, transposeND)
{
    Size sz = get<0>(GetParam());
    int type = get<1>(GetParam());
    cv::Mat a = Mat(sz, type).reshape(1);

    // Reversed axis permutation: {dims-1, ..., 1, 0}.
    std::vector<int> order(a.dims);
    std::iota(order.begin(), order.end(), 0);
    std::reverse(order.begin(), order.end());

    // The output shape is the input shape with its axes reversed.
    std::vector<int> new_sz(a.dims);
    std::copy(a.size.p, a.size.p + a.dims, new_sz.begin());
    std::reverse(new_sz.begin(), new_sz.end());

    // Allocate the output with the type of the reshaped (single-channel)
    // input. Using the multi-channel `type` here would hand the harness a
    // buffer of the wrong type, which the transpose would then have to
    // replace inside the measured loop.
    cv::Mat b = Mat(new_sz, a.type());

    declare.in(a,WARMUP_RNG).out(b);

    TEST_CYCLE() cv::transposeND(a, order, b);

    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Every BinaryOpTest case runs for each combination of the frame sizes and
// element types listed below.
INSTANTIATE_TEST_CASE_P(/*nothing*/, BinaryOpTest,
    testing::Combine(
        testing::Values(szVGA, sz720p, sz1080p),
        testing::Values(
            CV_8UC1, CV_8UC3, CV_8UC4,
            CV_8SC1,
            CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
            CV_32SC1,
            CV_32FC1)
    )
);
|
|
|
|
///////////// PatchNaNs ////////////////////////
|
|
|
|
template<typename _Tp>
|
|
_Tp randomNan(RNG& rng);
|
|
|
|
template<>
|
|
float randomNan(RNG& rng)
|
|
{
|
|
uint32_t r = rng.next();
|
|
Cv32suf v;
|
|
v.u = r;
|
|
// exp & set a bit to avoid zero mantissa
|
|
v.u = v.u | 0x7f800001;
|
|
return v.f;
|
|
}
|
|
|
|
template<>
|
|
double randomNan(RNG& rng)
|
|
{
|
|
uint32_t r0 = rng.next();
|
|
uint32_t r1 = rng.next();
|
|
Cv64suf v;
|
|
v.u = (uint64_t(r0) << 32) | uint64_t(r1);
|
|
// exp &set a bit to avoid zero mantissa
|
|
v.u = v.u | 0x7ff0000000000001;
|
|
return v.f;
|
|
}
|
|
|
|
// The patchNaNs benchmark reuses the (Size, MatType) fixture.
using PatchNaNsFixture = Size_MatType;
|
|
|
|
// Benchmarks cv::patchNaNs() on a matrix in which every other element (in a
// checkerboard pattern over the flattened rows) is a NaN.
PERF_TEST_P_(PatchNaNsFixture, PatchNaNs)
{
    const Size_MatType_t params = GetParam();
    const Size imageSize = get<0>(params);
    const int type = get<1>(params);
    const int channels = CV_MAT_CN(type);

    Mat src(imageSize, type);
    declare.in(src, WARMUP_RNG).out(src);

    // Inject NaNs: each row is treated as width * channels floats, and an
    // element is overwritten when (column + row) is even. All other
    // elements keep the value they already hold.
    const int rowElems = imageSize.width * channels;
    RNG& rng = theRNG();
    for (int row = 0; row < imageSize.height; ++row)
    {
        float* const line = src.ptr<float>(row);
        for (int col = 0; col < rowElems; ++col)
        {
            if ((col + row) % 2 == 0)
            {
                line[col] = randomNan<float>(rng);
            }
        }
    }

    TEST_CYCLE()
    {
        cv::patchNaNs(src, 17.7);
    }

    SANITY_CHECK(src);
}
|
|
|
|
// The patchNaNs benchmark runs on 32-bit float data with one to four
// channels, for each of the listed frame sizes.
INSTANTIATE_TEST_CASE_P(/*nothing*/, PatchNaNsFixture,
    testing::Combine(
        testing::Values(szVGA, sz720p, sz1080p, sz2160p),
        testing::Values(
            CV_32FC1,
            CV_32FC2,
            CV_32FC3,
            CV_32FC4)
    )
);
|
|
|
|
} // namespace
|