diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 3cd5901af4..bd5de32d8d 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -1697,7 +1697,7 @@ elements. CV_EXPORTS_W bool checkRange(InputArray a, bool quiet = true, CV_OUT Point* pos = 0, double minVal = -DBL_MAX, double maxVal = DBL_MAX); -/** @brief converts NaNs to the given number +/** @brief Replaces NaNs by given number @param a input/output matrix (CV_32F type). @param val value to convert the NaNs */ diff --git a/modules/core/perf/perf_arithm.cpp b/modules/core/perf/perf_arithm.cpp index 872963fc65..c4cc7500a7 100644 --- a/modules/core/perf/perf_arithm.cpp +++ b/modules/core/perf/perf_arithm.cpp @@ -1,5 +1,6 @@ #include "perf_precomp.hpp" #include <numeric> +#include "opencv2/core/softfloat.hpp" namespace opencv_test { @@ -451,4 +452,69 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/ , BinaryOpTest, ) ); +///////////// PatchNaNs //////////////////////// + +template<typename _Tp> +_Tp randomNan(RNG& rng); + +template<> +float randomNan<float>(RNG& rng) +{ + uint32_t r = rng.next(); + Cv32suf v; + v.u = r; + // exp & set a bit to avoid zero mantissa + v.u = v.u | 0x7f800001; + return v.f; +} + +template<> +double randomNan<double>(RNG& rng) +{ + uint32_t r0 = rng.next(); + uint32_t r1 = rng.next(); + Cv64suf v; + v.u = (uint64_t(r0) << 32) | uint64_t(r1); + // exp & set a bit to avoid zero mantissa + v.u = v.u | 0x7ff0000000000001; + return v.f; +} + +typedef Size_MatType PatchNaNsFixture; + +PERF_TEST_P_(PatchNaNsFixture, PatchNaNs) +{ + const Size_MatType_t params = GetParam(); + Size srcSize = get<0>(params); + const int type = get<1>(params), cn = CV_MAT_CN(type); + + Mat src(srcSize, type); + declare.in(src, WARMUP_RNG).out(src); + + // generating NaNs + { + srcSize.width *= cn; + RNG& rng = theRNG(); + for (int y = 0; y < srcSize.height; ++y) + { + float *const ptrf = src.ptr<float>(y); + for (int x = 0; x < srcSize.width; ++x) + { + ptrf[x] = (x + y) % 2 == 0 ? 
randomNan<float>(rng) : ptrf[x]; + } + } + } + + TEST_CYCLE() cv::patchNaNs(src, 17.7); + + SANITY_CHECK(src); +} + +INSTANTIATE_TEST_CASE_P(/*nothing*/ , PatchNaNsFixture, + testing::Combine( + testing::Values(szVGA, sz720p, sz1080p, sz2160p), + testing::Values(CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4) + ) +); + } // namespace diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 9e3a1dbad2..525d71ba09 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -1610,30 +1610,37 @@ void patchNaNs( InputOutputArray _a, double _val ) const Mat* arrays[] = {&a, 0}; int* ptrs[1] = {}; NAryMatIterator it(arrays, (uchar**)ptrs); - size_t len = it.size*a.channels(); + int len = (int)(it.size*a.channels()); Cv32suf val; val.f = (float)_val; -#if (CV_SIMD || CV_SIMD_SCALABLE) - v_int32 v_mask1 = vx_setall_s32(0x7fffffff), v_mask2 = vx_setall_s32(0x7f800000); - v_int32 v_val = vx_setall_s32(val.i); -#endif - for( size_t i = 0; i < it.nplanes; i++, ++it ) { int* tptr = ptrs[0]; - size_t j = 0; + int j = 0; #if (CV_SIMD || CV_SIMD_SCALABLE) - size_t cWidth = (size_t)VTraits<v_int32>::vlanes(); - for ( ; j + cWidth <= len; j += cWidth) + v_int32 v_pos_mask = vx_setall_s32(0x7fffffff), v_exp_mask = vx_setall_s32(0x7f800000); + v_int32 v_val = vx_setall_s32(val.i); + + int cWidth = VTraits<v_int32>::vlanes(); + for (; j < len - cWidth * 2 + 1; j += cWidth * 2) { - v_int32 v_src = vx_load(tptr + j); - v_int32 v_cmp_mask = v_lt(v_mask2, v_and(v_src, v_mask1)); - v_int32 v_dst = v_select(v_cmp_mask, v_val, v_src); - v_store(tptr + j, v_dst); + v_int32 v_src0 = vx_load(tptr + j); + v_int32 v_src1 = vx_load(tptr + j + cWidth); + + v_int32 v_cmp_mask0 = v_lt(v_exp_mask, v_and(v_src0, v_pos_mask)); + v_int32 v_cmp_mask1 = v_lt(v_exp_mask, v_and(v_src1, v_pos_mask)); + + if (v_check_any(v_or(v_cmp_mask0, v_cmp_mask1))) + { + v_int32 v_dst0 = v_select(v_cmp_mask0, v_val, v_src0); + v_int32 v_dst1 = v_select(v_cmp_mask1, v_val, v_src1); + + v_store(tptr + j, 
v_dst0); + v_store(tptr + j + cWidth, v_dst1); + } } - vx_cleanup(); #endif for( ; j < len; j++ )