diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 3cd5901af4..bd5de32d8d 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -1697,7 +1697,7 @@ elements. CV_EXPORTS_W bool checkRange(InputArray a, bool quiet = true, CV_OUT Point* pos = 0, double minVal = -DBL_MAX, double maxVal = DBL_MAX); -/** @brief converts NaNs to the given number +/** @brief Replaces NaNs by given number @param a input/output matrix (CV_32F type). @param val value to convert the NaNs */ diff --git a/modules/core/perf/perf_arithm.cpp b/modules/core/perf/perf_arithm.cpp index 872963fc65..c4cc7500a7 100644 --- a/modules/core/perf/perf_arithm.cpp +++ b/modules/core/perf/perf_arithm.cpp @@ -1,5 +1,6 @@ #include "perf_precomp.hpp" #include <numeric> +#include "opencv2/core/softfloat.hpp" namespace opencv_test { @@ -451,4 +452,69 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/ , BinaryOpTest, ) ); +///////////// PatchNaNs //////////////////////// + +template<typename _Tp> +_Tp randomNan(RNG& rng); + +template<> +float randomNan<float>(RNG& rng) +{ + uint32_t r = rng.next(); + Cv32suf v; + v.u = r; + // exp & set a bit to avoid zero mantissa + v.u = v.u | 0x7f800001; + return v.f; +} + +template<> +double randomNan<double>(RNG& rng) +{ + uint32_t r0 = rng.next(); + uint32_t r1 = rng.next(); + Cv64suf v; + v.u = (uint64_t(r0) << 32) | uint64_t(r1); + // exp & set a bit to avoid zero mantissa + v.u = v.u | 0x7ff0000000000001; + return v.f; +} + +typedef Size_MatType PatchNaNsFixture; + +PERF_TEST_P_(PatchNaNsFixture, PatchNaNs) +{ + const Size_MatType_t params = GetParam(); + Size srcSize = get<0>(params); + const int type = get<1>(params), cn = CV_MAT_CN(type); + + Mat src(srcSize, type); + declare.in(src, WARMUP_RNG).out(src); + + // generating NaNs + { + srcSize.width *= cn; + RNG& rng = theRNG(); + for (int y = 0; y < srcSize.height; ++y) + { + float *const ptrf = src.ptr<float>(y); + for (int x = 0; x < srcSize.width; ++x) + { + ptrf[x] = (x + y) % 2 == 0 ? 
randomNan<float>(rng) : ptrf[x]; + } + } + } + + TEST_CYCLE() cv::patchNaNs(src, 17.7); + + SANITY_CHECK(src); +} + +INSTANTIATE_TEST_CASE_P(/*nothing*/ , PatchNaNsFixture, + testing::Combine( + testing::Values(szVGA, sz720p, sz1080p, sz2160p), + testing::Values(CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4) + ) +); + } // namespace diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 9e3a1dbad2..525d71ba09 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -1610,30 +1610,37 @@ void patchNaNs( InputOutputArray _a, double _val ) const Mat* arrays[] = {&a, 0}; int* ptrs[1] = {}; NAryMatIterator it(arrays, (uchar**)ptrs); - size_t len = it.size*a.channels(); + int len = (int)(it.size*a.channels()); Cv32suf val; val.f = (float)_val; -#if (CV_SIMD || CV_SIMD_SCALABLE) - v_int32 v_mask1 = vx_setall_s32(0x7fffffff), v_mask2 = vx_setall_s32(0x7f800000); - v_int32 v_val = vx_setall_s32(val.i); -#endif - for( size_t i = 0; i < it.nplanes; i++, ++it ) { int* tptr = ptrs[0]; - size_t j = 0; + int j = 0; #if (CV_SIMD || CV_SIMD_SCALABLE) - size_t cWidth = (size_t)VTraits<v_int32>::vlanes(); - for ( ; j + cWidth <= len; j += cWidth) + v_int32 v_pos_mask = vx_setall_s32(0x7fffffff), v_exp_mask = vx_setall_s32(0x7f800000); + v_int32 v_val = vx_setall_s32(val.i); + + int cWidth = VTraits<v_int32>::vlanes(); + for (; j < len - cWidth * 2 + 1; j += cWidth * 2) { - v_int32 v_src = vx_load(tptr + j); - v_int32 v_cmp_mask = v_lt(v_mask2, v_and(v_src, v_mask1)); - v_int32 v_dst = v_select(v_cmp_mask, v_val, v_src); - v_store(tptr + j, v_dst); + v_int32 v_src0 = vx_load(tptr + j); + v_int32 v_src1 = vx_load(tptr + j + cWidth); + + v_int32 v_cmp_mask0 = v_lt(v_exp_mask, v_and(v_src0, v_pos_mask)); + v_int32 v_cmp_mask1 = v_lt(v_exp_mask, v_and(v_src1, v_pos_mask)); + + if (v_check_any(v_or(v_cmp_mask0, v_cmp_mask1))) + { + v_int32 v_dst0 = v_select(v_cmp_mask0, v_val, v_src0); + v_int32 v_dst1 = v_select(v_cmp_mask1, v_val, v_src1); + + v_store(tptr + j, 
v_dst0); + v_store(tptr + j + cWidth, v_dst1); + } } - vx_cleanup(); #endif for( ; j < len; j++ )