diff --git a/modules/core/include/opencv2/core/fast_math.hpp b/modules/core/include/opencv2/core/fast_math.hpp
index b1e8c4202d..0e8187cac0 100644
--- a/modules/core/include/opencv2/core/fast_math.hpp
+++ b/modules/core/include/opencv2/core/fast_math.hpp
@@ -47,12 +47,6 @@
 
 #include "opencv2/core/cvdef.h"
 
-#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
-    && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
-#include <emmintrin.h>
-#endif
-
-
 //! @addtogroup core_utils
 //! @{
 
@@ -70,19 +64,24 @@
 #  endif
 #endif
 
-#ifdef HAVE_TEGRA_OPTIMIZATION
-#  include "tegra_round.hpp"
-#endif
+#if defined(__CUDACC__)
+  // nothing, intrinsics/asm code is not supported
+#else
+  #if ((defined _MSC_VER && defined _M_X64) \
+      || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \
+      && !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H)
+    #include <emmintrin.h>
+  #endif
 
-#if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 && !defined (__CUDACC__)
-#  include <altivec.h>
-#endif
+  #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \
+      && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H)
+    #include <altivec.h>
+  #endif
 
-#if ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
-        defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
-    #define CV_INLINE_ROUND_DBL(value) TEGRA_ROUND_DBL(value);
-    #define CV_INLINE_ROUND_FLT(value) TEGRA_ROUND_FLT(value);
-#elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__)
+  #if defined(CV_INLINE_ROUND_FLT)
+    // user-specified version
+    // CV_INLINE_ROUND_DBL should be defined too
+  #elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
     // 1. general scheme
     #define ARM_ROUND(_value, _asm_string) \
         int res; \
@@ -98,7 +97,7 @@
     #endif
     // 3. version for float
     #define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
-#elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 && !defined (__CUDACC__)
+  #elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8
     // P8 and newer machines can convert fp32/64 to int quickly.
     #define CV_INLINE_ROUND_DBL(value) \
         int out; \
@@ -108,40 +107,86 @@
 
     // FP32 also works with FP64 routine above
     #define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value)
+  #endif
 
-    #ifdef _ARCH_PWR9
-        #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
-        #define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
-        #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
-        #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
+  #ifdef CV_INLINE_ISINF_FLT
+    // user-specified version
+    // CV_INLINE_ISINF_DBL should be defined too
+  #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
+    #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
+    #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
+  #endif
+
+  #ifdef CV_INLINE_ISNAN_FLT
+    // user-specified version
+    // CV_INLINE_ISNAN_DBL should be defined too
+  #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
+    #define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
+    #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
+  #endif
+
+  #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \
+    && ( \
+        defined(__x86_64__) || defined(__i686__) \
+        || defined(__arm__) \
+        || defined(__PPC64__) \
+    )
+    /* Let builtin C math functions when available. Dedicated hardware is available to
+       round and convert FP values. */
+    #define OPENCV_USE_FASTMATH_BUILTINS 1
+  #endif
+
+  /* Enable builtin math functions if possible, desired, and available.
+     Note, not all math functions inline equally. E.g lrint will not inline
+     without the -fno-math-errno option. */
+  #if defined(CV_ICC)
+    // nothing
+  #elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS
+    #if defined(__clang__)
+      #define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
+      #if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan)
+        #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan)
+        #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf)
+        #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf)
+        #define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value);
+      #endif
+    #elif defined(__GNUC__)
+      #define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS
+      #if !defined(CV_INLINE_ISNAN_DBL)
+        #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISNAN_FLT)
+        #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_DBL)
+        #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_FLT)
+        #define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value);
+      #endif
+    #elif defined(_MSC_VER)
+      #if !defined(CV_INLINE_ISNAN_DBL)
+        #define CV_INLINE_ISNAN_DBL(value) return isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISNAN_FLT)
+        #define CV_INLINE_ISNAN_FLT(value) return isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_DBL)
+        #define CV_INLINE_ISINF_DBL(value) return isinf(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_FLT)
+        #define CV_INLINE_ISINF_FLT(value) return isinf(value);
+      #endif
     #endif
-#elif defined CV_ICC || defined __GNUC__
-    #define CV_INLINE_ROUND_DBL(value) return (int)(lrint(value));
-    #define CV_INLINE_ROUND_FLT(value) return (int)(lrintf(value));
-#endif
+  #endif
 
-#if defined __PPC64__ && !defined OPENCV_USE_FASTMATH_GCC_BUILTINS
-    /* Let GCC inline C math functions when available. Dedicated hardware is available to
-       round and covert FP values. */
-    #define OPENCV_USE_FASTMATH_GCC_BUILTINS
-#endif
-
-/* Enable GCC builtin math functions if possible, desired, and available.
-   Note, not all math functions inline equally. E.g lrint will not inline
-   without the -fno-math-errno option. */
-#if defined OPENCV_USE_FASTMATH_GCC_BUILTINS && defined __GNUC__ && !defined __clang__ && !defined (__CUDACC__)
-    #define _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
-#endif
-
-/* Allow overrides for some functions which may benefit from tuning. Likewise,
-   note that isinf is not used as the return value is signed. */
-#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS && !defined CV_INLINE_ISNAN_DBL
-    #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
-#endif
-
-#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS && !defined CV_INLINE_ISNAN_FLT
-    #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
-#endif
+#endif // defined(__CUDACC__)
 
 /** @brief Rounds floating-point number to the nearest integer
 
@@ -151,8 +196,11 @@
 CV_INLINE int
 cvRound( double value )
 {
-#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
-    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
+#if defined CV_INLINE_ROUND_DBL
+    CV_INLINE_ROUND_DBL(value);
+#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
+    && !defined(__CUDACC__)
     __m128d t = _mm_set_sd( value );
     return _mm_cvtsd_si32(t);
 #elif defined _MSC_VER && defined _M_IX86
@@ -163,8 +211,8 @@ cvRound( double value )
         fistp t;
     }
     return t;
-#elif defined CV_INLINE_ROUND_DBL
-    CV_INLINE_ROUND_DBL(value);
+#elif defined CV_ICC || defined __GNUC__
+    return (int)(lrint(value));
 #else
     /* it's ok if round does not comply with IEEE754 standard;
        the tests should allow +/-1 difference when the tested functions use round */
@@ -182,7 +230,10 @@ cvRound( double value )
  */
 CV_INLINE int cvFloor( double value )
 {
-#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
+#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
+    && ( \
+        defined(__PPC64__) \
+    )
     return __builtin_floor(value);
 #else
     int i = (int)value;
@@ -199,7 +250,10 @@ CV_INLINE int cvFloor( double value )
  */
 CV_INLINE int cvCeil( double value )
 {
-#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
+#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
+    && ( \
+        defined(__PPC64__) \
+    )
     return __builtin_ceil(value);
 #else
     int i = (int)value;
@@ -235,6 +289,11 @@ CV_INLINE int cvIsInf( double value )
 {
 #if defined CV_INLINE_ISINF_DBL
     CV_INLINE_ISINF_DBL(value);
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(__PPC64__)
+    Cv64suf ieee754;
+    ieee754.f = value;
+    return (ieee754.u & 0x7fffffff00000000) ==
+                        0x7ff0000000000000;
 #else
     Cv64suf ieee754;
     ieee754.f = value;
@@ -248,8 +307,11 @@ CV_INLINE int cvIsInf( double value )
 /** @overload */
 CV_INLINE int cvRound(float value)
 {
-#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
-    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
+#if defined CV_INLINE_ROUND_FLT
+    CV_INLINE_ROUND_FLT(value);
+#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
+    && !defined(__CUDACC__)
     __m128 t = _mm_set_ss( value );
     return _mm_cvtss_si32(t);
 #elif defined _MSC_VER && defined _M_IX86
@@ -260,8 +322,8 @@ CV_INLINE int cvRound(float value)
         fistp t;
     }
     return t;
-#elif defined CV_INLINE_ROUND_FLT
-    CV_INLINE_ROUND_FLT(value);
+#elif defined CV_ICC || defined __GNUC__
+    return (int)(lrintf(value));
 #else
     /* it's ok if round does not comply with IEEE754 standard;
      the tests should allow +/-1 difference when the tested functions use round */
@@ -278,7 +340,10 @@ CV_INLINE int cvRound( int value )
 /** @overload */
 CV_INLINE int cvFloor( float value )
 {
-#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
+#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
+    && ( \
+        defined(__PPC64__) \
+    )
     return __builtin_floorf(value);
 #else
     int i = (int)value;
@@ -295,7 +360,10 @@ CV_INLINE int cvFloor( int value )
 /** @overload */
 CV_INLINE int cvCeil( float value )
 {
-#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
+#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
+    && ( \
+        defined(__PPC64__) \
+    )
     return __builtin_ceilf(value);
 #else
     int i = (int)value;