diff --git a/modules/core/include/opencv2/core/utils/fp_control.private.hpp b/modules/core/include/opencv2/core/utils/fp_control.private.hpp
new file mode 100644
index 0000000000..12ee363dd8
--- /dev/null
+++ b/modules/core/include/opencv2/core/utils/fp_control.private.hpp
@@ -0,0 +1,33 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
+#define OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
+
+#include "fp_control_utils.hpp"
+
+// Implementation selection for the FP denormals hint.
+// Result: OPENCV_IMPL_FP_HINTS and OPENCV_IMPL_FP_HINTS_X86 are always defined (0 or 1 by default),
+// values predefined by the build configuration are preserved.
+#if OPENCV_SUPPORTS_FP_DENORMALS_HINT == 0
+  // disabled
+#elif defined(OPENCV_IMPL_FP_HINTS)
+  // custom
+#elif defined(OPENCV_IMPL_FP_HINTS_X86)
+  // custom
+#elif defined(__SSE__) || defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
+  #include <xmmintrin.h>
+  #define OPENCV_IMPL_FP_HINTS_X86 1
+  #define OPENCV_IMPL_FP_HINTS 1
+#endif
+
+// defaults: nothing has been selected above
+#ifndef OPENCV_IMPL_FP_HINTS
+#define OPENCV_IMPL_FP_HINTS 0
+#endif
+#ifndef OPENCV_IMPL_FP_HINTS_X86
+#define OPENCV_IMPL_FP_HINTS_X86 0
+#endif
+
+#endif // OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
diff --git a/modules/core/include/opencv2/core/utils/fp_control_utils.hpp b/modules/core/include/opencv2/core/utils/fp_control_utils.hpp
new file mode 100644
index 0000000000..930bc5d367
--- /dev/null
+++ b/modules/core/include/opencv2/core/utils/fp_control_utils.hpp
@@ -0,0 +1,80 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_FP_CONTROL_UTILS_HPP
+#define OPENCV_CORE_FP_CONTROL_UTILS_HPP
+
+namespace cv {
+
+namespace details {
+
+/** Opaque storage for a saved FP denormals mode; the content is defined by the implementation */
+struct FPDenormalsModeState
+{
+    uint32_t reserved[16]; // 64-bytes
+}; // FPDenormalsModeState
+
+/** Sets (ignore=true) or clears (ignore=false) the "ignore FP denormals" hint.
+ * The mode which was active before the call is stored into `state` if an implementation is available. */
+CV_EXPORTS void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state);
+/** Stores the currently active mode into `state`.
+ * @return number of used entries of `state.reserved`, 0 if there is no implementation */
+CV_EXPORTS int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state);
+/** Applies the mode stored in `state` by setFPDenormalsIgnoreHint() or saveFPDenormalsState().
+ * @return true if the mode has been applied, false if there is no implementation */
+CV_EXPORTS bool restoreFPDenormalsState(const FPDenormalsModeState& state);
+
+/** Applies a denormals mode in the constructor and restores the previously active mode in the destructor */
+class FPDenormalsIgnoreHintScope
+{
+public:
+    inline explicit FPDenormalsIgnoreHintScope(bool ignore = true)
+    {
+        details::setFPDenormalsIgnoreHint(ignore, saved_state);
+    }
+
+    /** Applies a mode captured earlier in `state` (e.g. to propagate the mode of the thread which started a parallel loop) */
+    inline explicit FPDenormalsIgnoreHintScope(const FPDenormalsModeState& state)
+    {
+        details::saveFPDenormalsState(saved_state);
+        details::restoreFPDenormalsState(state);
+    }
+
+    inline ~FPDenormalsIgnoreHintScope()
+    {
+        details::restoreFPDenormalsState(saved_state);
+    }
+
+protected:
+    FPDenormalsModeState saved_state;  // mode to restore in the destructor
+}; // FPDenormalsIgnoreHintScope
+
+/** Replacement of FPDenormalsIgnoreHintScope with the same constructors which does nothing */
+class FPDenormalsIgnoreHintScopeNOOP
+{
+public:
+    inline FPDenormalsIgnoreHintScopeNOOP(bool ignore = true) { CV_UNUSED(ignore); }
+    inline FPDenormalsIgnoreHintScopeNOOP(const FPDenormalsModeState& state) { CV_UNUSED(state); }
+    inline ~FPDenormalsIgnoreHintScopeNOOP() { }
+}; // FPDenormalsIgnoreHintScopeNOOP
+
+} // namespace details
+
+
+// Should depend on target compilation architecture only
+// Note: previously added archs should NOT be removed to preserve ABI compatibility
+#if defined(OPENCV_SUPPORTS_FP_DENORMALS_HINT)
+    // preserve configuration overloading through ports
+    // NOTE(review): cv::FPDenormalsIgnoreHintScope is not declared in this branch - presumably it is provided by the port; confirm
+#elif defined(__i386__) || defined(__x86_64__) || defined(_M_X64) || defined(_X86_)
+typedef details::FPDenormalsIgnoreHintScope FPDenormalsIgnoreHintScope;
+#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 1
+#else
+#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 0
+typedef details::FPDenormalsIgnoreHintScopeNOOP FPDenormalsIgnoreHintScope;
+#endif
+
+} // namespace cv
+
+#endif // OPENCV_CORE_FP_CONTROL_UTILS_HPP
diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp
index d1454457b7..95df5454c2 100644
--- a/modules/core/src/parallel.cpp
+++ b/modules/core/src/parallel.cpp
@@ -142,6 +142,9 @@
 
 #include "opencv2/core/detail/exception_ptr.hpp"  // CV__EXCEPTION_PTR = 1 if std::exception_ptr is available
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+#include <opencv2/core/utils/fp_control.private.hpp>
+
 using namespace cv;
 
 namespace cv {
@@ -191,6 +194,9 @@ namespace {
 
             // propagate main thread state
             rng = cv::theRNG();
+#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
+            details::saveFPDenormalsState(fp_denormals_base_state);
+#endif
 
 #ifdef OPENCV_TRACE
             traceRootRegion = CV_TRACE_NS::details::getCurrentRegion();
@@ -271,6 +277,12 @@ namespace {
                 }
             }
         }
+
+#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
+        // FP denormals state of the "main" thread (see "propagate main thread state")
+        details::FPDenormalsModeState fp_denormals_base_state;
+#endif
+
     private:
         ParallelLoopBodyWrapperContext(const ParallelLoopBodyWrapperContext&); // disabled
         ParallelLoopBodyWrapperContext& operator=(const ParallelLoopBodyWrapperContext&); // disabled
@@ -307,6 +319,9 @@ namespace {
 
             // propagate main thread state
             cv::theRNG() = ctx.rng;
+#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
+            FPDenormalsIgnoreHintScope fp_denormals_scope(ctx.fp_denormals_base_state);
+#endif
 
             cv::Range r;
             cv::Range wholeRange = ctx.wholeRange;
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index e8c5c20d89..9bade08177 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -53,6 +53,9 @@
 #include
 #include
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+#include <opencv2/core/utils/fp_control.private.hpp>
+
 #ifndef OPENCV_WITH_THREAD_SANITIZER
   #if defined(__clang__) && defined(__has_feature)
   #if __has_feature(thread_sanitizer)
@@ -2733,6 +2736,89 @@ void setUseIPP_NE(bool flag)
 
 } // namespace ipp
 
+
+namespace details {
+
+#if OPENCV_IMPL_FP_HINTS_X86
+#ifndef _MM_DENORMALS_ZERO_ON // requires pmmintrin.h (SSE3)
+#define _MM_DENORMALS_ZERO_ON 0x0040
+#endif
+#ifndef _MM_DENORMALS_ZERO_MASK // requires pmmintrin.h (SSE3)
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#endif
+#endif
+
+/** Updates the SSE control register (MXCSR): sets (ignore=true) or clears (ignore=false) the "flush zero" bits,
+ * and the "denormals zero" bits if SSE3 is enabled. The mask of the updated bits and the previous values of
+ * these bits are stored into state.reserved[0] and state.reserved[1]. Does nothing without OPENCV_IMPL_FP_HINTS_X86. */
+void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state)
+{
+#if OPENCV_IMPL_FP_HINTS_X86
+    unsigned mask = _MM_FLUSH_ZERO_MASK;
+    unsigned value = ignore ? _MM_FLUSH_ZERO_ON : 0;
+    if (featuresEnabled.have[CPU_SSE3])
+    {
+        mask |= _MM_DENORMALS_ZERO_MASK;
+        value |= ignore ? _MM_DENORMALS_ZERO_ON : 0;
+    }
+    const unsigned old_flags = _mm_getcsr();
+    const unsigned old_value = old_flags & mask;
+    unsigned flags = (old_flags & ~mask) | value;
+    CV_LOG_DEBUG(NULL, "core: update FP mxcsr flags = " << cv::format("0x%08x", flags));
+    // save state
+    state.reserved[0] = (uint32_t)mask;
+    state.reserved[1] = (uint32_t)old_value;
+    _mm_setcsr(flags);
+#else
+    CV_UNUSED(ignore); CV_UNUSED(state);
+#endif
+}
+
+/** Stores the mask of the "flush zero" (and "denormals zero" with SSE3) MXCSR bits and their current values
+ * into state.reserved[0] and state.reserved[1]. @return number of stored values (2), 0 without OPENCV_IMPL_FP_HINTS_X86 */
+int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state)
+{
+#if OPENCV_IMPL_FP_HINTS_X86
+    unsigned mask = _MM_FLUSH_ZERO_MASK;
+    if (featuresEnabled.have[CPU_SSE3])
+    {
+        mask |= _MM_DENORMALS_ZERO_MASK;
+    }
+    const unsigned old_flags = _mm_getcsr();
+    const unsigned old_value = old_flags & mask;
+    // save state
+    state.reserved[0] = (uint32_t)mask;
+    state.reserved[1] = (uint32_t)old_value;
+    return 2;
+#else
+    CV_UNUSED(state);
+    return 0;
+#endif
+}
+
+/** Replaces the MXCSR bits selected by the mask state.reserved[0] with the values from state.reserved[1],
+ * other bits are kept. @return true, or false (nothing is done) without OPENCV_IMPL_FP_HINTS_X86 */
+bool restoreFPDenormalsState(const FPDenormalsModeState& state)
+{
+#if OPENCV_IMPL_FP_HINTS_X86
+    const unsigned mask = (unsigned)state.reserved[0];
+    CV_DbgAssert(mask != 0); // invalid state (ensure that state is properly saved earlier)
+    const unsigned value = (unsigned)state.reserved[1];
+    CV_DbgCheck((int)value, value == (value & mask), "invalid SSE FP state");
+    const unsigned old_flags = _mm_getcsr();
+    unsigned flags = (old_flags & ~mask) | value;
+    CV_LOG_DEBUG(NULL, "core: restore FP mxcsr flags = " << cv::format("0x%08x", flags));
+    _mm_setcsr(flags);
+    return true;
+#else
+    CV_UNUSED(state);
+    return false;
+#endif
+}
+
+} // namespace details
+
+
 } // namespace cv
 
 #ifdef HAVE_TEGRA_OPTIMIZATION
diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp
index d9e9119230..e00544fdf4 100644
--- a/modules/core/test/test_misc.cpp
+++ b/modules/core/test/test_misc.cpp
@@ -3,6 +3,15 @@
 // of this distribution and at http://opencv.org/license.html.
 #include "test_precomp.hpp"
 
+#include "opencv2/core/utils/logger.hpp"
+
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
+#ifdef CV_CXX11
+#include <chrono>
+#include <thread>
+#endif
+
 namespace opencv_test { namespace {
 
 TEST(Core_OutputArrayCreate, _1997)
@@ -242,6 +251,64 @@ TEST(Core_Parallel, propagate_exceptions)
     }, cv::Exception);
 }
 
+// Loop body which compares the FP denormals state observed inside of parallel_for_() tasks
+// with the state captured on construction of the body; a mismatch clears isOK
+class FPDenormalsHintCheckerParallelLoopBody : public cv::ParallelLoopBody
+{
+public:
+    FPDenormalsHintCheckerParallelLoopBody()
+        : isOK(true)
+    {
+        state_values_to_check = cv::details::saveFPDenormalsState(base_state);
+    }
+    ~FPDenormalsHintCheckerParallelLoopBody() {}
+    void operator()(const cv::Range& r) const
+    {
+        CV_UNUSED(r);
+        cv::details::FPDenormalsModeState state;
+        if (cv::details::saveFPDenormalsState(state))
+        {
+            for (int i = 0; i < state_values_to_check; ++i)
+            {
+                if (base_state.reserved[i] != state.reserved[i])
+                {
+                    CV_LOG_ERROR(NULL, cv::format("FP state[%d] mismatch: base=0x%08x thread=0x%08x", i, base_state.reserved[i], state.reserved[i]));
+                    isOK = false;
+                    cv::details::restoreFPDenormalsState(base_state);
+                }
+            }
+        }
+        else
+        {
+            // FP state is not supported
+            // no checks
+        }
+#ifdef CV_CXX11
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+#endif
+    }
+
+    cv::details::FPDenormalsModeState base_state;  // state captured in the constructor
+    int state_values_to_check;  // result of saveFPDenormalsState(): number of entries to compare
+
+    mutable bool isOK;  // NOTE(review): written by loop tasks without synchronization (only 'false' is stored)
+};
+
+TEST(Core_Parallel, propagate_fp_denormals_ignore_hint)
+{
+    int nThreads = std::max(1, cv::getNumThreads()) * 3;
+    for (int i = 0; i < 4; ++i)
+    {
+        SCOPED_TRACE(cv::format("Case=%d: FP denormals ignore hint: %s\n", i, ((i & 1) != 0) ? "enable" : "disable"));
+        FPDenormalsIgnoreHintScope fp_denormals_scope((i & 1) != 0);
+        FPDenormalsHintCheckerParallelLoopBody job;
+        ASSERT_NO_THROW({
+            parallel_for_(cv::Range(0, nThreads), job);
+        });
+        EXPECT_TRUE(job.isOK);
+    }
+}
+
 TEST(Core_Version, consistency)
 {
     // this test verifies that OpenCV version loaded in runtime