mirror of
https://github.com/opencv/opencv.git
synced 2025-07-31 01:47:12 +08:00
Merge pull request #21506 from alalek:core_fp_denormals
This commit is contained in:
commit
83ce1de8e7
@ -0,0 +1,29 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Private (implementation-side) configuration for the FP denormals hint.
// After this header both OPENCV_IMPL_FP_HINTS and OPENCV_IMPL_FP_HINTS_X86 are
// always defined (to 0 or 1), so they can be used in plain `#if` expressions.
#ifndef OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
|
||||
#define OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
|
||||
|
||||
// Public header: provides OPENCV_SUPPORTS_FP_DENORMALS_HINT tested below.
#include "fp_control_utils.hpp"
|
||||
|
||||
// Implementation selection. The order of the branches matters: the first one that matches wins.
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT == 0
|
||||
// disabled: nothing is selected here, both macros fall through to the 0 defaults below
|
||||
#elif defined(OPENCV_IMPL_FP_HINTS)
|
||||
// custom: OPENCV_IMPL_FP_HINTS was already defined before this header, keep it untouched
|
||||
#elif defined(OPENCV_IMPL_FP_HINTS_X86)
|
||||
// custom: OPENCV_IMPL_FP_HINTS_X86 was already defined before this header, keep it untouched
// (note: <xmmintrin.h> is NOT included by this branch)
|
||||
#elif defined(__SSE__) || defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
|
||||
// SSE-capable build detected: pull in the intrinsics header and enable the x86 implementation
#include <xmmintrin.h>
|
||||
#define OPENCV_IMPL_FP_HINTS_X86 1
|
||||
#define OPENCV_IMPL_FP_HINTS 1
|
||||
#endif
|
||||
|
||||
// Defaults: anything not selected (or not pre-defined) above is explicitly turned off.
#ifndef OPENCV_IMPL_FP_HINTS
|
||||
#define OPENCV_IMPL_FP_HINTS 0
|
||||
#endif
|
||||
#ifndef OPENCV_IMPL_FP_HINTS_X86
|
||||
#define OPENCV_IMPL_FP_HINTS_X86 0
|
||||
#endif
|
||||
|
||||
#endif // OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
|
69
modules/core/include/opencv2/core/utils/fp_control_utils.hpp
Normal file
69
modules/core/include/opencv2/core/utils/fp_control_utils.hpp
Normal file
@ -0,0 +1,69 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_CORE_FP_CONTROL_UTILS_HPP
|
||||
#define OPENCV_CORE_FP_CONTROL_UTILS_HPP
|
||||
|
||||
namespace cv {
|
||||
|
||||
namespace details {
|
||||
|
||||
// Opaque, fixed-size storage for a saved FP denormals-handling state.
// The x86 implementation in this change writes the controlled MXCSR bit mask to
// reserved[0] and the previous (masked) bit values to reserved[1]; it does not touch
// the remaining words. No constructor is provided, so a freshly declared object holds
// indeterminate values until one of the save/set functions fills it.
struct FPDenormalsModeState
|
||||
{
|
||||
uint32_t reserved[16]; // 64-bytes
|
||||
}; // FPDenormalsModeState
|
||||
|
||||
// Turns the "ignore denormals" hint on (ignore=true) or off (ignore=false) and writes the
// previous setting into `state` so it can be handed to restoreFPDenormalsState() later.
// When no implementation is compiled in, the call does nothing and `state` is left untouched.
CV_EXPORTS void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state);
|
||||
// Captures the current setting into `state` without changing it.
// Returns 2 in the x86 implementation and 0 when no implementation is compiled in
// (in that case `state` is not written). The accompanying test treats the result as the
// number of leading `state.reserved[]` words that are meaningful.
CV_EXPORTS int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state);
|
||||
// Re-applies a setting previously captured by one of the two functions above.
// Returns true when the state was applied, false when no implementation is compiled in.
CV_EXPORTS bool restoreFPDenormalsState(const FPDenormalsModeState& state);
|
||||
|
||||
class FPDenormalsIgnoreHintScope
|
||||
{
|
||||
public:
|
||||
inline explicit FPDenormalsIgnoreHintScope(bool ignore = true)
|
||||
{
|
||||
details::setFPDenormalsIgnoreHint(ignore, saved_state);
|
||||
}
|
||||
|
||||
inline explicit FPDenormalsIgnoreHintScope(const FPDenormalsModeState& state)
|
||||
{
|
||||
details::saveFPDenormalsState(saved_state);
|
||||
details::restoreFPDenormalsState(state);
|
||||
}
|
||||
|
||||
inline ~FPDenormalsIgnoreHintScope()
|
||||
{
|
||||
details::restoreFPDenormalsState(saved_state);
|
||||
}
|
||||
|
||||
protected:
|
||||
FPDenormalsModeState saved_state;
|
||||
}; // FPDenormalsIgnoreHintScope
|
||||
|
||||
class FPDenormalsIgnoreHintScopeNOOP
|
||||
{
|
||||
public:
|
||||
inline FPDenormalsIgnoreHintScopeNOOP(bool ignore = true) { CV_UNUSED(ignore); }
|
||||
inline FPDenormalsIgnoreHintScopeNOOP(const FPDenormalsModeState& state) { CV_UNUSED(state); }
|
||||
inline ~FPDenormalsIgnoreHintScopeNOOP() { }
|
||||
}; // FPDenormalsIgnoreHintScopeNOOP
|
||||
|
||||
} // namespace details
|
||||
|
||||
|
||||
// Should depend on target compilation architecture only
|
||||
// Note: previously added archs should NOT be removed to preserve ABI compatibility
|
||||
#if defined(OPENCV_SUPPORTS_FP_DENORMALS_HINT)
|
||||
// preserve configuration overloading through ports
|
||||
#elif defined(__i386__) || defined(__x86_64__) || defined(_M_X64) || defined(_X86_)
|
||||
typedef details::FPDenormalsIgnoreHintScope FPDenormalsIgnoreHintScope;
|
||||
#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 1
|
||||
#else
|
||||
#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 0
|
||||
typedef details::FPDenormalsIgnoreHintScopeNOOP FPDenormalsIgnoreHintScope;
|
||||
#endif
|
||||
|
||||
} // namespace cv
|
||||
|
||||
#endif // OPENCV_CORE_FP_CONTROL_UTILS_HPP
|
@ -142,6 +142,9 @@
|
||||
|
||||
#include "opencv2/core/detail/exception_ptr.hpp" // CV__EXCEPTION_PTR = 1 if std::exception_ptr is available
|
||||
|
||||
#include <opencv2/core/utils/fp_control_utils.hpp>
|
||||
#include <opencv2/core/utils/fp_control.private.hpp>
|
||||
|
||||
using namespace cv;
|
||||
|
||||
namespace cv {
|
||||
@ -191,6 +194,9 @@ namespace {
|
||||
|
||||
// propagate main thread state
|
||||
rng = cv::theRNG();
|
||||
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
|
||||
details::saveFPDenormalsState(fp_denormals_base_state);
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_TRACE
|
||||
traceRootRegion = CV_TRACE_NS::details::getCurrentRegion();
|
||||
@ -271,6 +277,11 @@ namespace {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
|
||||
details::FPDenormalsModeState fp_denormals_base_state;
|
||||
#endif
|
||||
|
||||
private:
|
||||
ParallelLoopBodyWrapperContext(const ParallelLoopBodyWrapperContext&); // disabled
|
||||
ParallelLoopBodyWrapperContext& operator=(const ParallelLoopBodyWrapperContext&); // disabled
|
||||
@ -307,6 +318,9 @@ namespace {
|
||||
|
||||
// propagate main thread state
|
||||
cv::theRNG() = ctx.rng;
|
||||
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
|
||||
FPDenormalsIgnoreHintScope fp_denormals_scope(ctx.fp_denormals_base_state);
|
||||
#endif
|
||||
|
||||
cv::Range r;
|
||||
cv::Range wholeRange = ctx.wholeRange;
|
||||
|
@ -53,6 +53,9 @@
|
||||
#include <opencv2/core/utils/tls.hpp>
|
||||
#include <opencv2/core/utils/instrumentation.hpp>
|
||||
|
||||
#include <opencv2/core/utils/fp_control_utils.hpp>
|
||||
#include <opencv2/core/utils/fp_control.private.hpp>
|
||||
|
||||
#ifndef OPENCV_WITH_THREAD_SANITIZER
|
||||
#if defined(__clang__) && defined(__has_feature)
|
||||
#if __has_feature(thread_sanitizer)
|
||||
@ -2733,6 +2736,82 @@ void setUseIPP_NE(bool flag)
|
||||
|
||||
} // namespace ipp
|
||||
|
||||
|
||||
namespace details {
|
||||
|
||||
// Fallback definitions for the "denormals are zero" MXCSR constants. They are only needed
// for the x86 implementation and only when the headers included so far did not provide
// them (the private header pulls in <xmmintrin.h> only; per the notes below the macros
// normally come from pmmintrin.h). Both the "on" value and the mask are the single bit 0x0040.
#if OPENCV_IMPL_FP_HINTS_X86
|
||||
#ifndef _MM_DENORMALS_ZERO_ON // requires pmmintrin.h (SSE3)
|
||||
#define _MM_DENORMALS_ZERO_ON 0x0040
|
||||
#endif
|
||||
#ifndef _MM_DENORMALS_ZERO_MASK // requires pmmintrin.h (SSE3)
|
||||
#define _MM_DENORMALS_ZERO_MASK 0x0040
|
||||
#endif
|
||||
#endif
|
||||
|
||||
void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state)
|
||||
{
|
||||
#if OPENCV_IMPL_FP_HINTS_X86
|
||||
unsigned mask = _MM_FLUSH_ZERO_MASK;
|
||||
unsigned value = ignore ? _MM_FLUSH_ZERO_ON : 0;
|
||||
if (featuresEnabled.have[CPU_SSE3])
|
||||
{
|
||||
mask |= _MM_DENORMALS_ZERO_MASK;
|
||||
value |= ignore ? _MM_DENORMALS_ZERO_ON : 0;
|
||||
}
|
||||
const unsigned old_flags = _mm_getcsr();
|
||||
const unsigned old_value = old_flags & mask;
|
||||
unsigned flags = (old_flags & ~mask) | value;
|
||||
CV_LOG_DEBUG(NULL, "core: update FP mxcsr flags = " << cv::format("0x%08x", flags));
|
||||
// save state
|
||||
state.reserved[0] = (uint32_t)mask;
|
||||
state.reserved[1] = (uint32_t)old_value;
|
||||
_mm_setcsr(flags);
|
||||
#else
|
||||
CV_UNUSED(ignore); CV_UNUSED(state);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Captures the current denormals-related MXCSR bits without modifying the register.
//
// @param state  receives the controlled bit mask in reserved[0] and the current values of
//               those bits in reserved[1]; not written when the x86 implementation is
//               compiled out.
// @return 2 when the state was captured, 0 when the x86 implementation is compiled out.
int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state)
{
#if !OPENCV_IMPL_FP_HINTS_X86
    CV_UNUSED(state);
    return 0;
#else
    // Flush-to-zero is always tracked; denormals-are-zero only when SSE3 is reported.
    const unsigned controlledBits = featuresEnabled.have[CPU_SSE3]
            ? (unsigned)(_MM_FLUSH_ZERO_MASK | _MM_DENORMALS_ZERO_MASK)
            : (unsigned)_MM_FLUSH_ZERO_MASK;

    state.reserved[0] = (uint32_t)controlledBits;
    state.reserved[1] = (uint32_t)(_mm_getcsr() & controlledBits);
    return 2;
#endif
}
|
||||
|
||||
bool restoreFPDenormalsState(const FPDenormalsModeState& state)
|
||||
{
|
||||
#if OPENCV_IMPL_FP_HINTS_X86
|
||||
const unsigned mask = (unsigned)state.reserved[0];
|
||||
CV_DbgAssert(mask != 0); // invalid state (ensure that state is properly saved earlier)
|
||||
const unsigned value = (unsigned)state.reserved[1];
|
||||
CV_DbgCheck((int)value, value == (value & mask), "invalid SSE FP state");
|
||||
const unsigned old_flags = _mm_getcsr();
|
||||
unsigned flags = (old_flags & ~mask) | value;
|
||||
CV_LOG_DEBUG(NULL, "core: restore FP mxcsr flags = " << cv::format("0x%08x", flags));
|
||||
_mm_setcsr(flags);
|
||||
return true;
|
||||
#else
|
||||
CV_UNUSED(state);
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace details
|
||||
|
||||
|
||||
} // namespace cv
|
||||
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
|
@ -3,6 +3,15 @@
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#include "test_precomp.hpp"
|
||||
|
||||
#include "opencv2/core/utils/logger.hpp"
|
||||
|
||||
#include <opencv2/core/utils/fp_control_utils.hpp>
|
||||
|
||||
#ifdef CV_CXX11
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
namespace opencv_test { namespace {
|
||||
|
||||
TEST(Core_OutputArrayCreate, _1997)
|
||||
@ -242,6 +251,62 @@ TEST(Core_Parallel, propagate_exceptions)
|
||||
}, cv::Exception);
|
||||
}
|
||||
|
||||
class FPDenormalsHintCheckerParallelLoopBody : public cv::ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
FPDenormalsHintCheckerParallelLoopBody()
|
||||
: isOK(true)
|
||||
{
|
||||
state_values_to_check = cv::details::saveFPDenormalsState(base_state);
|
||||
}
|
||||
~FPDenormalsHintCheckerParallelLoopBody() {}
|
||||
void operator()(const cv::Range& r) const
|
||||
{
|
||||
CV_UNUSED(r);
|
||||
cv::details::FPDenormalsModeState state;
|
||||
if (cv::details::saveFPDenormalsState(state))
|
||||
{
|
||||
for (int i = 0; i < state_values_to_check; ++i)
|
||||
{
|
||||
if (base_state.reserved[i] != state.reserved[i])
|
||||
{
|
||||
CV_LOG_ERROR(NULL, cv::format("FP state[%d] mismatch: base=0x%08x thread=0x%08x", i, base_state.reserved[i], state.reserved[i]));
|
||||
isOK = false;
|
||||
cv::details::restoreFPDenormalsState(base_state);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// FP state is not supported
|
||||
// no checks
|
||||
}
|
||||
#ifdef CV_CXX11
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
#endif
|
||||
}
|
||||
|
||||
cv::details::FPDenormalsModeState base_state;
|
||||
int state_values_to_check;
|
||||
|
||||
mutable bool isOK;
|
||||
};
|
||||
|
||||
// Alternates the denormals hint off/on over four rounds and checks, via
// FPDenormalsHintCheckerParallelLoopBody, that every parallel_for_ invocation sees the
// same FP state as the thread that launched it.
TEST(Core_Parallel, propagate_fp_denormals_ignore_hint)
{
    // Three ranges per available thread (at least three in total).
    const int nThreads = std::max(1, cv::getNumThreads()) * 3;

    for (int caseIdx = 0; caseIdx < 4; ++caseIdx)
    {
        const bool enableHint = (caseIdx & 1) != 0;  // odd rounds enable, even rounds disable
        SCOPED_TRACE(cv::format("Case=%d: FP denormals ignore hint: %s\n", caseIdx, enableHint ? "enable" : "disable"));

        FPDenormalsIgnoreHintScope fp_denormals_scope(enableHint);

        // Must be created after the scope above so it captures the state under test.
        FPDenormalsHintCheckerParallelLoopBody job;
        ASSERT_NO_THROW({
            parallel_for_(cv::Range(0, nThreads), job);
        });
        EXPECT_TRUE(job.isOK);
    }
}
|
||||
|
||||
TEST(Core_Version, consistency)
|
||||
{
|
||||
// this test verifies that OpenCV version loaded in runtime
|
||||
|
Loading…
Reference in New Issue
Block a user