mirror of
https://github.com/opencv/opencv.git
synced 2025-08-01 18:37:04 +08:00
Merge pull request #21506 from alalek:core_fp_denormals
This commit is contained in:
commit
83ce1de8e7
@ -0,0 +1,29 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
|
||||||
|
#define OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
|
||||||
|
|
||||||
|
#include "fp_control_utils.hpp"
|
||||||
|
|
||||||
|
// Select the backend used to implement the FP denormals hint.
//
// The SSE (MXCSR based) backend is chosen automatically only when
//  - the hint is supported for the target (OPENCV_SUPPORTS_FP_DENORMALS_HINT != 0),
//  - the build did not already choose a backend by predefining
//    OPENCV_IMPL_FP_HINTS or OPENCV_IMPL_FP_HINTS_X86 ("custom" configuration),
//  - SSE code generation is enabled for this translation unit.
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT != 0 \
    && !defined(OPENCV_IMPL_FP_HINTS) \
    && !defined(OPENCV_IMPL_FP_HINTS_X86) \
    && (defined(__SSE__) || defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1))
#  include <xmmintrin.h>
#  define OPENCV_IMPL_FP_HINTS_X86 1
#  define OPENCV_IMPL_FP_HINTS 1
#endif

// Whatever was neither selected above nor predefined is reported as "off",
// so both macros can always be tested with a plain #if.
#if !defined(OPENCV_IMPL_FP_HINTS)
#  define OPENCV_IMPL_FP_HINTS 0
#endif
#if !defined(OPENCV_IMPL_FP_HINTS_X86)
#  define OPENCV_IMPL_FP_HINTS_X86 0
#endif
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
|
69
modules/core/include/opencv2/core/utils/fp_control_utils.hpp
Normal file
69
modules/core/include/opencv2/core/utils/fp_control_utils.hpp
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_FP_CONTROL_UTILS_HPP
|
||||||
|
#define OPENCV_CORE_FP_CONTROL_UTILS_HPP
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
|
||||||
|
namespace details {
|
||||||
|
|
||||||
|
/** @brief Opaque snapshot of the floating-point denormals handling mode.
 *
 * Instances are filled by setFPDenormalsIgnoreHint() / saveFPDenormalsState() and
 * consumed by restoreFPDenormalsState(). The content of the words is private to the
 * implementation and must not be interpreted by callers.
 */
struct FPDenormalsModeState
{
    uint32_t reserved[16];  // 16 words x 4 bytes = 64 bytes
};  // FPDenormalsModeState
|
||||||
|
|
||||||
|
/** @brief Turns the "ignore denormals" hint on or off and records the previous mode.
 *
 * @param ignore true to request that denormals are ignored, false to clear the request
 * @param[out] state receives the mode that was active before the call, suitable for
 *                   restoreFPDenormalsState()
 *
 * Does nothing (and leaves @p state untouched) when the library is built without
 * an FP hints implementation.
 */
CV_EXPORTS void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state);

/** @brief Captures the current denormals handling mode without changing it.
 *
 * @param[out] state receives the current mode
 * @return number of leading entries of `state.reserved` that carry information;
 *         0 when the library is built without an FP hints implementation
 */
CV_EXPORTS int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state);

/** @brief Re-applies a mode previously captured by saveFPDenormalsState() or
 *         setFPDenormalsIgnoreHint().
 *
 * @param state previously captured mode
 * @return true when the mode was applied, false when the library is built without
 *         an FP hints implementation
 */
CV_EXPORTS bool restoreFPDenormalsState(const FPDenormalsModeState& state);
|
||||||
|
|
||||||
|
class FPDenormalsIgnoreHintScope
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
inline explicit FPDenormalsIgnoreHintScope(bool ignore = true)
|
||||||
|
{
|
||||||
|
details::setFPDenormalsIgnoreHint(ignore, saved_state);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline explicit FPDenormalsIgnoreHintScope(const FPDenormalsModeState& state)
|
||||||
|
{
|
||||||
|
details::saveFPDenormalsState(saved_state);
|
||||||
|
details::restoreFPDenormalsState(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ~FPDenormalsIgnoreHintScope()
|
||||||
|
{
|
||||||
|
details::restoreFPDenormalsState(saved_state);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
FPDenormalsModeState saved_state;
|
||||||
|
}; // FPDenormalsIgnoreHintScope
|
||||||
|
|
||||||
|
class FPDenormalsIgnoreHintScopeNOOP
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
inline FPDenormalsIgnoreHintScopeNOOP(bool ignore = true) { CV_UNUSED(ignore); }
|
||||||
|
inline FPDenormalsIgnoreHintScopeNOOP(const FPDenormalsModeState& state) { CV_UNUSED(state); }
|
||||||
|
inline ~FPDenormalsIgnoreHintScopeNOOP() { }
|
||||||
|
}; // FPDenormalsIgnoreHintScopeNOOP
|
||||||
|
|
||||||
|
} // namespace details
|
||||||
|
|
||||||
|
|
||||||
|
// Should depend on target compilation architecture only
|
||||||
|
// Note: previously added archs should NOT be removed to preserve ABI compatibility
|
||||||
|
#if defined(OPENCV_SUPPORTS_FP_DENORMALS_HINT)
|
||||||
|
// preserve configuration overloading through ports
|
||||||
|
#elif defined(__i386__) || defined(__x86_64__) || defined(_M_X64) || defined(_X86_)
|
||||||
|
typedef details::FPDenormalsIgnoreHintScope FPDenormalsIgnoreHintScope;
|
||||||
|
#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 1
|
||||||
|
#else
|
||||||
|
#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 0
|
||||||
|
typedef details::FPDenormalsIgnoreHintScopeNOOP FPDenormalsIgnoreHintScope;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // namespace cv
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_FP_CONTROL_UTILS_HPP
|
@ -142,6 +142,9 @@
|
|||||||
|
|
||||||
#include "opencv2/core/detail/exception_ptr.hpp" // CV__EXCEPTION_PTR = 1 if std::exception_ptr is available
|
#include "opencv2/core/detail/exception_ptr.hpp" // CV__EXCEPTION_PTR = 1 if std::exception_ptr is available
|
||||||
|
|
||||||
|
#include <opencv2/core/utils/fp_control_utils.hpp>
|
||||||
|
#include <opencv2/core/utils/fp_control.private.hpp>
|
||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
|
||||||
namespace cv {
|
namespace cv {
|
||||||
@ -191,6 +194,9 @@ namespace {
|
|||||||
|
|
||||||
// propagate main thread state
|
// propagate main thread state
|
||||||
rng = cv::theRNG();
|
rng = cv::theRNG();
|
||||||
|
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
|
||||||
|
details::saveFPDenormalsState(fp_denormals_base_state);
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef OPENCV_TRACE
|
#ifdef OPENCV_TRACE
|
||||||
traceRootRegion = CV_TRACE_NS::details::getCurrentRegion();
|
traceRootRegion = CV_TRACE_NS::details::getCurrentRegion();
|
||||||
@ -271,6 +277,11 @@ namespace {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
|
||||||
|
details::FPDenormalsModeState fp_denormals_base_state;
|
||||||
|
#endif
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ParallelLoopBodyWrapperContext(const ParallelLoopBodyWrapperContext&); // disabled
|
ParallelLoopBodyWrapperContext(const ParallelLoopBodyWrapperContext&); // disabled
|
||||||
ParallelLoopBodyWrapperContext& operator=(const ParallelLoopBodyWrapperContext&); // disabled
|
ParallelLoopBodyWrapperContext& operator=(const ParallelLoopBodyWrapperContext&); // disabled
|
||||||
@ -307,6 +318,9 @@ namespace {
|
|||||||
|
|
||||||
// propagate main thread state
|
// propagate main thread state
|
||||||
cv::theRNG() = ctx.rng;
|
cv::theRNG() = ctx.rng;
|
||||||
|
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
|
||||||
|
FPDenormalsIgnoreHintScope fp_denormals_scope(ctx.fp_denormals_base_state);
|
||||||
|
#endif
|
||||||
|
|
||||||
cv::Range r;
|
cv::Range r;
|
||||||
cv::Range wholeRange = ctx.wholeRange;
|
cv::Range wholeRange = ctx.wholeRange;
|
||||||
|
@ -53,6 +53,9 @@
|
|||||||
#include <opencv2/core/utils/tls.hpp>
|
#include <opencv2/core/utils/tls.hpp>
|
||||||
#include <opencv2/core/utils/instrumentation.hpp>
|
#include <opencv2/core/utils/instrumentation.hpp>
|
||||||
|
|
||||||
|
#include <opencv2/core/utils/fp_control_utils.hpp>
|
||||||
|
#include <opencv2/core/utils/fp_control.private.hpp>
|
||||||
|
|
||||||
#ifndef OPENCV_WITH_THREAD_SANITIZER
|
#ifndef OPENCV_WITH_THREAD_SANITIZER
|
||||||
#if defined(__clang__) && defined(__has_feature)
|
#if defined(__clang__) && defined(__has_feature)
|
||||||
#if __has_feature(thread_sanitizer)
|
#if __has_feature(thread_sanitizer)
|
||||||
@ -2733,6 +2736,82 @@ void setUseIPP_NE(bool flag)
|
|||||||
|
|
||||||
} // namespace ipp
|
} // namespace ipp
|
||||||
|
|
||||||
|
|
||||||
|
namespace details {
|
||||||
|
|
||||||
|
// The "denormals are zero" constants are normally provided by pmmintrin.h (SSE3).
// Supply them here for x86 builds of the FP hints backend that do not have them.
#if OPENCV_IMPL_FP_HINTS_X86
#  if !defined(_MM_DENORMALS_ZERO_ON)
#    define _MM_DENORMALS_ZERO_ON 0x0040
#  endif
#  if !defined(_MM_DENORMALS_ZERO_MASK)
#    define _MM_DENORMALS_ZERO_MASK 0x0040
#  endif
#endif
|
||||||
|
|
||||||
|
void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state)
|
||||||
|
{
|
||||||
|
#if OPENCV_IMPL_FP_HINTS_X86
|
||||||
|
unsigned mask = _MM_FLUSH_ZERO_MASK;
|
||||||
|
unsigned value = ignore ? _MM_FLUSH_ZERO_ON : 0;
|
||||||
|
if (featuresEnabled.have[CPU_SSE3])
|
||||||
|
{
|
||||||
|
mask |= _MM_DENORMALS_ZERO_MASK;
|
||||||
|
value |= ignore ? _MM_DENORMALS_ZERO_ON : 0;
|
||||||
|
}
|
||||||
|
const unsigned old_flags = _mm_getcsr();
|
||||||
|
const unsigned old_value = old_flags & mask;
|
||||||
|
unsigned flags = (old_flags & ~mask) | value;
|
||||||
|
CV_LOG_DEBUG(NULL, "core: update FP mxcsr flags = " << cv::format("0x%08x", flags));
|
||||||
|
// save state
|
||||||
|
state.reserved[0] = (uint32_t)mask;
|
||||||
|
state.reserved[1] = (uint32_t)old_value;
|
||||||
|
_mm_setcsr(flags);
|
||||||
|
#else
|
||||||
|
CV_UNUSED(ignore); CV_UNUSED(state);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Captures the current flush-to-zero / denormals-are-zero bits into `state`
// without modifying them.
// Returns the number of leading words of `state.reserved` that were filled:
// 2 (mask, value) with the x86 backend, 0 when no backend is available.
int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state)
{
#if OPENCV_IMPL_FP_HINTS_X86
    // denormals-are-zero is tracked only when SSE3 is available
    unsigned trackedBits = _MM_FLUSH_ZERO_MASK;
    if (featuresEnabled.have[CPU_SSE3])
        trackedBits |= _MM_DENORMALS_ZERO_MASK;

    const unsigned csr = _mm_getcsr();

    state.reserved[0] = static_cast<uint32_t>(trackedBits);
    state.reserved[1] = static_cast<uint32_t>(csr & trackedBits);
    return 2;
#else
    CV_UNUSED(state);
    return 0;
#endif
}
|
||||||
|
|
||||||
|
bool restoreFPDenormalsState(const FPDenormalsModeState& state)
|
||||||
|
{
|
||||||
|
#if OPENCV_IMPL_FP_HINTS_X86
|
||||||
|
const unsigned mask = (unsigned)state.reserved[0];
|
||||||
|
CV_DbgAssert(mask != 0); // invalid state (ensure that state is properly saved earlier)
|
||||||
|
const unsigned value = (unsigned)state.reserved[1];
|
||||||
|
CV_DbgCheck((int)value, value == (value & mask), "invalid SSE FP state");
|
||||||
|
const unsigned old_flags = _mm_getcsr();
|
||||||
|
unsigned flags = (old_flags & ~mask) | value;
|
||||||
|
CV_LOG_DEBUG(NULL, "core: restore FP mxcsr flags = " << cv::format("0x%08x", flags));
|
||||||
|
_mm_setcsr(flags);
|
||||||
|
return true;
|
||||||
|
#else
|
||||||
|
CV_UNUSED(state);
|
||||||
|
return false;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace details
|
||||||
|
|
||||||
|
|
||||||
} // namespace cv
|
} // namespace cv
|
||||||
|
|
||||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||||
|
@ -3,6 +3,15 @@
|
|||||||
// of this distribution and at http://opencv.org/license.html.
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
#include "test_precomp.hpp"
|
#include "test_precomp.hpp"
|
||||||
|
|
||||||
|
#include "opencv2/core/utils/logger.hpp"
|
||||||
|
|
||||||
|
#include <opencv2/core/utils/fp_control_utils.hpp>
|
||||||
|
|
||||||
|
#ifdef CV_CXX11
|
||||||
|
#include <chrono>
|
||||||
|
#include <thread>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace opencv_test { namespace {
|
namespace opencv_test { namespace {
|
||||||
|
|
||||||
TEST(Core_OutputArrayCreate, _1997)
|
TEST(Core_OutputArrayCreate, _1997)
|
||||||
@ -242,6 +251,62 @@ TEST(Core_Parallel, propagate_exceptions)
|
|||||||
}, cv::Exception);
|
}, cv::Exception);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class FPDenormalsHintCheckerParallelLoopBody : public cv::ParallelLoopBody
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
FPDenormalsHintCheckerParallelLoopBody()
|
||||||
|
: isOK(true)
|
||||||
|
{
|
||||||
|
state_values_to_check = cv::details::saveFPDenormalsState(base_state);
|
||||||
|
}
|
||||||
|
~FPDenormalsHintCheckerParallelLoopBody() {}
|
||||||
|
void operator()(const cv::Range& r) const
|
||||||
|
{
|
||||||
|
CV_UNUSED(r);
|
||||||
|
cv::details::FPDenormalsModeState state;
|
||||||
|
if (cv::details::saveFPDenormalsState(state))
|
||||||
|
{
|
||||||
|
for (int i = 0; i < state_values_to_check; ++i)
|
||||||
|
{
|
||||||
|
if (base_state.reserved[i] != state.reserved[i])
|
||||||
|
{
|
||||||
|
CV_LOG_ERROR(NULL, cv::format("FP state[%d] mismatch: base=0x%08x thread=0x%08x", i, base_state.reserved[i], state.reserved[i]));
|
||||||
|
isOK = false;
|
||||||
|
cv::details::restoreFPDenormalsState(base_state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// FP state is not supported
|
||||||
|
// no checks
|
||||||
|
}
|
||||||
|
#ifdef CV_CXX11
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::details::FPDenormalsModeState base_state;
|
||||||
|
int state_values_to_check;
|
||||||
|
|
||||||
|
mutable bool isOK;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Checks that the FP denormals mode selected in the calling thread is the one
// observed inside parallel_for_() bodies, alternating between disabled (even
// cases) and enabled (odd cases).
TEST(Core_Parallel, propagate_fp_denormals_ignore_hint)
{
    // request three times as many ranges as there are threads
    const int rangeSize = 3 * std::max(1, cv::getNumThreads());

    for (int caseIdx = 0; caseIdx < 4; ++caseIdx)
    {
        const bool ignoreDenormals = (caseIdx & 1) != 0;
        SCOPED_TRACE(cv::format("Case=%d: FP denormals ignore hint: %s\n", caseIdx, ignoreDenormals ? "enable" : "disable"));

        FPDenormalsIgnoreHintScope fp_denormals_scope(ignoreDenormals);
        FPDenormalsHintCheckerParallelLoopBody job;
        ASSERT_NO_THROW(parallel_for_(cv::Range(0, rangeSize), job));
        EXPECT_TRUE(job.isOK);
    }
}
|
||||||
|
|
||||||
TEST(Core_Version, consistency)
|
TEST(Core_Version, consistency)
|
||||||
{
|
{
|
||||||
// this test verifies that OpenCV version loaded in runtime
|
// this test verifies that OpenCV version loaded in runtime
|
||||||
|
Loading…
Reference in New Issue
Block a user