opencv/modules/core/src/system.cpp
Pablo Romero 6f417b57c1
Merge pull request #20399 from pablorcum:3.4
Improves support for Unix non-Linux systems, including QNX

* Fixes #20395. Improves support for Unix non-Linux systems. Focus on QNX Neutrino.

Signed-off-by: promero <promero@mathworks.com>

* Update system.cpp
2021-07-13 19:40:15 +00:00

2673 lines
79 KiB
C++

/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iostream>
#include <ostream>
#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/trace.private.hpp>
#include <opencv2/core/utils/logger.hpp>
#include <opencv2/core/utils/tls.hpp>
#include <opencv2/core/utils/instrumentation.hpp>
namespace cv {
static void _initSystem()
{
#ifdef __ANDROID__
// https://github.com/opencv/opencv/issues/14906
// "ios_base::Init" object is not a part of Android's "iostream" header (in case of clang toolchain, NDK 20).
// Ref1: https://en.cppreference.com/w/cpp/io/ios_base/Init
// The header <iostream> behaves as if it defines (directly or indirectly) an instance of std::ios_base::Init with static storage duration
// Ref2: https://github.com/gcc-mirror/gcc/blob/gcc-8-branch/libstdc%2B%2B-v3/include/std/iostream#L73-L74
static std::ios_base::Init s_iostream_initializer;
#endif
}
static Mutex* __initialization_mutex = NULL;
Mutex& getInitializationMutex()
{
if (__initialization_mutex == NULL)
{
(void)_initSystem();
__initialization_mutex = new Mutex();
}
return *__initialization_mutex;
}
// force initialization (single-threaded environment)
Mutex* __initialization_mutex_initializer = &getInitializationMutex();
static bool param_dumpErrors = utils::getConfigurationParameterBool("OPENCV_DUMP_ERRORS",
#if defined(_DEBUG) || defined(__ANDROID__) || (defined(__GNUC__) && !defined(__EXCEPTIONS))
true
#else
false
#endif
);
void* allocSingletonBuffer(size_t size) { return fastMalloc(size); }
void* allocSingletonNewBuffer(size_t size) { return malloc(size); }
} // namespace cv
#ifndef CV_ERROR_SET_TERMINATE_HANDLER // build config option
# if defined(_WIN32)
# define CV_ERROR_SET_TERMINATE_HANDLER 1
# endif
#endif
#if defined(CV_ERROR_SET_TERMINATE_HANDLER) && !CV_ERROR_SET_TERMINATE_HANDLER
# undef CV_ERROR_SET_TERMINATE_HANDLER
#endif
#ifdef _MSC_VER
# if _MSC_VER >= 1700
# pragma warning(disable:4447) // Disable warning 'main' signature found without threading model
# endif
#endif
#ifdef CV_ERROR_SET_TERMINATE_HANDLER
#include <exception> // std::set_terminate
#include <cstdlib> // std::abort
#endif
#if defined __ANDROID__ || defined __unix__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __HAIKU__
# include <unistd.h>
# include <fcntl.h>
#if defined __QNXNTO__
# include <sys/elf.h>
#else
# include <elf.h>
#endif
#if defined __ANDROID__ || defined __linux__
# include <linux/auxvec.h>
#endif
#endif
#if defined __ANDROID__ && defined HAVE_CPUFEATURES
# include <cpu-features.h>
#endif
#if (defined __ppc64__ || defined __PPC64__) && defined __unix__
# include "sys/auxv.h"
# ifndef AT_HWCAP2
# define AT_HWCAP2 26
# endif
# ifndef PPC_FEATURE2_ARCH_2_07
# define PPC_FEATURE2_ARCH_2_07 0x80000000
# endif
# ifndef PPC_FEATURE2_ARCH_3_00
# define PPC_FEATURE2_ARCH_3_00 0x00800000
# endif
#endif
#if defined _WIN32 || defined WINCE
#ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
#define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
#endif
#include <windows.h>
#if (_WIN32_WINNT >= 0x0602)
#include <synchapi.h>
#endif
#if ((_WIN32_WINNT >= 0x0600) && !defined(CV_DISABLE_FLS)) || defined(CV_FORCE_FLS)
#include <fibersapi.h>
#define CV_USE_FLS
#endif
#undef small
#undef min
#undef max
#undef abs
#include <tchar.h>
#ifdef WINRT
#include <wrl/client.h>
#ifndef __cplusplus_winrt
#include <windows.storage.h>
#pragma comment(lib, "runtimeobject.lib")
#endif // WINRT
std::wstring GetTempPathWinRT()
{
#ifdef __cplusplus_winrt
return std::wstring(Windows::Storage::ApplicationData::Current->TemporaryFolder->Path->Data());
#else
Microsoft::WRL::ComPtr<ABI::Windows::Storage::IApplicationDataStatics> appdataFactory;
Microsoft::WRL::ComPtr<ABI::Windows::Storage::IApplicationData> appdataRef;
Microsoft::WRL::ComPtr<ABI::Windows::Storage::IStorageFolder> storagefolderRef;
Microsoft::WRL::ComPtr<ABI::Windows::Storage::IStorageItem> storageitemRef;
HSTRING str;
HSTRING_HEADER hstrHead;
std::wstring wstr;
if (FAILED(WindowsCreateStringReference(RuntimeClass_Windows_Storage_ApplicationData,
(UINT32)wcslen(RuntimeClass_Windows_Storage_ApplicationData), &hstrHead, &str)))
return wstr;
if (FAILED(RoGetActivationFactory(str, IID_PPV_ARGS(appdataFactory.ReleaseAndGetAddressOf()))))
return wstr;
if (FAILED(appdataFactory->get_Current(appdataRef.ReleaseAndGetAddressOf())))
return wstr;
if (FAILED(appdataRef->get_TemporaryFolder(storagefolderRef.ReleaseAndGetAddressOf())))
return wstr;
if (FAILED(storagefolderRef.As(&storageitemRef)))
return wstr;
str = NULL;
if (FAILED(storageitemRef->get_Path(&str)))
return wstr;
wstr = WindowsGetStringRawBuffer(str, NULL);
WindowsDeleteString(str);
return wstr;
#endif
}
std::wstring GetTempFileNameWinRT(std::wstring prefix)
{
wchar_t guidStr[40];
GUID g;
CoCreateGuid(&g);
wchar_t* mask = L"%08x_%04x_%04x_%02x%02x_%02x%02x%02x%02x%02x%02x";
swprintf(&guidStr[0], sizeof(guidStr)/sizeof(wchar_t), mask,
g.Data1, g.Data2, g.Data3, UINT(g.Data4[0]), UINT(g.Data4[1]),
UINT(g.Data4[2]), UINT(g.Data4[3]), UINT(g.Data4[4]),
UINT(g.Data4[5]), UINT(g.Data4[6]), UINT(g.Data4[7]));
return prefix.append(std::wstring(guidStr));
}
#endif
#else
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#if defined __MACH__ && defined __APPLE__
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif
#endif
#ifdef _OPENMP
#include "omp.h"
#endif
#if defined __unix__ || defined __APPLE__ || defined __EMSCRIPTEN__ || defined __FreeBSD__ || defined __GLIBC__ || defined __HAIKU__
#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#if defined __ANDROID__
#include <sys/sysconf.h>
#endif
#endif
#ifdef __ANDROID__
# include <android/log.h>
#endif
#ifdef DECLARE_CV_CPUID_X86
DECLARE_CV_CPUID_X86
#endif
#ifndef CV_CPUID_X86
#if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
#if _MSC_VER >= 1400 // MSVS 2005
#include <intrin.h> // __cpuidex()
#define CV_CPUID_X86 __cpuidex
#else
#error "Required MSVS 2005+"
#endif
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
static void cv_cpuid(int* cpuid_data, int reg_eax, int reg_ecx)
{
int __eax = reg_eax, __ebx = 0, __ecx = reg_ecx, __edx = 0;
// tested with available compilers (-fPIC -O2 -m32/-m64): https://godbolt.org/
#if !defined(__PIC__) \
|| defined(__x86_64__) || __GNUC__ >= 5 \
|| defined(__clang__) || defined(__INTEL_COMPILER)
__asm__("cpuid\n\t"
: "+a" (__eax), "=b" (__ebx), "+c" (__ecx), "=d" (__edx)
);
#elif defined(__i386__) // ebx may be reserved as the PIC register
__asm__("xchg{l}\t{%%}ebx, %1\n\t"
"cpuid\n\t"
"xchg{l}\t{%%}ebx, %1\n\t"
: "+a" (__eax), "=&r" (__ebx), "+c" (__ecx), "=d" (__edx)
);
#else
#error "Configuration error"
#endif
cpuid_data[0] = __eax; cpuid_data[1] = __ebx; cpuid_data[2] = __ecx; cpuid_data[3] = __edx;
}
#define CV_CPUID_X86 cv_cpuid
#endif
#endif
namespace cv
{
Exception::Exception() { code = 0; line = 0; }
Exception::Exception(int _code, const String& _err, const String& _func, const String& _file, int _line)
: code(_code), err(_err), func(_func), file(_file), line(_line)
{
formatMessage();
}
Exception::~Exception() throw() {}
/*!
\return the error description and the context as a text string.
*/
const char* Exception::what() const throw() { return msg.c_str(); }
void Exception::formatMessage()
{
size_t pos = err.find('\n');
bool multiline = pos != cv::String::npos;
if (multiline)
{
std::stringstream ss;
size_t prev_pos = 0;
while (pos != cv::String::npos)
{
ss << "> " << err.substr(prev_pos, pos - prev_pos) << std::endl;
prev_pos = pos + 1;
pos = err.find('\n', prev_pos);
}
ss << "> " << err.substr(prev_pos);
if (err[err.size() - 1] != '\n')
ss << std::endl;
err = ss.str();
}
if (func.size() > 0)
{
if (multiline)
msg = format("OpenCV(%s) %s:%d: error: (%d:%s) in function '%s'\n%s", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), func.c_str(), err.c_str());
else
msg = format("OpenCV(%s) %s:%d: error: (%d:%s) %s in function '%s'\n", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), err.c_str(), func.c_str());
}
else
{
msg = format("OpenCV(%s) %s:%d: error: (%d:%s) %s%s", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), err.c_str(), multiline ? "" : "\n");
}
}
static const char* g_hwFeatureNames[CV_HARDWARE_MAX_FEATURE] = { NULL };
static const char* getHWFeatureName(int id)
{
return (id < CV_HARDWARE_MAX_FEATURE) ? g_hwFeatureNames[id] : NULL;
}
static const char* getHWFeatureNameSafe(int id)
{
const char* name = getHWFeatureName(id);
return name ? name : "Unknown feature";
}
struct HWFeatures
{
enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE };
HWFeatures(bool run_initialize = false)
{
memset( have, 0, sizeof(have[0]) * MAX_FEATURE );
if (run_initialize)
initialize();
}
static void initializeNames()
{
for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++)
{
g_hwFeatureNames[i] = 0;
}
g_hwFeatureNames[CPU_MMX] = "MMX";
g_hwFeatureNames[CPU_SSE] = "SSE";
g_hwFeatureNames[CPU_SSE2] = "SSE2";
g_hwFeatureNames[CPU_SSE3] = "SSE3";
g_hwFeatureNames[CPU_SSSE3] = "SSSE3";
g_hwFeatureNames[CPU_SSE4_1] = "SSE4.1";
g_hwFeatureNames[CPU_SSE4_2] = "SSE4.2";
g_hwFeatureNames[CPU_POPCNT] = "POPCNT";
g_hwFeatureNames[CPU_FP16] = "FP16";
g_hwFeatureNames[CPU_AVX] = "AVX";
g_hwFeatureNames[CPU_AVX2] = "AVX2";
g_hwFeatureNames[CPU_FMA3] = "FMA3";
g_hwFeatureNames[CPU_AVX_512F] = "AVX512F";
g_hwFeatureNames[CPU_AVX_512BW] = "AVX512BW";
g_hwFeatureNames[CPU_AVX_512CD] = "AVX512CD";
g_hwFeatureNames[CPU_AVX_512DQ] = "AVX512DQ";
g_hwFeatureNames[CPU_AVX_512ER] = "AVX512ER";
g_hwFeatureNames[CPU_AVX_512IFMA] = "AVX512IFMA";
g_hwFeatureNames[CPU_AVX_512PF] = "AVX512PF";
g_hwFeatureNames[CPU_AVX_512VBMI] = "AVX512VBMI";
g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL";
g_hwFeatureNames[CPU_AVX_512VBMI2] = "AVX512VBMI2";
g_hwFeatureNames[CPU_AVX_512VNNI] = "AVX512VNNI";
g_hwFeatureNames[CPU_AVX_512BITALG] = "AVX512BITALG";
g_hwFeatureNames[CPU_AVX_512VPOPCNTDQ] = "AVX512VPOPCNTDQ";
g_hwFeatureNames[CPU_AVX_5124VNNIW] = "AVX5124VNNIW";
g_hwFeatureNames[CPU_AVX_5124FMAPS] = "AVX5124FMAPS";
g_hwFeatureNames[CPU_NEON] = "NEON";
g_hwFeatureNames[CPU_VSX] = "VSX";
g_hwFeatureNames[CPU_VSX3] = "VSX3";
g_hwFeatureNames[CPU_MSA] = "CPU_MSA";
g_hwFeatureNames[CPU_AVX512_COMMON] = "AVX512-COMMON";
g_hwFeatureNames[CPU_AVX512_SKX] = "AVX512-SKX";
g_hwFeatureNames[CPU_AVX512_KNL] = "AVX512-KNL";
g_hwFeatureNames[CPU_AVX512_KNM] = "AVX512-KNM";
g_hwFeatureNames[CPU_AVX512_CNL] = "AVX512-CNL";
g_hwFeatureNames[CPU_AVX512_CLX] = "AVX512-CLX";
g_hwFeatureNames[CPU_AVX512_ICL] = "AVX512-ICL";
}
void initialize(void)
{
#ifndef NO_GETENV
if (getenv("OPENCV_DUMP_CONFIG"))
{
fprintf(stderr, "\nOpenCV build configuration is:\n%s\n",
cv::getBuildInformation().c_str());
}
#endif
initializeNames();
#ifdef CV_CPUID_X86
int cpuid_data[4] = { 0, 0, 0, 0 };
int cpuid_data_ex[4] = { 0, 0, 0, 0 };
CV_CPUID_X86(cpuid_data, 1, 0/*unused*/);
int x86_family = (cpuid_data[0] >> 8) & 15;
if( x86_family >= 6 )
{
have[CV_CPU_MMX] = (cpuid_data[3] & (1<<23)) != 0;
have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
have[CV_CPU_AVX] = (cpuid_data[2] & (1<<28)) != 0;
have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0;
// make the second call to the cpuid command in order to get
// information about extended features like AVX2
CV_CPUID_X86(cpuid_data_ex, 7, 0);
have[CV_CPU_AVX2] = (cpuid_data_ex[1] & (1<<5)) != 0;
have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0;
have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0;
have[CV_CPU_AVX_512IFMA] = (cpuid_data_ex[1] & (1<<21)) != 0;
have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0;
have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0;
have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0;
have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0;
have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0;
have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0;
have[CV_CPU_AVX_512VBMI2] = (cpuid_data_ex[2] & (1<<6)) != 0;
have[CV_CPU_AVX_512VNNI] = (cpuid_data_ex[2] & (1<<11)) != 0;
have[CV_CPU_AVX_512BITALG] = (cpuid_data_ex[2] & (1<<12)) != 0;
have[CV_CPU_AVX_512VPOPCNTDQ] = (cpuid_data_ex[2] & (1<<14)) != 0;
have[CV_CPU_AVX_5124VNNIW] = (cpuid_data_ex[3] & (1<<2)) != 0;
have[CV_CPU_AVX_5124FMAPS] = (cpuid_data_ex[3] & (1<<3)) != 0;
bool have_AVX_OS_support = true;
bool have_AVX512_OS_support = true;
if (!(cpuid_data[2] & (1<<27)))
have_AVX_OS_support = false; // OS uses XSAVE_XRSTORE and CPU support AVX
else
{
int xcr0 = 0;
#ifdef _XCR_XFEATURE_ENABLED_MASK // requires immintrin.h
xcr0 = (int)_xgetbv(_XCR_XFEATURE_ENABLED_MASK);
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
__asm__ ("xgetbv\n\t" : "=a" (xcr0) : "c" (0) : "%edx" );
#endif
if ((xcr0 & 0x6) != 0x6)
have_AVX_OS_support = false; // YMM registers
if ((xcr0 & 0xe6) != 0xe6)
have_AVX512_OS_support = false; // ZMM registers
}
if (!have_AVX_OS_support)
{
have[CV_CPU_AVX] = false;
have[CV_CPU_FP16] = false;
have[CV_CPU_AVX2] = false;
have[CV_CPU_FMA3] = false;
}
if (!have_AVX_OS_support || !have_AVX512_OS_support)
{
have[CV_CPU_AVX_512F] = false;
have[CV_CPU_AVX_512BW] = false;
have[CV_CPU_AVX_512CD] = false;
have[CV_CPU_AVX_512DQ] = false;
have[CV_CPU_AVX_512ER] = false;
have[CV_CPU_AVX_512IFMA] = false;
have[CV_CPU_AVX_512PF] = false;
have[CV_CPU_AVX_512VBMI] = false;
have[CV_CPU_AVX_512VL] = false;
have[CV_CPU_AVX_512VBMI2] = false;
have[CV_CPU_AVX_512VNNI] = false;
have[CV_CPU_AVX_512BITALG] = false;
have[CV_CPU_AVX_512VPOPCNTDQ] = false;
have[CV_CPU_AVX_5124VNNIW] = false;
have[CV_CPU_AVX_5124FMAPS] = false;
}
have[CV_CPU_AVX512_COMMON] = have[CV_CPU_AVX_512F] && have[CV_CPU_AVX_512CD];
if (have[CV_CPU_AVX512_COMMON])
{
have[CV_CPU_AVX512_KNL] = have[CV_CPU_AVX_512ER] && have[CV_CPU_AVX_512PF];
have[CV_CPU_AVX512_KNM] = have[CV_CPU_AVX512_KNL] && have[CV_CPU_AVX_5124FMAPS] &&
have[CV_CPU_AVX_5124VNNIW] && have[CV_CPU_AVX_512VPOPCNTDQ];
have[CV_CPU_AVX512_SKX] = have[CV_CPU_AVX_512BW] && have[CV_CPU_AVX_512DQ] && have[CV_CPU_AVX_512VL];
have[CV_CPU_AVX512_CNL] = have[CV_CPU_AVX512_SKX] && have[CV_CPU_AVX_512IFMA] && have[CV_CPU_AVX_512VBMI];
have[CV_CPU_AVX512_CLX] = have[CV_CPU_AVX512_SKX] && have[CV_CPU_AVX_512VNNI];
have[CV_CPU_AVX512_ICL] = have[CV_CPU_AVX512_SKX] &&
have[CV_CPU_AVX_512IFMA] && have[CV_CPU_AVX_512VBMI] &&
have[CV_CPU_AVX_512VNNI] &&
have[CV_CPU_AVX_512VBMI2] && have[CV_CPU_AVX_512BITALG] && have[CV_CPU_AVX_512VPOPCNTDQ];
}
else
{
have[CV_CPU_AVX512_KNL] = false;
have[CV_CPU_AVX512_KNM] = false;
have[CV_CPU_AVX512_SKX] = false;
have[CV_CPU_AVX512_CNL] = false;
have[CV_CPU_AVX512_CLX] = false;
have[CV_CPU_AVX512_ICL] = false;
}
}
#endif // CV_CPUID_X86
#if defined __ANDROID__ || defined __linux__
#ifdef __aarch64__
have[CV_CPU_NEON] = true;
have[CV_CPU_FP16] = true;
#elif defined __arm__ && defined __ANDROID__
#if defined HAVE_CPUFEATURES
CV_LOG_INFO(NULL, "calling android_getCpuFeatures() ...");
uint64_t features = android_getCpuFeatures();
CV_LOG_INFO(NULL, cv::format("calling android_getCpuFeatures() ... Done (%llx)", (long long)features));
have[CV_CPU_NEON] = (features & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
have[CV_CPU_FP16] = (features & ANDROID_CPU_ARM_FEATURE_VFP_FP16) != 0;
#else
CV_LOG_INFO(NULL, "cpufeatures library is not available for CPU detection");
#if CV_NEON
CV_LOG_INFO(NULL, "- NEON instructions is enabled via build flags");
have[CV_CPU_NEON] = true;
#else
CV_LOG_INFO(NULL, "- NEON instructions is NOT enabled via build flags");
#endif
#if CV_FP16
CV_LOG_INFO(NULL, "- FP16 instructions is enabled via build flags");
have[CV_CPU_FP16] = true;
#else
CV_LOG_INFO(NULL, "- FP16 instructions is NOT enabled via build flags");
#endif
#endif
#elif defined __arm__
int cpufile = open("/proc/self/auxv", O_RDONLY);
if (cpufile >= 0)
{
Elf32_auxv_t auxv;
const size_t size_auxv_t = sizeof(auxv);
while ((size_t)read(cpufile, &auxv, size_auxv_t) == size_auxv_t)
{
if (auxv.a_type == AT_HWCAP)
{
have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0;
break;
}
}
close(cpufile);
}
#endif
#elif (defined __clang__ || defined __APPLE__)
#if (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__))
have[CV_CPU_NEON] = true;
#endif
#if (defined __ARM_FP && (((__ARM_FP & 0x2) != 0) && defined __ARM_NEON__))
have[CV_CPU_FP16] = true;
#endif
#endif
#if defined _ARM_ && (defined(_WIN32_WCE) && _WIN32_WCE >= 0x800)
have[CV_CPU_NEON] = true;
#endif
#ifdef __mips_msa
have[CV_CPU_MSA] = true;
#endif
#if (defined __ppc64__ || defined __PPC64__) && defined __unix__
unsigned int hwcap = getauxval(AT_HWCAP);
if (hwcap & PPC_FEATURE_HAS_VSX) {
hwcap = getauxval(AT_HWCAP2);
if (hwcap & PPC_FEATURE2_ARCH_3_00) {
have[CV_CPU_VSX] = have[CV_CPU_VSX3] = true;
} else {
have[CV_CPU_VSX] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0;
}
}
#else
// TODO: AIX, FreeBSD
#if CV_VSX || defined _ARCH_PWR8 || defined __POWER9_VECTOR__
have[CV_CPU_VSX] = true;
#endif
#if CV_VSX3 || defined __POWER9_VECTOR__
have[CV_CPU_VSX3] = true;
#endif
#endif
bool skip_baseline_check = false;
#ifndef NO_GETENV
if (getenv("OPENCV_SKIP_CPU_BASELINE_CHECK"))
{
skip_baseline_check = true;
}
#endif
int baseline_features[] = { CV_CPU_BASELINE_FEATURES };
if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]))
&& !skip_baseline_check)
{
fprintf(stderr, "\n"
"******************************************************************\n"
"* FATAL ERROR: *\n"
"* This OpenCV build doesn't support current CPU/HW configuration *\n"
"* *\n"
"* Use OPENCV_DUMP_CONFIG=1 environment variable for details *\n"
"******************************************************************\n");
fprintf(stderr, "\nRequired baseline features:\n");
checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]), true);
CV_Error(cv::Error::StsAssert, "Missing support for required CPU baseline features. Check OpenCV build configuration and required CPU/HW setup.");
}
readSettings(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]));
}
bool checkFeatures(const int* features, int count, bool dump = false)
{
bool result = true;
for (int i = 0; i < count; i++)
{
int feature = features[i];
if (feature)
{
if (have[feature])
{
if (dump) fprintf(stderr, " ID=%3d (%s) - OK\n", feature, getHWFeatureNameSafe(feature));
}
else
{
result = false;
if (dump) fprintf(stderr, " ID=%3d (%s) - NOT AVAILABLE\n", feature, getHWFeatureNameSafe(feature));
}
}
}
return result;
}
static inline bool isSymbolSeparator(char c)
{
return c == ',' || c == ';';
}
void readSettings(const int* baseline_features, int baseline_count)
{
bool dump = true;
const char* disabled_features =
#ifdef NO_GETENV
NULL;
#else
getenv("OPENCV_CPU_DISABLE");
#endif
if (disabled_features && disabled_features[0] != 0)
{
const char* start = disabled_features;
for (;;)
{
while (start[0] != 0 && isSymbolSeparator(start[0]))
{
start++;
}
if (start[0] == 0)
break;
const char* end = start;
while (end[0] != 0 && !isSymbolSeparator(end[0]))
{
end++;
}
if (end == start)
continue;
cv::String feature(start, end);
start = end;
CV_Assert(feature.size() > 0);
bool found = false;
for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++)
{
if (!g_hwFeatureNames[i]) continue;
size_t len = strlen(g_hwFeatureNames[i]);
if (len != feature.size()) continue;
if (feature.compare(g_hwFeatureNames[i]) == 0)
{
bool isBaseline = false;
for (int k = 0; k < baseline_count; k++)
{
if (baseline_features[k] == i)
{
isBaseline = true;
break;
}
}
if (isBaseline)
{
if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'."
"This has very limited effect, because code optimizations for this feature are executed unconditionally "
"in the most cases.\n", getHWFeatureNameSafe(i));
}
if (!have[i])
{
if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n",
getHWFeatureNameSafe(i));
}
have[i] = false;
found = true;
break;
}
}
if (!found)
{
if (dump) fprintf(stderr, "OPENCV: Trying to disable unknown CPU feature: '%s'.\n", feature.c_str());
}
}
}
}
bool have[MAX_FEATURE+1];
};
static HWFeatures featuresEnabled(true), featuresDisabled = HWFeatures(false);
static HWFeatures* currentFeatures = &featuresEnabled;
bool checkHardwareSupport(int feature)
{
CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE );
return currentFeatures->have[feature];
}
String getHardwareFeatureName(int feature)
{
const char* name = getHWFeatureName(feature);
return name ? String(name) : String();
}
std::string getCPUFeaturesLine()
{
const int features[] = { CV_CPU_BASELINE_FEATURES, CV_CPU_DISPATCH_FEATURES };
const int sz = sizeof(features) / sizeof(features[0]);
std::string result;
std::string prefix;
for (int i = 1; i < sz; ++i)
{
if (features[i] == 0)
{
prefix = "*";
continue;
}
if (i != 1) result.append(" ");
result.append(prefix);
result.append(getHWFeatureNameSafe(features[i]));
if (!checkHardwareSupport(features[i])) result.append("?");
}
return result;
}
volatile bool useOptimizedFlag = true;
void setUseOptimized( bool flag )
{
useOptimizedFlag = flag;
currentFeatures = flag ? &featuresEnabled : &featuresDisabled;
ipp::setUseIPP(flag);
#ifdef HAVE_OPENCL
ocl::setUseOpenCL(flag);
#endif
#ifdef HAVE_TEGRA_OPTIMIZATION
::tegra::setUseTegra(flag);
#endif
}
bool useOptimized(void)
{
return useOptimizedFlag;
}
int64 getTickCount(void)
{
#if defined _WIN32 || defined WINCE
LARGE_INTEGER counter;
QueryPerformanceCounter( &counter );
return (int64)counter.QuadPart;
#elif defined __MACH__ && defined __APPLE__
return (int64)mach_absolute_time();
#elif defined __unix__
struct timespec tp;
clock_gettime(CLOCK_MONOTONIC, &tp);
return (int64)tp.tv_sec*1000000000 + tp.tv_nsec;
#else
struct timeval tv;
gettimeofday(&tv, NULL);
return (int64)tv.tv_sec*1000000 + tv.tv_usec;
#endif
}
double getTickFrequency(void)
{
#if defined _WIN32 || defined WINCE
LARGE_INTEGER freq;
QueryPerformanceFrequency(&freq);
return (double)freq.QuadPart;
#elif defined __MACH__ && defined __APPLE__
static double freq = 0;
if( freq == 0 )
{
mach_timebase_info_data_t sTimebaseInfo;
mach_timebase_info(&sTimebaseInfo);
freq = sTimebaseInfo.denom*1e9/sTimebaseInfo.numer;
}
return freq;
#elif defined __unix__
return 1e9;
#else
return 1e6;
#endif
}
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__ || defined __ppc__)
#if defined(__i386__)
int64 getCPUTickCount(void)
{
int64 x;
__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
return x;
}
#elif defined(__x86_64__)
int64 getCPUTickCount(void)
{
unsigned hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return (int64)lo | ((int64)hi << 32);
}
#elif defined(__ppc__)
int64 getCPUTickCount(void)
{
unsigned upper, lower, tmp;
__asm__ volatile(
"0: \n"
"\tmftbu %0 \n"
"\tmftb %1 \n"
"\tmftbu %2 \n"
"\tcmpw %2,%0 \n"
"\tbne 0b \n"
: "=r"(upper),"=r"(lower),"=r"(tmp)
);
return lower | ((int64)upper << 32);
}
#else
#error "RDTSC not defined"
#endif
#elif defined _MSC_VER && defined _WIN32 && defined _M_IX86
int64 getCPUTickCount(void)
{
__asm _emit 0x0f;
__asm _emit 0x31;
}
#else
//#ifdef HAVE_IPP
//int64 getCPUTickCount(void)
//{
// return ippGetCpuClocks();
//}
//#else
int64 getCPUTickCount(void)
{
return getTickCount();
}
//#endif
#endif
const String& getBuildInformation()
{
static String build_info =
#include "version_string.inc"
;
return build_info;
}
String getVersionString() { return String(CV_VERSION); }
int getVersionMajor() { return CV_VERSION_MAJOR; }
int getVersionMinor() { return CV_VERSION_MINOR; }
int getVersionRevision() { return CV_VERSION_REVISION; }
String format( const char* fmt, ... )
{
AutoBuffer<char, 1024> buf;
for ( ; ; )
{
va_list va;
va_start(va, fmt);
int bsize = static_cast<int>(buf.size());
int len = cv_vsnprintf(buf.data(), bsize, fmt, va);
va_end(va);
CV_Assert(len >= 0 && "Check format string for errors");
if (len >= bsize)
{
buf.resize(len + 1);
continue;
}
buf[bsize - 1] = 0;
return String(buf.data(), len);
}
}
String tempfile( const char* suffix )
{
String fname;
#ifndef NO_GETENV
const char *temp_dir = getenv("OPENCV_TEMP_PATH");
#endif
#if defined _WIN32
#ifdef WINRT
RoInitialize(RO_INIT_MULTITHREADED);
std::wstring temp_dir = GetTempPathWinRT();
std::wstring temp_file = GetTempFileNameWinRT(L"ocv");
if (temp_file.empty())
return String();
temp_file = temp_dir.append(std::wstring(L"\\")).append(temp_file);
DeleteFileW(temp_file.c_str());
char aname[MAX_PATH];
size_t copied = wcstombs(aname, temp_file.c_str(), MAX_PATH);
CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
fname = String(aname);
RoUninitialize();
#elif defined(_WIN32_WCE)
const auto kMaxPathSize = MAX_PATH+1;
wchar_t temp_dir[kMaxPathSize] = {0};
wchar_t temp_file[kMaxPathSize] = {0};
::GetTempPathW(kMaxPathSize, temp_dir);
if(0 != ::GetTempFileNameW(temp_dir, L"ocv", 0, temp_file)) {
DeleteFileW(temp_file);
char aname[MAX_PATH];
size_t copied = wcstombs(aname, temp_file, MAX_PATH);
CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
fname = String(aname);
}
#else
char temp_dir2[MAX_PATH] = { 0 };
char temp_file[MAX_PATH] = { 0 };
if (temp_dir == 0 || temp_dir[0] == 0)
{
::GetTempPathA(sizeof(temp_dir2), temp_dir2);
temp_dir = temp_dir2;
}
if(0 == ::GetTempFileNameA(temp_dir, "ocv", 0, temp_file))
return String();
DeleteFileA(temp_file);
fname = temp_file;
#endif
# else
# ifdef __ANDROID__
//char defaultTemplate[] = "/mnt/sdcard/__opencv_temp.XXXXXX";
char defaultTemplate[] = "/data/local/tmp/__opencv_temp.XXXXXX";
# else
char defaultTemplate[] = "/tmp/__opencv_temp.XXXXXX";
# endif
if (temp_dir == 0 || temp_dir[0] == 0)
fname = defaultTemplate;
else
{
fname = temp_dir;
char ech = fname[fname.size() - 1];
if(ech != '/' && ech != '\\')
fname = fname + "/";
fname = fname + "__opencv_temp.XXXXXX";
}
const int fd = mkstemp((char*)fname.c_str());
if (fd == -1) return String();
close(fd);
remove(fname.c_str());
# endif
if (suffix)
{
if (suffix[0] != '.')
return fname + "." + suffix;
else
return fname + suffix;
}
return fname;
}
static ErrorCallback customErrorCallback = 0;
static void* customErrorCallbackData = 0;
static bool breakOnError = false;
bool setBreakOnError(bool value)
{
bool prevVal = breakOnError;
breakOnError = value;
return prevVal;
}
int cv_snprintf(char* buf, int len, const char* fmt, ...)
{
va_list va;
va_start(va, fmt);
int res = cv_vsnprintf(buf, len, fmt, va);
va_end(va);
return res;
}
int cv_vsnprintf(char* buf, int len, const char* fmt, va_list args)
{
#if defined _MSC_VER
if (len <= 0) return len == 0 ? 1024 : -1;
int res = _vsnprintf_s(buf, len, _TRUNCATE, fmt, args);
// ensure null terminating on VS
if (res >= 0 && res < len)
{
buf[res] = 0;
return res;
}
else
{
buf[len - 1] = 0; // truncate happened
return res >= len ? res : (len * 2);
}
#else
return vsnprintf(buf, len, fmt, args);
#endif
}
static void dumpException(const Exception& exc)
{
const char* errorStr = cvErrorStr(exc.code);
char buf[1 << 12];
cv_snprintf(buf, sizeof(buf),
"OpenCV(%s) Error: %s (%s) in %s, file %s, line %d",
CV_VERSION,
errorStr, exc.err.c_str(), exc.func.size() > 0 ?
exc.func.c_str() : "unknown function", exc.file.c_str(), exc.line);
#ifdef __ANDROID__
__android_log_print(ANDROID_LOG_ERROR, "cv::error()", "%s", buf);
#else
fflush(stdout); fflush(stderr);
fprintf(stderr, "%s\n", buf);
fflush(stderr);
#endif
}
#ifdef CV_ERROR_SET_TERMINATE_HANDLER
static bool cv_terminate_handler_installed = false;
static std::terminate_handler cv_old_terminate_handler;
static cv::Exception cv_terminate_handler_exception;
static bool param_setupTerminateHandler = utils::getConfigurationParameterBool("OPENCV_SETUP_TERMINATE_HANDLER", true);
static void cv_terminate_handler() {
std::cerr << "OpenCV: terminate handler is called! The last OpenCV error is:\n";
dumpException(cv_terminate_handler_exception);
if (false /*cv_old_terminate_handler*/) // buggy behavior is observed with doubled "abort/retry/ignore" windows
cv_old_terminate_handler();
abort();
}
#endif
void error( const Exception& exc )
{
#ifdef CV_ERROR_SET_TERMINATE_HANDLER
{
cv::AutoLock lock(getInitializationMutex());
if (!cv_terminate_handler_installed)
{
if (param_setupTerminateHandler)
cv_old_terminate_handler = std::set_terminate(cv_terminate_handler);
cv_terminate_handler_installed = true;
}
cv_terminate_handler_exception = exc;
}
#endif
if (customErrorCallback != 0)
customErrorCallback(exc.code, exc.func.c_str(), exc.err.c_str(),
exc.file.c_str(), exc.line, customErrorCallbackData);
else if (param_dumpErrors)
{
dumpException(exc);
}
if(breakOnError)
{
static volatile int* p = 0;
*p = 0;
}
throw exc;
}
void error(int _code, const String& _err, const char* _func, const char* _file, int _line)
{
error(cv::Exception(_code, _err, _func, _file, _line));
}
ErrorCallback
redirectError( ErrorCallback errCallback, void* userdata, void** prevUserdata)
{
if( prevUserdata )
*prevUserdata = customErrorCallbackData;
ErrorCallback prevCallback = customErrorCallback;
customErrorCallback = errCallback;
customErrorCallbackData = userdata;
return prevCallback;
}
}
CV_IMPL int cvCheckHardwareSupport(int feature)
{
CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE );
return cv::currentFeatures->have[feature];
}
CV_IMPL int cvUseOptimized( int flag )
{
int prevMode = cv::useOptimizedFlag;
cv::setUseOptimized( flag != 0 );
return prevMode;
}
CV_IMPL int64 cvGetTickCount(void)
{
return cv::getTickCount();
}
CV_IMPL double cvGetTickFrequency(void)
{
return cv::getTickFrequency()*1e-6;
}
CV_IMPL CvErrorCallback
cvRedirectError( CvErrorCallback errCallback, void* userdata, void** prevUserdata)
{
return cv::redirectError(errCallback, userdata, prevUserdata);
}
CV_IMPL int cvNulDevReport( int, const char*, const char*,
const char*, int, void* )
{
return 0;
}
CV_IMPL int cvStdErrReport( int, const char*, const char*,
const char*, int, void* )
{
return 0;
}
CV_IMPL int cvGuiBoxReport( int, const char*, const char*,
const char*, int, void* )
{
return 0;
}
CV_IMPL int cvGetErrInfo( const char**, const char**, const char**, int* )
{
return 0;
}
CV_IMPL const char* cvErrorStr( int status )
{
static char buf[256];
switch (status)
{
case CV_StsOk : return "No Error";
case CV_StsBackTrace : return "Backtrace";
case CV_StsError : return "Unspecified error";
case CV_StsInternal : return "Internal error";
case CV_StsNoMem : return "Insufficient memory";
case CV_StsBadArg : return "Bad argument";
case CV_StsNoConv : return "Iterations do not converge";
case CV_StsAutoTrace : return "Autotrace call";
case CV_StsBadSize : return "Incorrect size of input array";
case CV_StsNullPtr : return "Null pointer";
case CV_StsDivByZero : return "Division by zero occurred";
case CV_BadStep : return "Image step is wrong";
case CV_StsInplaceNotSupported : return "Inplace operation is not supported";
case CV_StsObjectNotFound : return "Requested object was not found";
case CV_BadDepth : return "Input image depth is not supported by function";
case CV_StsUnmatchedFormats : return "Formats of input arguments do not match";
case CV_StsUnmatchedSizes : return "Sizes of input arguments do not match";
case CV_StsOutOfRange : return "One of the arguments\' values is out of range";
case CV_StsUnsupportedFormat : return "Unsupported format or combination of formats";
case CV_BadCOI : return "Input COI is not supported";
case CV_BadNumChannels : return "Bad number of channels";
case CV_StsBadFlag : return "Bad flag (parameter or structure field)";
case CV_StsBadPoint : return "Bad parameter of type CvPoint";
case CV_StsBadMask : return "Bad type of mask argument";
case CV_StsParseError : return "Parsing error";
case CV_StsNotImplemented : return "The function/feature is not implemented";
case CV_StsBadMemBlock : return "Memory block has been corrupted";
case CV_StsAssert : return "Assertion failed";
case CV_GpuNotSupported : return "No CUDA support";
case CV_GpuApiCallError : return "Gpu API call";
case CV_OpenGlNotSupported : return "No OpenGL support";
case CV_OpenGlApiCallError : return "OpenGL API call";
};
sprintf(buf, "Unknown %s code %d", status >= 0 ? "status":"error", status);
return buf;
}
CV_IMPL int cvGetErrMode(void)
{
return 0;
}
CV_IMPL int cvSetErrMode(int)
{
return 0;
}
CV_IMPL int cvGetErrStatus(void)
{
return 0;
}
CV_IMPL void cvSetErrStatus(int)
{
}
CV_IMPL void cvError( int code, const char* func_name,
const char* err_msg,
const char* file_name, int line )
{
cv::error(cv::Exception(code, err_msg, func_name, file_name, line));
}
/* function, which converts int to int */
CV_IMPL int
cvErrorFromIppStatus( int status )
{
switch (status)
{
case CV_BADSIZE_ERR: return CV_StsBadSize;
case CV_BADMEMBLOCK_ERR: return CV_StsBadMemBlock;
case CV_NULLPTR_ERR: return CV_StsNullPtr;
case CV_DIV_BY_ZERO_ERR: return CV_StsDivByZero;
case CV_BADSTEP_ERR: return CV_BadStep;
case CV_OUTOFMEM_ERR: return CV_StsNoMem;
case CV_BADARG_ERR: return CV_StsBadArg;
case CV_NOTDEFINED_ERR: return CV_StsError;
case CV_INPLACE_NOT_SUPPORTED_ERR: return CV_StsInplaceNotSupported;
case CV_NOTFOUND_ERR: return CV_StsObjectNotFound;
case CV_BADCONVERGENCE_ERR: return CV_StsNoConv;
case CV_BADDEPTH_ERR: return CV_BadDepth;
case CV_UNMATCHED_FORMATS_ERR: return CV_StsUnmatchedFormats;
case CV_UNSUPPORTED_COI_ERR: return CV_BadCOI;
case CV_UNSUPPORTED_CHANNELS_ERR: return CV_BadNumChannels;
case CV_BADFLAG_ERR: return CV_StsBadFlag;
case CV_BADRANGE_ERR: return CV_StsBadArg;
case CV_BADCOEF_ERR: return CV_StsBadArg;
case CV_BADFACTOR_ERR: return CV_StsBadArg;
case CV_BADPOINT_ERR: return CV_StsBadPoint;
default:
return CV_StsError;
}
}
namespace cv {
bool __termination = false;
}
namespace cv
{
#if defined _WIN32 || defined WINCE
struct Mutex::Impl
{
Impl()
{
#if (_WIN32_WINNT >= 0x0600)
::InitializeCriticalSectionEx(&cs, 1000, 0);
#else
::InitializeCriticalSection(&cs);
#endif
refcount = 1;
}
~Impl() { DeleteCriticalSection(&cs); }
void lock() { EnterCriticalSection(&cs); }
bool trylock() { return TryEnterCriticalSection(&cs) != 0; }
void unlock() { LeaveCriticalSection(&cs); }
CRITICAL_SECTION cs;
int refcount;
};
#else
struct Mutex::Impl
{
Impl()
{
pthread_mutexattr_t attr;
pthread_mutexattr_init(&attr);
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
pthread_mutex_init(&mt, &attr);
pthread_mutexattr_destroy(&attr);
refcount = 1;
}
~Impl() { pthread_mutex_destroy(&mt); }
void lock() { pthread_mutex_lock(&mt); }
bool trylock() { return pthread_mutex_trylock(&mt) == 0; }
void unlock() { pthread_mutex_unlock(&mt); }
pthread_mutex_t mt;
int refcount;
};
#endif
Mutex::Mutex()
{
impl = new Mutex::Impl;
}
Mutex::~Mutex()
{
if( CV_XADD(&impl->refcount, -1) == 1 )
delete impl;
impl = 0;
}
Mutex::Mutex(const Mutex& m)
{
impl = m.impl;
CV_XADD(&impl->refcount, 1);
}
Mutex& Mutex::operator = (const Mutex& m)
{
if (this != &m)
{
CV_XADD(&m.impl->refcount, 1);
if( CV_XADD(&impl->refcount, -1) == 1 )
delete impl;
impl = m.impl;
}
return *this;
}
void Mutex::lock() { impl->lock(); }
void Mutex::unlock() { impl->unlock(); }
bool Mutex::trylock() { return impl->trylock(); }
//////////////////////////////// thread-local storage ////////////////////////////////
namespace details {
#ifdef _WIN32
#ifdef _MSC_VER
#pragma warning(disable:4505) // unreferenced local function has been removed
#endif
#ifndef TLS_OUT_OF_INDEXES
#define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF)
#endif
#endif
template <class T>
class DisposedSingletonMark
{
private:
static bool mark;
protected:
DisposedSingletonMark() {}
~DisposedSingletonMark()
{
mark = true;
}
public:
static bool isDisposed() { return mark; }
};
// TLS platform abstraction layer
class TlsAbstraction : public DisposedSingletonMark<TlsAbstraction>
{
public:
TlsAbstraction();
~TlsAbstraction();
void* getData() const
{
if (isDisposed()) // guard: static initialization order fiasco
return NULL;
return getData_();
}
void setData(void *pData)
{
if (isDisposed()) // guard: static initialization order fiasco
return;
return setData_(pData);
}
private:
void* getData_() const;
void setData_(void *pData);
#ifdef _WIN32
#ifndef WINRT
DWORD tlsKey;
#endif
#else // _WIN32
pthread_key_t tlsKey;
#endif
};
template<> bool DisposedSingletonMark<TlsAbstraction>::mark = false;
static TlsAbstraction& getTlsAbstraction_()
{
static TlsAbstraction g_tls; // disposed in atexit() handlers (required for unregistering our callbacks)
return g_tls;
}
static TlsAbstraction* getTlsAbstraction()
{
#ifdef CV_CXX11
static TlsAbstraction* instance = &getTlsAbstraction_();
#else
static TlsAbstraction* volatile instance = NULL;
if (instance == NULL)
{
cv::AutoLock lock(cv::getInitializationMutex());
if (instance == NULL)
instance = &getTlsAbstraction_();
}
#endif
return DisposedSingletonMark<TlsAbstraction>::isDisposed() ? NULL : instance;
}
#ifdef _WIN32
#ifdef WINRT
static __declspec( thread ) void* tlsData = NULL; // using C++11 thread attribute for local thread data
TlsAbstraction::TlsAbstraction() {}
TlsAbstraction::~TlsAbstraction() {}
void* TlsAbstraction::getData_() const
{
return tlsData;
}
void TlsAbstraction::setData_(void *pData)
{
tlsData = pData;
}
#else //WINRT
#ifdef CV_USE_FLS
static void NTAPI opencv_fls_destructor(void* pData);
#endif // CV_USE_FLS
TlsAbstraction::TlsAbstraction()
{
#ifndef CV_USE_FLS
tlsKey = TlsAlloc();
#else // CV_USE_FLS
tlsKey = FlsAlloc(opencv_fls_destructor);
#endif // CV_USE_FLS
CV_Assert(tlsKey != TLS_OUT_OF_INDEXES);
}
TlsAbstraction::~TlsAbstraction()
{
#ifndef CV_USE_FLS
TlsFree(tlsKey);
#else // CV_USE_FLS
FlsFree(tlsKey);
#endif // CV_USE_FLS
tlsKey = TLS_OUT_OF_INDEXES;
}
void* TlsAbstraction::getData_() const
{
#ifndef CV_USE_FLS
return TlsGetValue(tlsKey);
#else // CV_USE_FLS
return FlsGetValue(tlsKey);
#endif // CV_USE_FLS
}
void TlsAbstraction::setData_(void *pData)
{
#ifndef CV_USE_FLS
CV_Assert(TlsSetValue(tlsKey, pData) == TRUE);
#else // CV_USE_FLS
CV_Assert(FlsSetValue(tlsKey, pData) == TRUE);
#endif // CV_USE_FLS
}
#endif // WINRT
#else // _WIN32
static void opencv_tls_destructor(void* pData);
TlsAbstraction::TlsAbstraction()
{
CV_Assert(pthread_key_create(&tlsKey, opencv_tls_destructor) == 0);
}
TlsAbstraction::~TlsAbstraction()
{
if (pthread_key_delete(tlsKey) != 0)
{
// Don't use logging here
fprintf(stderr, "OpenCV ERROR: TlsAbstraction::~TlsAbstraction(): pthread_key_delete() call failed\n");
fflush(stderr);
}
}
void* TlsAbstraction::getData_() const
{
return pthread_getspecific(tlsKey);
}
void TlsAbstraction::setData_(void *pData)
{
CV_Assert(pthread_setspecific(tlsKey, pData) == 0);
}
#endif
// Per-thread data structure
struct ThreadData
{
ThreadData()
{
idx = 0;
slots.reserve(32);
}
std::vector<void*> slots; // Data array for a thread
size_t idx; // Thread index in TLS storage. This is not OS thread ID!
};
static bool g_isTlsStorageInitialized = false;
// Main TLS storage class
class TlsStorage
{
public:
TlsStorage() :
tlsSlotsSize(0)
{
tlsSlots.reserve(32);
threads.reserve(32);
g_isTlsStorageInitialized = true;
}
~TlsStorage()
{
// TlsStorage object should not be released
// There is no reliable way to avoid problems caused by static initialization order fiasco
// Don't use logging here
fprintf(stderr, "OpenCV FATAL: TlsStorage::~TlsStorage() call is not expected\n");
fflush(stderr);
}
void releaseThread(void* tlsValue = NULL)
{
TlsAbstraction* tls = getTlsAbstraction();
if (NULL == tls)
return; // TLS singleton is not available (terminated)
ThreadData *pTD = tlsValue == NULL ? (ThreadData*)tls->getData() : (ThreadData*)tlsValue;
if (pTD == NULL)
return; // no OpenCV TLS data for this thread
AutoLock guard(mtxGlobalAccess);
for (size_t i = 0; i < threads.size(); i++)
{
if (pTD == threads[i])
{
threads[i] = NULL;
if (tlsValue == NULL)
tls->setData(0);
std::vector<void*>& thread_slots = pTD->slots;
for (size_t slotIdx = 0; slotIdx < thread_slots.size(); slotIdx++)
{
void* pData = thread_slots[slotIdx];
thread_slots[slotIdx] = NULL;
if (!pData)
continue;
TLSDataContainer* container = tlsSlots[slotIdx].container;
if (container)
container->deleteDataInstance(pData);
else
{
fprintf(stderr, "OpenCV ERROR: TLS: container for slotIdx=%d is NULL. Can't release thread data\n", (int)slotIdx);
fflush(stderr);
}
}
delete pTD;
return;
}
}
fprintf(stderr, "OpenCV WARNING: TLS: Can't release thread TLS data (unknown pointer or data race): %p\n", (void*)pTD); fflush(stderr);
}
// Reserve TLS storage index
size_t reserveSlot(TLSDataContainer* container)
{
AutoLock guard(mtxGlobalAccess);
CV_Assert(tlsSlotsSize == tlsSlots.size());
// Find unused slots
for(size_t slot = 0; slot < tlsSlotsSize; slot++)
{
if (tlsSlots[slot].container == NULL)
{
tlsSlots[slot].container = container;
return slot;
}
}
// Create new slot
tlsSlots.push_back(TlsSlotInfo(container)); tlsSlotsSize++;
return tlsSlotsSize - 1;
}
// Release TLS storage index and pass associated data to caller
void releaseSlot(size_t slotIdx, std::vector<void*> &dataVec, bool keepSlot = false)
{
AutoLock guard(mtxGlobalAccess);
CV_Assert(tlsSlotsSize == tlsSlots.size());
CV_Assert(tlsSlotsSize > slotIdx);
for(size_t i = 0; i < threads.size(); i++)
{
if(threads[i])
{
std::vector<void*>& thread_slots = threads[i]->slots;
if (thread_slots.size() > slotIdx && thread_slots[slotIdx])
{
dataVec.push_back(thread_slots[slotIdx]);
thread_slots[slotIdx] = NULL;
}
}
}
if (!keepSlot)
{
tlsSlots[slotIdx].container = NULL; // mark slot as free (see reserveSlot() implementation)
}
}
// Get data by TLS storage index
void* getData(size_t slotIdx) const
{
#ifndef CV_THREAD_SANITIZER
CV_Assert(tlsSlotsSize > slotIdx);
#endif
TlsAbstraction* tls = getTlsAbstraction();
if (NULL == tls)
return NULL; // TLS singleton is not available (terminated)
ThreadData* threadData = (ThreadData*)tls->getData();
if(threadData && threadData->slots.size() > slotIdx)
return threadData->slots[slotIdx];
return NULL;
}
// Gather data from threads by TLS storage index
void gather(size_t slotIdx, std::vector<void*> &dataVec)
{
AutoLock guard(mtxGlobalAccess);
CV_Assert(tlsSlotsSize == tlsSlots.size());
CV_Assert(tlsSlotsSize > slotIdx);
for(size_t i = 0; i < threads.size(); i++)
{
if(threads[i])
{
std::vector<void*>& thread_slots = threads[i]->slots;
if (thread_slots.size() > slotIdx && thread_slots[slotIdx])
dataVec.push_back(thread_slots[slotIdx]);
}
}
}
// Set data to storage index
void setData(size_t slotIdx, void* pData)
{
#ifndef CV_THREAD_SANITIZER
CV_Assert(tlsSlotsSize > slotIdx);
#endif
TlsAbstraction* tls = getTlsAbstraction();
if (NULL == tls)
return; // TLS singleton is not available (terminated)
ThreadData* threadData = (ThreadData*)tls->getData();
if(!threadData)
{
threadData = new ThreadData;
tls->setData((void*)threadData);
{
AutoLock guard(mtxGlobalAccess);
bool found = false;
// Find unused slots
for(size_t slot = 0; slot < threads.size(); slot++)
{
if (threads[slot] == NULL)
{
threadData->idx = (int)slot;
threads[slot] = threadData;
found = true;
break;
}
}
if (!found)
{
// Create new slot
threadData->idx = threads.size();
threads.push_back(threadData);
}
}
}
if(slotIdx >= threadData->slots.size())
{
AutoLock guard(mtxGlobalAccess); // keep synchronization with gather() calls
threadData->slots.resize(slotIdx + 1, NULL);
}
threadData->slots[slotIdx] = pData;
}
private:
Mutex mtxGlobalAccess; // Shared objects operation guard
size_t tlsSlotsSize; // equal to tlsSlots.size() in synchronized sections
// without synchronization this counter doesn't decrease - it is used for slotIdx sanity checks
struct TlsSlotInfo
{
TlsSlotInfo(TLSDataContainer* _container) : container(_container) {}
TLSDataContainer* container; // attached container (to dispose data of terminated threads)
};
std::vector<struct TlsSlotInfo> tlsSlots; // TLS keys state
std::vector<ThreadData*> threads; // Array for all allocated data. Thread data pointers are placed here to allow data cleanup
};
// Create global TLS storage object
static TlsStorage &getTlsStorage()
{
CV_SINGLETON_LAZY_INIT_REF(TlsStorage, new TlsStorage())
}
#ifndef _WIN32 // pthread key destructor
static void opencv_tls_destructor(void* pData)
{
if (!g_isTlsStorageInitialized)
return; // nothing to release, so prefer to avoid creation of new global structures
getTlsStorage().releaseThread(pData);
}
#else // _WIN32
#ifdef CV_USE_FLS
static void WINAPI opencv_fls_destructor(void* pData)
{
// Empiric detection of ExitProcess call
DWORD code = STILL_ACTIVE/*259*/;
BOOL res = GetExitCodeProcess(GetCurrentProcess(), &code);
if (res && code != STILL_ACTIVE)
{
// Looks like we are in ExitProcess() call
// This is FLS specific only because their callback is called before DllMain.
// TLS doesn't have similar problem, DllMain() is called first which mark __termination properly.
// Note: this workaround conflicts with ExitProcess() steps order described in documentation, however it works:
// 3. ... called with DLL_PROCESS_DETACH
// 7. The termination status of the process changes from STILL_ACTIVE to the exit value of the process.
// (ref: https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-exitprocess)
cv::__termination = true;
}
if (!g_isTlsStorageInitialized)
return; // nothing to release, so prefer to avoid creation of new global structures
getTlsStorage().releaseThread(pData);
}
#endif // CV_USE_FLS
#endif // _WIN32
} // namespace details
using namespace details;
void releaseTlsStorageThread()
{
if (!g_isTlsStorageInitialized)
return; // nothing to release, so prefer to avoid creation of new global structures
getTlsStorage().releaseThread();
}
TLSDataContainer::TLSDataContainer()
{
key_ = (int)getTlsStorage().reserveSlot(this); // Reserve key from TLS storage
}
TLSDataContainer::~TLSDataContainer()
{
CV_Assert(key_ == -1); // Key must be released in child object
}
void TLSDataContainer::gatherData(std::vector<void*> &data) const
{
getTlsStorage().gather(key_, data);
}
void TLSDataContainer::detachData(std::vector<void*> &data)
{
getTlsStorage().releaseSlot(key_, data, true);
}
void TLSDataContainer::release()
{
if (key_ == -1)
return; // already released
std::vector<void*> data; data.reserve(32);
getTlsStorage().releaseSlot(key_, data, false); // Release key and get stored data for proper destruction
key_ = -1;
for(size_t i = 0; i < data.size(); i++) // Delete all associated data
deleteDataInstance(data[i]);
}
void TLSDataContainer::cleanup()
{
std::vector<void*> data; data.reserve(32);
getTlsStorage().releaseSlot(key_, data, true); // Extract stored data with removal from TLS tables
for(size_t i = 0; i < data.size(); i++) // Delete all associated data
deleteDataInstance(data[i]);
}
void* TLSDataContainer::getData() const
{
CV_Assert(key_ != -1 && "Can't fetch data from terminated TLS container.");
void* pData = getTlsStorage().getData(key_); // Check if data was already allocated
if(!pData)
{
// Create new data instance and save it to TLS storage
pData = createDataInstance();
getTlsStorage().setData(key_, pData);
}
return pData;
}
static TLSData<CoreTLSData>& getCoreTlsDataTLS()
{
CV_SINGLETON_LAZY_INIT_REF(TLSData<CoreTLSData>, new TLSData<CoreTLSData>())
}
CoreTLSData& getCoreTlsData()
{
return getCoreTlsDataTLS().getRef();
}
#if defined CVAPI_EXPORTS && defined _WIN32 && !defined WINCE
#ifdef WINRT
#pragma warning(disable:4447) // Disable warning 'main' signature found without threading model
#endif
extern "C"
BOOL WINAPI DllMain(HINSTANCE, DWORD fdwReason, LPVOID lpReserved);
extern "C"
BOOL WINAPI DllMain(HINSTANCE, DWORD fdwReason, LPVOID lpReserved)
{
if (fdwReason == DLL_THREAD_DETACH || fdwReason == DLL_PROCESS_DETACH)
{
if (lpReserved != NULL) // called after ExitProcess() call
{
cv::__termination = true;
}
else
{
// Not allowed to free resources if lpReserved is non-null
// http://msdn.microsoft.com/en-us/library/windows/desktop/ms682583.aspx
releaseTlsStorageThread();
}
}
return TRUE;
}
#endif
namespace {
#ifdef OPENCV_WITH_ITT
bool overrideThreadName()
{
static bool param = utils::getConfigurationParameterBool("OPENCV_TRACE_ITT_SET_THREAD_NAME", false);
return param;
}
#endif
static int g_threadNum = 0;
class ThreadID {
public:
const int id;
ThreadID() :
id(CV_XADD(&g_threadNum, 1))
{
#ifdef OPENCV_WITH_ITT
if (overrideThreadName())
__itt_thread_set_name(cv::format("OpenCVThread-%03d", id).c_str());
#endif
}
};
static TLSData<ThreadID>& getThreadIDTLS()
{
CV_SINGLETON_LAZY_INIT_REF(TLSData<ThreadID>, new TLSData<ThreadID>());
}
} // namespace
int utils::getThreadID() { return getThreadIDTLS().get()->id; }
class ParseError
{
std::string bad_value;
public:
ParseError(const std::string &bad_value_) :bad_value(bad_value_) {}
std::string toString(const std::string &param) const
{
std::ostringstream out;
out << "Invalid value for parameter " << param << ": " << bad_value;
return out.str();
}
};
template <typename T>
T parseOption(const std::string &);
template<>
inline bool parseOption(const std::string & value)
{
if (value == "1" || value == "True" || value == "true" || value == "TRUE")
{
return true;
}
if (value == "0" || value == "False" || value == "false" || value == "FALSE")
{
return false;
}
throw ParseError(value);
}
template<>
inline size_t parseOption(const std::string &value)
{
size_t pos = 0;
for (; pos < value.size(); pos++)
{
if (!isdigit(value[pos]))
break;
}
cv::String valueStr = value.substr(0, pos);
cv::String suffixStr = value.substr(pos, value.length() - pos);
#ifdef CV_CXX11
size_t v = (size_t)std::stoull(valueStr);
#else
size_t v = (size_t)atol(valueStr.c_str());
#endif
if (suffixStr.length() == 0)
return v;
else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
return v * 1024 * 1024;
else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
return v * 1024;
throw ParseError(value);
}
template<>
inline cv::String parseOption(const std::string &value)
{
return value;
}
template<>
inline utils::Paths parseOption(const std::string &value)
{
utils::Paths result;
#ifdef _WIN32
const char sep = ';';
#else
const char sep = ':';
#endif
size_t start_pos = 0;
while (start_pos != std::string::npos)
{
const size_t pos = value.find(sep, start_pos);
const std::string one_piece(value, start_pos, pos == std::string::npos ? pos : pos - start_pos);
if (!one_piece.empty())
result.push_back(one_piece);
start_pos = pos == std::string::npos ? pos : pos + 1;
}
return result;
}
static inline const char * envRead(const char * name)
{
#ifdef NO_GETENV
CV_UNUSED(name);
return NULL;
#else
return getenv(name);
#endif
}
template<typename T>
inline T read(const std::string & k, const T & defaultValue)
{
try
{
const char * res = envRead(k.c_str());
if (res)
return parseOption<T>(std::string(res));
}
catch (const ParseError &err)
{
CV_Error(cv::Error::StsBadArg, err.toString(k));
}
return defaultValue;
}
bool utils::getConfigurationParameterBool(const char* name, bool defaultValue)
{
return read<bool>(name, defaultValue);
}
size_t utils::getConfigurationParameterSizeT(const char* name, size_t defaultValue)
{
return read<size_t>(name, defaultValue);
}
cv::String utils::getConfigurationParameterString(const char* name, const char* defaultValue)
{
return read<cv::String>(name, defaultValue);
}
utils::Paths utils::getConfigurationParameterPaths(const char* name, const utils::Paths &defaultValue)
{
return read<utils::Paths>(name, defaultValue);
}
#ifdef CV_COLLECT_IMPL_DATA
ImplCollector& getImplData()
{
CV_SINGLETON_LAZY_INIT_REF(ImplCollector, new ImplCollector())
}
void setImpl(int flags)
{
cv::AutoLock lock(getImplData().mutex);
getImplData().implFlags = flags;
getImplData().implCode.clear();
getImplData().implFun.clear();
}
void addImpl(int flag, const char* func)
{
cv::AutoLock lock(getImplData().mutex);
getImplData().implFlags |= flag;
if(func) // use lazy collection if name was not specified
{
size_t index = getImplData().implCode.size();
if(!index || (getImplData().implCode[index-1] != flag || getImplData().implFun[index-1].compare(func))) // avoid duplicates
{
getImplData().implCode.push_back(flag);
getImplData().implFun.push_back(func);
}
}
}
int getImpl(std::vector<int> &impl, std::vector<String> &funName)
{
cv::AutoLock lock(getImplData().mutex);
impl = getImplData().implCode;
funName = getImplData().implFun;
return getImplData().implFlags; // return actual flags for lazy collection
}
bool useCollection()
{
return getImplData().useCollection;
}
void setUseCollection(bool flag)
{
cv::AutoLock lock(getImplData().mutex);
getImplData().useCollection = flag;
}
#endif
namespace instr
{
bool useInstrumentation()
{
#ifdef ENABLE_INSTRUMENTATION
return getInstrumentStruct().useInstr;
#else
return false;
#endif
}
void setUseInstrumentation(bool flag)
{
#ifdef ENABLE_INSTRUMENTATION
getInstrumentStruct().useInstr = flag;
#else
CV_UNUSED(flag);
#endif
}
InstrNode* getTrace()
{
#ifdef ENABLE_INSTRUMENTATION
return &getInstrumentStruct().rootNode;
#else
return NULL;
#endif
}
void resetTrace()
{
#ifdef ENABLE_INSTRUMENTATION
getInstrumentStruct().rootNode.removeChilds();
getInstrumentTLSStruct().pCurrentNode = &getInstrumentStruct().rootNode;
#endif
}
void setFlags(FLAGS modeFlags)
{
#ifdef ENABLE_INSTRUMENTATION
getInstrumentStruct().flags = modeFlags;
#else
CV_UNUSED(modeFlags);
#endif
}
FLAGS getFlags()
{
#ifdef ENABLE_INSTRUMENTATION
return (FLAGS)getInstrumentStruct().flags;
#else
return (FLAGS)0;
#endif
}
NodeData::NodeData(const char* funName, const char* fileName, int lineNum, void* retAddress, bool alwaysExpand, cv::instr::TYPE instrType, cv::instr::IMPL implType)
{
m_funName = funName ? cv::String(funName) : cv::String(); // std::string doesn't accept NULL
m_instrType = instrType;
m_implType = implType;
m_fileName = fileName;
m_lineNum = lineNum;
m_retAddress = retAddress;
m_alwaysExpand = alwaysExpand;
m_threads = 1;
m_counter = 0;
m_ticksTotal = 0;
m_funError = false;
}
NodeData::NodeData(NodeData &ref)
{
*this = ref;
}
NodeData& NodeData::operator=(const NodeData &right)
{
this->m_funName = right.m_funName;
this->m_instrType = right.m_instrType;
this->m_implType = right.m_implType;
this->m_fileName = right.m_fileName;
this->m_lineNum = right.m_lineNum;
this->m_retAddress = right.m_retAddress;
this->m_alwaysExpand = right.m_alwaysExpand;
this->m_threads = right.m_threads;
this->m_counter = right.m_counter;
this->m_ticksTotal = right.m_ticksTotal;
this->m_funError = right.m_funError;
return *this;
}
NodeData::~NodeData()
{
}
bool operator==(const NodeData& left, const NodeData& right)
{
if(left.m_lineNum == right.m_lineNum && left.m_funName == right.m_funName && left.m_fileName == right.m_fileName)
{
if(left.m_retAddress == right.m_retAddress || !(cv::instr::getFlags()&cv::instr::FLAGS_EXPAND_SAME_NAMES || left.m_alwaysExpand))
return true;
}
return false;
}
#ifdef ENABLE_INSTRUMENTATION
InstrStruct& getInstrumentStruct()
{
static InstrStruct instr;
return instr;
}
InstrTLSStruct& getInstrumentTLSStruct()
{
return *getInstrumentStruct().tlsStruct.get();
}
InstrNode* getCurrentNode()
{
return getInstrumentTLSStruct().pCurrentNode;
}
IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, void *retAddress, bool alwaysExpand, TYPE instrType, IMPL implType)
{
m_disabled = false;
m_regionTicks = 0;
InstrStruct *pStruct = &getInstrumentStruct();
if(pStruct->useInstr)
{
InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
// Disable in case of failure
if(!pTLS->pCurrentNode)
{
m_disabled = true;
return;
}
int depth = pTLS->pCurrentNode->getDepth();
if(pStruct->maxDepth && pStruct->maxDepth <= depth)
{
m_disabled = true;
return;
}
NodeData payload(funName, fileName, lineNum, retAddress, alwaysExpand, instrType, implType);
Node<NodeData>* pChild = NULL;
if(pStruct->flags&FLAGS_MAPPING)
{
// Critical section
cv::AutoLock guard(pStruct->mutexCreate); // Guard from concurrent child creation
pChild = pTLS->pCurrentNode->findChild(payload);
if(!pChild)
{
pChild = new Node<NodeData>(payload);
pTLS->pCurrentNode->addChild(pChild);
}
}
else
{
pChild = pTLS->pCurrentNode->findChild(payload);
if(!pChild)
{
m_disabled = true;
return;
}
}
pTLS->pCurrentNode = pChild;
m_regionTicks = getTickCount();
}
}
IntrumentationRegion::~IntrumentationRegion()
{
InstrStruct *pStruct = &getInstrumentStruct();
if(pStruct->useInstr)
{
if(!m_disabled)
{
InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
(pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN ||
pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER))
{
cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation
}
uint64 ticks = (getTickCount() - m_regionTicks);
{
cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
pTLS->pCurrentNode->m_payload.m_counter++;
pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks;
pTLS->pCurrentNode->m_payload.m_tls.get()->m_ticksTotal += ticks;
}
pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
}
}
}
#endif
}
namespace ipp
{
#ifdef HAVE_IPP
struct IPPInitSingleton
{
public:
IPPInitSingleton()
{
useIPP = true;
useIPP_NE = false;
ippStatus = 0;
funcname = NULL;
filename = NULL;
linen = 0;
cpuFeatures = 0;
ippFeatures = 0;
ippTopFeatures = 0;
pIppLibInfo = NULL;
ippStatus = ippGetCpuFeatures(&cpuFeatures, NULL);
if(ippStatus < 0)
{
std::cerr << "ERROR: IPP cannot detect CPU features, IPP was disabled " << std::endl;
useIPP = false;
return;
}
ippFeatures = cpuFeatures;
const char* pIppEnv = getenv("OPENCV_IPP");
cv::String env = pIppEnv;
if(env.size())
{
#if IPP_VERSION_X100 >= 201900
const Ipp64u minorFeatures = ippCPUID_MOVBE|ippCPUID_AES|ippCPUID_CLMUL|ippCPUID_ABR|ippCPUID_RDRAND|ippCPUID_F16C|
ippCPUID_ADCOX|ippCPUID_RDSEED|ippCPUID_PREFETCHW|ippCPUID_SHA|ippCPUID_MPX|ippCPUID_AVX512CD|ippCPUID_AVX512ER|
ippCPUID_AVX512PF|ippCPUID_AVX512BW|ippCPUID_AVX512DQ|ippCPUID_AVX512VL|ippCPUID_AVX512VBMI|ippCPUID_AVX512_4FMADDPS|
ippCPUID_AVX512_4VNNIW|ippCPUID_AVX512IFMA;
#elif IPP_VERSION_X100 >= 201703
const Ipp64u minorFeatures = ippCPUID_MOVBE|ippCPUID_AES|ippCPUID_CLMUL|ippCPUID_ABR|ippCPUID_RDRAND|ippCPUID_F16C|
ippCPUID_ADCOX|ippCPUID_RDSEED|ippCPUID_PREFETCHW|ippCPUID_SHA|ippCPUID_MPX|ippCPUID_AVX512CD|ippCPUID_AVX512ER|
ippCPUID_AVX512PF|ippCPUID_AVX512BW|ippCPUID_AVX512DQ|ippCPUID_AVX512VL|ippCPUID_AVX512VBMI;
#elif IPP_VERSION_X100 >= 201700
const Ipp64u minorFeatures = ippCPUID_MOVBE|ippCPUID_AES|ippCPUID_CLMUL|ippCPUID_ABR|ippCPUID_RDRAND|ippCPUID_F16C|
ippCPUID_ADCOX|ippCPUID_RDSEED|ippCPUID_PREFETCHW|ippCPUID_SHA|ippCPUID_AVX512CD|ippCPUID_AVX512ER|
ippCPUID_AVX512PF|ippCPUID_AVX512BW|ippCPUID_AVX512DQ|ippCPUID_AVX512VL|ippCPUID_AVX512VBMI;
#else
const Ipp64u minorFeatures = 0;
#endif
env = env.toLowerCase();
if(env.substr(0, 2) == "ne")
{
useIPP_NE = true;
env = env.substr(3, env.size());
}
if(env == "disabled")
{
std::cerr << "WARNING: IPP was disabled by OPENCV_IPP environment variable" << std::endl;
useIPP = false;
}
else if(env == "sse42")
ippFeatures = minorFeatures|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42;
else if(env == "avx2")
ippFeatures = minorFeatures|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2;
#if IPP_VERSION_X100 >= 201700
#if defined (_M_AMD64) || defined (__x86_64__)
else if(env == "avx512")
ippFeatures = minorFeatures|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2|ippCPUID_AVX512F;
#endif
#endif
else
std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << ". Correct values are: disabled, sse42, avx2, avx512 (Intel64 only)" << std::endl;
// Trim unsupported features
ippFeatures &= cpuFeatures;
}
// Disable AVX1 since we don't track regressions for it. SSE42 will be used instead
if(cpuFeatures&ippCPUID_AVX && !(cpuFeatures&ippCPUID_AVX2))
ippFeatures &= ~((Ipp64u)ippCPUID_AVX);
// IPP integrations in OpenCV support only SSE4.2, AVX2 and AVX-512 optimizations.
if(!(
#if IPP_VERSION_X100 >= 201700
cpuFeatures&ippCPUID_AVX512F ||
#endif
cpuFeatures&ippCPUID_AVX2 ||
cpuFeatures&ippCPUID_SSE42
))
{
useIPP = false;
return;
}
if(ippFeatures == cpuFeatures)
IPP_INITIALIZER(0)
else
IPP_INITIALIZER(ippFeatures)
ippFeatures = ippGetEnabledCpuFeatures();
// Detect top level optimizations to make comparison easier for optimizations dependent conditions
#if IPP_VERSION_X100 >= 201700
if(ippFeatures&ippCPUID_AVX512F)
{
if((ippFeatures&ippCPUID_AVX512_SKX) == ippCPUID_AVX512_SKX)
ippTopFeatures = ippCPUID_AVX512_SKX;
else if((ippFeatures&ippCPUID_AVX512_KNL) == ippCPUID_AVX512_KNL)
ippTopFeatures = ippCPUID_AVX512_KNL;
else
ippTopFeatures = ippCPUID_AVX512F; // Unknown AVX512 configuration
}
else
#endif
if(ippFeatures&ippCPUID_AVX2)
ippTopFeatures = ippCPUID_AVX2;
else if(ippFeatures&ippCPUID_SSE42)
ippTopFeatures = ippCPUID_SSE42;
pIppLibInfo = ippiGetLibVersion();
// workaround: https://github.com/opencv/opencv/issues/12959
std::string ippName(pIppLibInfo->Name ? pIppLibInfo->Name : "");
if (ippName.find("SSE4.2") != std::string::npos)
{
ippTopFeatures = ippCPUID_SSE42;
}
}
public:
bool useIPP;
bool useIPP_NE;
int ippStatus; // 0 - all is ok, -1 - IPP functions failed
const char *funcname;
const char *filename;
int linen;
Ipp64u ippFeatures;
Ipp64u cpuFeatures;
Ipp64u ippTopFeatures;
const IppLibraryVersion *pIppLibInfo;
};
static IPPInitSingleton& getIPPSingleton()
{
CV_SINGLETON_LAZY_INIT_REF(IPPInitSingleton, new IPPInitSingleton())
}
#endif
#if OPENCV_ABI_COMPATIBILITY > 300
unsigned long long getIppFeatures()
#else
int getIppFeatures()
#endif
{
#ifdef HAVE_IPP
#if OPENCV_ABI_COMPATIBILITY > 300
return getIPPSingleton().ippFeatures;
#else
return (int)getIPPSingleton().ippFeatures;
#endif
#else
return 0;
#endif
}
#ifdef HAVE_IPP
unsigned long long getIppTopFeatures()
{
return getIPPSingleton().ippTopFeatures;
}
#endif
void setIppStatus(int status, const char * const _funcname, const char * const _filename, int _line)
{
#ifdef HAVE_IPP
getIPPSingleton().ippStatus = status;
getIPPSingleton().funcname = _funcname;
getIPPSingleton().filename = _filename;
getIPPSingleton().linen = _line;
#else
CV_UNUSED(status); CV_UNUSED(_funcname); CV_UNUSED(_filename); CV_UNUSED(_line);
#endif
}
int getIppStatus()
{
#ifdef HAVE_IPP
return getIPPSingleton().ippStatus;
#else
return 0;
#endif
}
String getIppErrorLocation()
{
#ifdef HAVE_IPP
return format("%s:%d %s", getIPPSingleton().filename ? getIPPSingleton().filename : "", getIPPSingleton().linen, getIPPSingleton().funcname ? getIPPSingleton().funcname : "");
#else
return String();
#endif
}
String getIppVersion()
{
#ifdef HAVE_IPP
const IppLibraryVersion *pInfo = getIPPSingleton().pIppLibInfo;
if(pInfo)
return format("%s %s %s", pInfo->Name, pInfo->Version, pInfo->BuildDate);
else
return String("error");
#else
return String("disabled");
#endif
}
bool useIPP()
{
#ifdef HAVE_IPP
CoreTLSData& data = getCoreTlsData();
if (data.useIPP < 0)
{
data.useIPP = getIPPSingleton().useIPP;
}
return (data.useIPP > 0);
#else
return false;
#endif
}
void setUseIPP(bool flag)
{
CoreTLSData& data = getCoreTlsData();
#ifdef HAVE_IPP
data.useIPP = (getIPPSingleton().useIPP)?flag:false;
#else
CV_UNUSED(flag);
data.useIPP = false;
#endif
}
bool useIPP_NotExact()
{
#ifdef HAVE_IPP
CoreTLSData& data = getCoreTlsData();
if (data.useIPP_NE < 0)
{
data.useIPP_NE = getIPPSingleton().useIPP_NE;
}
return (data.useIPP_NE > 0);
#else
return false;
#endif
}
void setUseIPP_NotExact(bool flag)
{
CoreTLSData& data = getCoreTlsData();
#ifdef HAVE_IPP
data.useIPP_NE = flag;
#else
CV_UNUSED(flag);
data.useIPP_NE = false;
#endif
}
#if OPENCV_ABI_COMPATIBILITY < 400
bool useIPP_NE()
{
return useIPP_NotExact();
}
void setUseIPP_NE(bool flag)
{
setUseIPP_NotExact(flag);
}
#endif
} // namespace ipp
} // namespace cv
#ifdef HAVE_TEGRA_OPTIMIZATION
namespace tegra {
bool useTegra()
{
cv::CoreTLSData* data = cv::getCoreTlsData();
if (data->useTegra < 0)
{
const char* pTegraEnv = getenv("OPENCV_TEGRA");
if (pTegraEnv && (cv::String(pTegraEnv) == "disabled"))
data->useTegra = false;
else
data->useTegra = true;
}
return (data->useTegra > 0);
}
void setUseTegra(bool flag)
{
cv::CoreTLSData* data = cv::getCoreTlsData();
data->useTegra = flag;
}
} // namespace tegra
#endif
/* End of file. */