opencv/modules/core/src/system.cpp

2235 lines
63 KiB
C++
Raw Normal View History

/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
2015-01-12 15:59:30 +08:00
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iostream>
Merge pull request #9114 from pengli:dnn_rebase add libdnn acceleration to dnn module (#9114) * import libdnn code Signed-off-by: Li Peng <peng.li@intel.com> * add convolution layer ocl acceleration Signed-off-by: Li Peng <peng.li@intel.com> * add pooling layer ocl acceleration Signed-off-by: Li Peng <peng.li@intel.com> * add softmax layer ocl acceleration Signed-off-by: Li Peng <peng.li@intel.com> * add lrn layer ocl acceleration Signed-off-by: Li Peng <peng.li@intel.com> * add innerproduct layer ocl acceleration Signed-off-by: Li Peng <peng.li@intel.com> * add HAVE_OPENCL macro Signed-off-by: Li Peng <peng.li@intel.com> * fix for convolution ocl Signed-off-by: Li Peng <peng.li@intel.com> * enable getUMat() for multi-dimension Mat Signed-off-by: Li Peng <peng.li@intel.com> * use getUMat for ocl acceleration Signed-off-by: Li Peng <peng.li@intel.com> * use CV_OCL_RUN macro Signed-off-by: Li Peng <peng.li@intel.com> * set OPENCL target when it is available and disable fuseLayer for OCL target for the time being Signed-off-by: Li Peng <peng.li@intel.com> * fix innerproduct accuracy test Signed-off-by: Li Peng <peng.li@intel.com> * remove trailing space Signed-off-by: Li Peng <peng.li@intel.com> * Fixed tensorflow demo bug. Root cause is that tensorflow has different algorithm with libdnn to calculate convolution output dimension. libdnn don't calculate output dimension anymore and just use one passed in by config. * split gemm ocl file split it into gemm_buffer.cl and gemm_image.cl Signed-off-by: Li Peng <peng.li@intel.com> * Fix compile failure Signed-off-by: Li Peng <peng.li@intel.com> * check env flag for auto tuning Signed-off-by: Li Peng <peng.li@intel.com> * switch to new ocl kernels for softmax layer Signed-off-by: Li Peng <peng.li@intel.com> * update softmax layer on some platform subgroup extension may not work well, fallback to non subgroup ocl acceleration. Signed-off-by: Li Peng <peng.li@intel.com> * fallback to cpu path for fc layer with multi output Signed-off-by: Li Peng <peng.li@intel.com> * update output message Signed-off-by: Li Peng <peng.li@intel.com> * update fully connected layer fallback to gemm API if libdnn return false Signed-off-by: Li Peng <peng.li@intel.com> * Add ReLU OCL implementation * disable layer fusion for now Signed-off-by: Li Peng <peng.li@intel.com> * Add OCL implementation for concat layer Signed-off-by: Wu Zhiwen <zhiwen.wu@intel.com> * libdnn: update license and copyrights Also refine libdnn coding style Signed-off-by: Wu Zhiwen <zhiwen.wu@intel.com> Signed-off-by: Li Peng <peng.li@intel.com> * DNN: Don't link OpenCL library explicitly * DNN: Make default preferableTarget to DNN_TARGET_CPU User should set it to DNN_TARGET_OPENCL explicitly if want to use OpenCL acceleration. Also don't fusion when using DNN_TARGET_OPENCL * DNN: refine coding style * Add getOpenCLErrorString * DNN: Use int32_t/uint32_t instread of alias * Use namespace ocl4dnn to include libdnn things * remove extra copyTo in softmax ocl path Signed-off-by: Li Peng <peng.li@intel.com> * update ReLU layer ocl path Signed-off-by: Li Peng <peng.li@intel.com> * Add prefer target property for layer class It is used to indicate the target for layer forwarding, either the default CPU target or OCL target. Signed-off-by: Li Peng <peng.li@intel.com> * Add cl_event based timer for cv::ocl * Rename libdnn to ocl4dnn Signed-off-by: Li Peng <peng.li@intel.com> Signed-off-by: wzw <zhiwen.wu@intel.com> * use UMat for ocl4dnn internal buffer Remove allocateMemory which use clCreateBuffer directly Signed-off-by: Li Peng <peng.li@intel.com> Signed-off-by: wzw <zhiwen.wu@intel.com> * enable buffer gemm in ocl4dnn innerproduct Signed-off-by: Li Peng <peng.li@intel.com> * replace int_tp globally for ocl4dnn kernels. Signed-off-by: wzw <zhiwen.wu@intel.com> Signed-off-by: Li Peng <peng.li@intel.com> * create UMat for layer params Signed-off-by: Li Peng <peng.li@intel.com> * update sign ocl kernel Signed-off-by: Li Peng <peng.li@intel.com> * update image based gemm of inner product layer Signed-off-by: Li Peng <peng.li@intel.com> * remove buffer gemm of inner product layer call cv::gemm API instead Signed-off-by: Li Peng <peng.li@intel.com> * change ocl4dnn forward parameter to UMat Signed-off-by: Li Peng <peng.li@intel.com> * Refine auto-tuning mechanism. - Use OPENCV_OCL4DNN_KERNEL_CONFIG_PATH to set cache directory for fine-tuned kernel configuration. e.g. export OPENCV_OCL4DNN_KERNEL_CONFIG_PATH=/home/tmp, the cache directory will be /home/tmp/spatialkernels/ on Linux. - Define environment OPENCV_OCL4DNN_ENABLE_AUTO_TUNING to enable auto-tuning. - OPENCV_OPENCL_ENABLE_PROFILING is only used to enable profiling for OpenCL command queue. This fix basic kernel get wrong running time, i.e. 0ms. - If creating cache directory failed, disable auto-tuning. * Detect and create cache dir on windows Signed-off-by: Li Peng <peng.li@intel.com> * Refine gemm like convolution kernel. Signed-off-by: Li Peng <peng.li@intel.com> * Fix redundant swizzleWeights calling when use cached kernel config. * Fix "out of resource" bug when auto-tuning too many kernels. * replace cl_mem with UMat in ocl4dnnConvSpatial class * OCL4DNN: reduce the tuning kernel candidate. This patch could reduce 75% of the tuning candidates with less than 2% performance impact for the final result. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com> * replace cl_mem with umat in ocl4dnn convolution Signed-off-by: Li Peng <peng.li@intel.com> * remove weight_image_ of ocl4dnn inner product Actually it is unused in the computation Signed-off-by: Li Peng <peng.li@intel.com> * Various fixes for ocl4dnn 1. OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()) 2. Ptr<OCL4DNNInnerProduct<float> > innerProductOp 3. Code comments cleanup 4. ignore check on OCL cpu device Signed-off-by: Li Peng <peng.li@intel.com> * add build option for log softmax Signed-off-by: Li Peng <peng.li@intel.com> * remove unused ocl kernels in ocl4dnn Signed-off-by: Li Peng <peng.li@intel.com> * replace ocl4dnnSet with opencv setTo Signed-off-by: Li Peng <peng.li@intel.com> * replace ALIGN with cv::alignSize Signed-off-by: Li Peng <peng.li@intel.com> * check kernel build options Signed-off-by: Li Peng <peng.li@intel.com> * Handle program compilation fail properly. * Use std::numeric_limits<float>::infinity() for large float number * check ocl4dnn kernel compilation result Signed-off-by: Li Peng <peng.li@intel.com> * remove unused ctx_id Signed-off-by: Li Peng <peng.li@intel.com> * change clEnqueueNDRangeKernel to kernel.run() Signed-off-by: Li Peng <peng.li@intel.com> * change cl_mem to UMat in image based gemm Signed-off-by: Li Peng <peng.li@intel.com> * check intel subgroup support for lrn and pooling layer Signed-off-by: Li Peng <peng.li@intel.com> * Fix convolution bug if group is greater than 1 Signed-off-by: Li Peng <peng.li@intel.com> * Set default layer preferableTarget to be DNN_TARGET_CPU Signed-off-by: Li Peng <peng.li@intel.com> * Add ocl perf test for convolution Signed-off-by: Li Peng <peng.li@intel.com> * Add more ocl accuracy test Signed-off-by: Li Peng <peng.li@intel.com> * replace cl_image with ocl::Image2D Signed-off-by: Li Peng <peng.li@intel.com> * Fix build failure in elementwise layer Signed-off-by: Li Peng <peng.li@intel.com> * use getUMat() to get blob data Signed-off-by: Li Peng <peng.li@intel.com> * replace cl_mem handle with ocl::KernelArg Signed-off-by: Li Peng <peng.li@intel.com> * dnn(build): don't use C++11, OPENCL_LIBRARIES fix * dnn(ocl4dnn): remove unused OpenCL kernels * dnn(ocl4dnn): extract OpenCL code into .cl files * dnn(ocl4dnn): refine auto-tuning Defaultly disable auto-tuning, set OPENCV_OCL4DNN_ENABLE_AUTO_TUNING environment variable to enable it. Use a set of pre-tuned configs as default config if auto-tuning is disabled. These configs are tuned for Intel GPU with 48/72 EUs, and for googlenet, AlexNet, ResNet-50 If default config is not suitable, use the first available kernel config from the candidates. Candidate priority from high to low is gemm like kernel, IDLF kernel, basick kernel. * dnn(ocl4dnn): pooling doesn't use OpenCL subgroups * dnn(ocl4dnn): fix perf test OpenCV has default 3sec time limit for each performance test. Warmup OpenCL backend outside of perf measurement loop. * use ocl::KernelArg as much as possible Signed-off-by: Li Peng <peng.li@intel.com> * dnn(ocl4dnn): fix bias bug for gemm like kernel * dnn(ocl4dnn): wrap cl_mem into UMat Signed-off-by: Li Peng <peng.li@intel.com> * dnn(ocl4dnn): Refine signature of kernel config - Use more readable string as signture of kernel config - Don't count device name and vendor in signature string - Default kernel configurations are tuned for Intel GPU with 24/48/72 EUs, and for googlenet, AlexNet, ResNet-50 net model. * dnn(ocl4dnn): swap width/height in configuration * dnn(ocl4dnn): enable configs for Intel OpenCL runtime only * core: make configuration helper functions accessible from non-core modules * dnn(ocl4dnn): update kernel auto-tuning behavior Avoid unwanted creation of directories * dnn(ocl4dnn): simplify kernel to workaround OpenCL compiler crash * dnn(ocl4dnn): remove redundant code * dnn(ocl4dnn): Add more clear message for simd size dismatch. * dnn(ocl4dnn): add const to const argument Signed-off-by: Li Peng <peng.li@intel.com> * dnn(ocl4dnn): force compiler use a specific SIMD size for IDLF kernel * dnn(ocl4dnn): drop unused tuneLocalSize() * dnn(ocl4dnn): specify OpenCL queue for Timer and convolve() method * dnn(ocl4dnn): sanitize file names used for cache * dnn(perf): enable Network tests with OpenCL * dnn(ocl4dnn/conv): drop computeGlobalSize() * dnn(ocl4dnn/conv): drop unused fields * dnn(ocl4dnn/conv): simplify ctor * dnn(ocl4dnn/conv): refactor kernelConfig localSize=NULL * dnn(ocl4dnn/conv): drop unsupported double / untested half types * dnn(ocl4dnn/conv): drop unused variable * dnn(ocl4dnn/conv): alignSize/divUp * dnn(ocl4dnn/conv): use enum values * dnn(ocl4dnn): drop unused innerproduct variable Signed-off-by: Li Peng <peng.li@intel.com> * dnn(ocl4dnn): add an generic function to check cl option support * dnn(ocl4dnn): run softmax subgroup version kernel first Signed-off-by: Li Peng <peng.li@intel.com>
2017-10-02 20:38:00 +08:00
#include <opencv2/core/utils/configuration.private.hpp>
2017-05-25 23:59:01 +08:00
#include <opencv2/core/utils/trace.private.hpp>
namespace cv {
static Mutex* __initialization_mutex = NULL;
Mutex& getInitializationMutex()
{
if (__initialization_mutex == NULL)
__initialization_mutex = new Mutex();
return *__initialization_mutex;
}
// force initialization (single-threaded environment)
Mutex* __initialization_mutex_initializer = &getInitializationMutex();
} // namespace cv
#ifdef _MSC_VER
# if _MSC_VER >= 1700
# pragma warning(disable:4447) // Disable warning 'main' signature found without threading model
# endif
#endif
2017-08-26 17:37:59 +08:00
#if defined __ANDROID__ || defined __linux__ || defined __FreeBSD__ || defined __HAIKU__
2014-12-30 21:53:19 +08:00
# include <unistd.h>
# include <fcntl.h>
# include <elf.h>
#if defined __ANDROID__ || defined __linux__
2014-12-30 21:53:19 +08:00
# include <linux/auxvec.h>
#endif
#endif
2014-12-30 21:53:19 +08:00
#if defined __ANDROID__ && defined HAVE_CPUFEATURES
2017-05-19 21:14:01 +08:00
# include <cpu-features.h>
#endif
2017-10-03 06:54:31 +08:00
#ifndef __VSX__
# if defined __PPC64__ && defined __linux__
# include "sys/auxv.h"
# ifndef AT_HWCAP2
# define AT_HWCAP2 26
# endif
# ifndef PPC_FEATURE2_ARCH_2_07
# define PPC_FEATURE2_ARCH_2_07 0x80000000
# endif
# endif
#endif
2017-07-25 18:23:44 +08:00
#if defined _WIN32 || defined WINCE
#ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
#define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
#endif
#include <windows.h>
#if (_WIN32_WINNT >= 0x0602)
#include <synchapi.h>
#endif
#undef small
#undef min
#undef max
#undef abs
#include <tchar.h>
2015-12-15 20:55:43 +08:00
#if defined _MSC_VER
#if _MSC_VER >= 1400
#include <intrin.h>
#elif defined _M_IX86
static void __cpuid(int* cpuid_data, int)
{
__asm
{
push ebx
push edi
mov edi, cpuid_data
mov eax, 1
cpuid
mov [edi], eax
mov [edi + 4], ebx
mov [edi + 8], ecx
mov [edi + 12], edx
pop edi
pop ebx
}
}
static void __cpuidex(int* cpuid_data, int, int)
{
__asm
{
push edi
mov edi, cpuid_data
mov eax, 7
mov ecx, 0
cpuid
mov [edi], eax
mov [edi + 4], ebx
mov [edi + 8], ecx
mov [edi + 12], edx
pop edi
}
}
#endif
#endif
#ifdef WINRT
#include <wrl/client.h>
#ifndef __cplusplus_winrt
#include <windows.storage.h>
#pragma comment(lib, "runtimeobject.lib")
#endif
std::wstring GetTempPathWinRT()
{
#ifdef __cplusplus_winrt
return std::wstring(Windows::Storage::ApplicationData::Current->TemporaryFolder->Path->Data());
#else
Microsoft::WRL::ComPtr<ABI::Windows::Storage::IApplicationDataStatics> appdataFactory;
Microsoft::WRL::ComPtr<ABI::Windows::Storage::IApplicationData> appdataRef;
Microsoft::WRL::ComPtr<ABI::Windows::Storage::IStorageFolder> storagefolderRef;
Microsoft::WRL::ComPtr<ABI::Windows::Storage::IStorageItem> storageitemRef;
HSTRING str;
HSTRING_HEADER hstrHead;
std::wstring wstr;
if (FAILED(WindowsCreateStringReference(RuntimeClass_Windows_Storage_ApplicationData,
(UINT32)wcslen(RuntimeClass_Windows_Storage_ApplicationData), &hstrHead, &str)))
return wstr;
if (FAILED(RoGetActivationFactory(str, IID_PPV_ARGS(appdataFactory.ReleaseAndGetAddressOf()))))
return wstr;
if (FAILED(appdataFactory->get_Current(appdataRef.ReleaseAndGetAddressOf())))
return wstr;
if (FAILED(appdataRef->get_TemporaryFolder(storagefolderRef.ReleaseAndGetAddressOf())))
return wstr;
if (FAILED(storagefolderRef.As(&storageitemRef)))
return wstr;
str = NULL;
if (FAILED(storageitemRef->get_Path(&str)))
return wstr;
wstr = WindowsGetStringRawBuffer(str, NULL);
WindowsDeleteString(str);
return wstr;
#endif
}
std::wstring GetTempFileNameWinRT(std::wstring prefix)
{
wchar_t guidStr[40];
GUID g;
CoCreateGuid(&g);
wchar_t* mask = L"%08x_%04x_%04x_%02x%02x_%02x%02x%02x%02x%02x%02x";
swprintf(&guidStr[0], sizeof(guidStr)/sizeof(wchar_t), mask,
g.Data1, g.Data2, g.Data3, UINT(g.Data4[0]), UINT(g.Data4[1]),
UINT(g.Data4[2]), UINT(g.Data4[3]), UINT(g.Data4[4]),
UINT(g.Data4[5]), UINT(g.Data4[6]), UINT(g.Data4[7]));
return prefix.append(std::wstring(guidStr));
}
#endif
#else
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#if defined __MACH__ && defined __APPLE__
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif
#endif
#ifdef _OPENMP
#include "omp.h"
#endif
#if defined __linux__ || defined __APPLE__ || defined __EMSCRIPTEN__ || defined __FreeBSD__ || defined __GLIBC__ || defined __HAIKU__
2011-08-02 20:42:58 +08:00
#include <unistd.h>
#include <stdio.h>
2012-06-08 01:21:29 +08:00
#include <sys/types.h>
#if defined __ANDROID__
2011-08-02 22:56:51 +08:00
#include <sys/sysconf.h>
2011-08-02 20:42:58 +08:00
#endif
2011-08-02 22:56:51 +08:00
#endif
2011-08-02 20:42:58 +08:00
#ifdef __ANDROID__
# include <android/log.h>
#endif
namespace cv
{
Exception::Exception() { code = 0; line = 0; }
Exception::Exception(int _code, const String& _err, const String& _func, const String& _file, int _line)
: code(_code), err(_err), func(_func), file(_file), line(_line)
{
formatMessage();
}
Exception::~Exception() throw() {}
/*!
\return the error description and the context as a text string.
2012-06-08 01:21:29 +08:00
*/
const char* Exception::what() const throw() { return msg.c_str(); }
void Exception::formatMessage()
{
if( func.size() > 0 )
msg = format("OpenCV(%s) %s:%d: error: (%d) %s in function %s\n", CV_VERSION, file.c_str(), line, code, err.c_str(), func.c_str());
else
msg = format("OpenCV(%s) %s:%d: error: (%d) %s\n", CV_VERSION, file.c_str(), line, code, err.c_str());
}
2012-06-08 01:21:29 +08:00
2016-09-07 23:02:36 +08:00
static const char* g_hwFeatureNames[CV_HARDWARE_MAX_FEATURE] = { NULL };
static const char* getHWFeatureName(int id)
{
return (id < CV_HARDWARE_MAX_FEATURE) ? g_hwFeatureNames[id] : NULL;
}
static const char* getHWFeatureNameSafe(int id)
{
const char* name = getHWFeatureName(id);
return name ? name : "Unknown feature";
}
2015-12-15 20:55:43 +08:00
struct HWFeatures
{
enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE };
2016-09-07 23:02:36 +08:00
HWFeatures(bool run_initialize = false)
2015-12-15 20:55:43 +08:00
{
2016-09-07 23:02:36 +08:00
memset( have, 0, sizeof(have[0]) * MAX_FEATURE );
if (run_initialize)
initialize();
2015-12-15 20:55:43 +08:00
}
2016-09-07 23:02:36 +08:00
static void initializeNames()
2015-12-15 20:55:43 +08:00
{
2016-09-07 23:02:36 +08:00
for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++)
{
g_hwFeatureNames[i] = 0;
}
g_hwFeatureNames[CPU_MMX] = "MMX";
g_hwFeatureNames[CPU_SSE] = "SSE";
g_hwFeatureNames[CPU_SSE2] = "SSE2";
g_hwFeatureNames[CPU_SSE3] = "SSE3";
g_hwFeatureNames[CPU_SSSE3] = "SSSE3";
g_hwFeatureNames[CPU_SSE4_1] = "SSE4.1";
g_hwFeatureNames[CPU_SSE4_2] = "SSE4.2";
g_hwFeatureNames[CPU_POPCNT] = "POPCNT";
g_hwFeatureNames[CPU_FP16] = "FP16";
g_hwFeatureNames[CPU_AVX] = "AVX";
g_hwFeatureNames[CPU_AVX2] = "AVX2";
g_hwFeatureNames[CPU_FMA3] = "FMA3";
g_hwFeatureNames[CPU_AVX_512F] = "AVX512F";
g_hwFeatureNames[CPU_AVX_512BW] = "AVX512BW";
g_hwFeatureNames[CPU_AVX_512CD] = "AVX512CD";
g_hwFeatureNames[CPU_AVX_512DQ] = "AVX512DQ";
g_hwFeatureNames[CPU_AVX_512ER] = "AVX512ER";
2017-12-29 13:06:52 +08:00
g_hwFeatureNames[CPU_AVX_512IFMA] = "AVX512IFMA";
2016-09-07 23:02:36 +08:00
g_hwFeatureNames[CPU_AVX_512PF] = "AVX512PF";
g_hwFeatureNames[CPU_AVX_512VBMI] = "AVX512VBMI";
g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL";
g_hwFeatureNames[CPU_NEON] = "NEON";
2017-10-03 06:54:31 +08:00
g_hwFeatureNames[CPU_VSX] = "VSX";
2017-12-29 13:06:52 +08:00
g_hwFeatureNames[CPU_AVX512_SKX] = "AVX512-SKX";
2016-09-07 23:02:36 +08:00
}
void initialize(void)
{
#ifndef WINRT
if (getenv("OPENCV_DUMP_CONFIG"))
{
fprintf(stderr, "\nOpenCV build configuration is:\n%s\n",
cv::getBuildInformation().c_str());
}
#endif
initializeNames();
2015-12-15 20:55:43 +08:00
int cpuid_data[4] = { 0, 0, 0, 0 };
2016-09-07 23:02:36 +08:00
int cpuid_data_ex[4] = { 0, 0, 0, 0 };
2015-12-15 20:55:43 +08:00
#if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
2016-09-07 23:02:36 +08:00
#define OPENCV_HAVE_X86_CPUID 1
2015-12-15 20:55:43 +08:00
__cpuid(cpuid_data, 1);
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
2016-09-07 23:02:36 +08:00
#define OPENCV_HAVE_X86_CPUID 1
2015-12-15 20:55:43 +08:00
#ifdef __x86_64__
asm __volatile__
(
"movl $1, %%eax\n\t"
"cpuid\n\t"
:[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
:
: "cc"
);
#else
asm volatile
(
"pushl %%ebx\n\t"
"movl $1,%%eax\n\t"
"cpuid\n\t"
"popl %%ebx\n\t"
: "=a"(cpuid_data[0]), "=c"(cpuid_data[2]), "=d"(cpuid_data[3])
:
: "cc"
);
#endif
#endif
2016-09-07 23:02:36 +08:00
#ifdef OPENCV_HAVE_X86_CPUID
int x86_family = (cpuid_data[0] >> 8) & 15;
if( x86_family >= 6 )
2015-12-15 20:55:43 +08:00
{
2016-09-07 23:02:36 +08:00
have[CV_CPU_MMX] = (cpuid_data[3] & (1<<23)) != 0;
have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
have[CV_CPU_AVX] = (cpuid_data[2] & (1<<28)) != 0;
have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0;
2015-12-15 20:55:43 +08:00
// make the second call to the cpuid command in order to get
// information about extended features like AVX2
#if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
2016-09-07 23:02:36 +08:00
#define OPENCV_HAVE_X86_CPUID_EX 1
__cpuidex(cpuid_data_ex, 7, 0);
2015-12-15 20:55:43 +08:00
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
2016-09-07 23:02:36 +08:00
#define OPENCV_HAVE_X86_CPUID_EX 1
2015-12-15 20:55:43 +08:00
#ifdef __x86_64__
asm __volatile__
(
"movl $7, %%eax\n\t"
"movl $0, %%ecx\n\t"
"cpuid\n\t"
2016-09-07 23:02:36 +08:00
:[eax]"=a"(cpuid_data_ex[0]),[ebx]"=b"(cpuid_data_ex[1]),[ecx]"=c"(cpuid_data_ex[2]),[edx]"=d"(cpuid_data_ex[3])
2015-12-15 20:55:43 +08:00
:
: "cc"
);
#else
asm volatile
(
"pushl %%ebx\n\t"
"movl $7,%%eax\n\t"
"movl $0,%%ecx\n\t"
"cpuid\n\t"
"movl %%ebx, %0\n\t"
"popl %%ebx\n\t"
2016-09-07 23:02:36 +08:00
: "=r"(cpuid_data_ex[1]), "=c"(cpuid_data_ex[2])
2015-12-15 20:55:43 +08:00
:
: "cc"
);
#endif
#endif
2016-09-07 23:02:36 +08:00
#ifdef OPENCV_HAVE_X86_CPUID_EX
have[CV_CPU_AVX2] = (cpuid_data_ex[1] & (1<<5)) != 0;
have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0;
have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0;
have[CV_CPU_AVX_512IFMA512] = (cpuid_data_ex[1] & (1<<21)) != 0;
have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0;
have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0;
have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0;
have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0;
have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0;
have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0;
#else
CV_UNUSED(cpuid_data_ex);
#endif
bool have_AVX_OS_support = true;
bool have_AVX512_OS_support = true;
if (!(cpuid_data[2] & (1<<27)))
have_AVX_OS_support = false; // OS uses XSAVE_XRSTORE and CPU support AVX
else
{
int xcr0 = 0;
#ifdef _XCR_XFEATURE_ENABLED_MASK // requires immintrin.h
xcr0 = (int)_xgetbv(_XCR_XFEATURE_ENABLED_MASK);
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
__asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" );
#endif
if ((xcr0 & 0x6) != 0x6)
have_AVX_OS_support = false; // YMM registers
if ((xcr0 & 0xe6) != 0xe6)
have_AVX512_OS_support = false; // ZMM registers
}
if (!have_AVX_OS_support)
{
have[CV_CPU_AVX] = false;
have[CV_CPU_FP16] = false;
have[CV_CPU_AVX2] = false;
have[CV_CPU_FMA3] = false;
}
if (!have_AVX_OS_support || !have_AVX512_OS_support)
{
have[CV_CPU_AVX_512F] = false;
have[CV_CPU_AVX_512BW] = false;
have[CV_CPU_AVX_512CD] = false;
have[CV_CPU_AVX_512DQ] = false;
have[CV_CPU_AVX_512ER] = false;
have[CV_CPU_AVX_512IFMA512] = false;
have[CV_CPU_AVX_512PF] = false;
have[CV_CPU_AVX_512VBMI] = false;
have[CV_CPU_AVX_512VL] = false;
}
2017-12-29 13:06:52 +08:00
if (have[CV_CPU_AVX_512F])
{
have[CV_CPU_AVX512_SKX] = have[CV_CPU_AVX_512F] & have[CV_CPU_AVX_512CD] & have[CV_CPU_AVX_512BW] & have[CV_CPU_AVX_512DQ] & have[CV_CPU_AVX_512VL];
}
2015-12-15 20:55:43 +08:00
}
2016-09-07 23:02:36 +08:00
#else
CV_UNUSED(cpuid_data);
CV_UNUSED(cpuid_data_ex);
#endif // OPENCV_HAVE_X86_CPUID
2015-12-15 20:55:43 +08:00
2017-06-09 23:24:14 +08:00
#if defined __ANDROID__ || defined __linux__
2015-12-15 20:55:43 +08:00
#ifdef __aarch64__
2016-09-07 23:02:36 +08:00
have[CV_CPU_NEON] = true;
have[CV_CPU_FP16] = true;
2017-06-09 23:24:14 +08:00
#elif defined __arm__ && defined __ANDROID__
#if defined HAVE_CPUFEATURES
2017-06-09 23:24:14 +08:00
__android_log_print(ANDROID_LOG_INFO, "OpenCV", "calling android_getCpuFeatures() ...");
uint64_t features = android_getCpuFeatures();
__android_log_print(ANDROID_LOG_INFO, "OpenCV", "calling android_getCpuFeatures() ... Done (%llx)", features);
have[CV_CPU_NEON] = (features & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
have[CV_CPU_FP16] = (features & ANDROID_CPU_ARM_FEATURE_VFP_FP16) != 0;
#else
__android_log_print(ANDROID_LOG_INFO, "OpenCV", "cpufeatures library is not avaialble for CPU detection");
#if CV_NEON
__android_log_print(ANDROID_LOG_INFO, "OpenCV", "- NEON instructions is enabled via build flags");
have[CV_CPU_NEON] = true;
#else
__android_log_print(ANDROID_LOG_INFO, "OpenCV", "- NEON instructions is NOT enabled via build flags");
#endif
#if CV_FP16
__android_log_print(ANDROID_LOG_INFO, "OpenCV", "- FP16 instructions is enabled via build flags");
have[CV_CPU_FP16] = true;
#else
__android_log_print(ANDROID_LOG_INFO, "OpenCV", "- FP16 instructions is NOT enabled via build flags");
#endif
#endif
#elif defined __arm__
2015-12-15 20:55:43 +08:00
int cpufile = open("/proc/self/auxv", O_RDONLY);
if (cpufile >= 0)
{
Elf32_auxv_t auxv;
const size_t size_auxv_t = sizeof(auxv);
while ((size_t)read(cpufile, &auxv, size_auxv_t) == size_auxv_t)
{
if (auxv.a_type == AT_HWCAP)
{
2016-09-07 23:02:36 +08:00
have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0;
2015-12-15 20:55:43 +08:00
break;
}
}
close(cpufile);
}
#endif
#elif (defined __clang__ || defined __APPLE__)
#if (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__))
2016-09-07 23:02:36 +08:00
have[CV_CPU_NEON] = true;
#endif
#if (defined __ARM_FP && (((__ARM_FP & 0x2) != 0) && defined __ARM_NEON__))
2016-09-07 23:02:36 +08:00
have[CV_CPU_FP16] = true;
#endif
2015-12-15 20:55:43 +08:00
#endif
2017-10-03 06:54:31 +08:00
#ifdef __VSX__
have[CV_CPU_VSX] = true;
#elif (defined __PPC64__ && defined __linux__)
uint64 hwcaps = getauxval(AT_HWCAP);
uint64 hwcap2 = getauxval(AT_HWCAP2);
have[CV_CPU_VSX] = (hwcaps & PPC_FEATURE_PPC_LE && hwcaps & PPC_FEATURE_HAS_VSX && hwcap2 & PPC_FEATURE2_ARCH_2_07);
#else
have[CV_CPU_VSX] = false;
#endif
2016-09-07 23:02:36 +08:00
int baseline_features[] = { CV_CPU_BASELINE_FEATURES };
if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0])))
{
fprintf(stderr, "\n"
"******************************************************************\n"
"* FATAL ERROR: *\n"
"* This OpenCV build doesn't support current CPU/HW configuration *\n"
"* *\n"
"* Use OPENCV_DUMP_CONFIG=1 environment variable for details *\n"
"******************************************************************\n");
fprintf(stderr, "\nRequired baseline features:\n");
checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]), true);
CV_ErrorNoReturn(cv::Error::StsAssert, "Missing support for required CPU baseline features. Check OpenCV build configuration and required CPU/HW setup.");
}
readSettings(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]));
}
bool checkFeatures(const int* features, int count, bool dump = false)
{
bool result = true;
for (int i = 0; i < count; i++)
{
int feature = features[i];
if (feature)
{
if (have[feature])
{
if (dump) fprintf(stderr, "%s - OK\n", getHWFeatureNameSafe(feature));
}
else
{
result = false;
if (dump) fprintf(stderr, "%s - NOT AVAILABLE\n", getHWFeatureNameSafe(feature));
}
}
}
return result;
}
static inline bool isSymbolSeparator(char c)
{
return c == ',' || c == ';' || c == '-';
}
void readSettings(const int* baseline_features, int baseline_count)
{
bool dump = true;
const char* disabled_features =
#ifndef WINRT
getenv("OPENCV_CPU_DISABLE");
#else
NULL;
#endif
if (disabled_features && disabled_features[0] != 0)
{
const char* start = disabled_features;
for (;;)
{
while (start[0] != 0 && isSymbolSeparator(start[0]))
{
start++;
}
if (start[0] == 0)
break;
const char* end = start;
while (end[0] != 0 && !isSymbolSeparator(end[0]))
{
end++;
}
if (end == start)
continue;
cv::String feature(start, end);
start = end;
CV_Assert(feature.size() > 0);
bool found = false;
for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++)
{
if (!g_hwFeatureNames[i]) continue;
size_t len = strlen(g_hwFeatureNames[i]);
if (len != feature.size()) continue;
if (feature.compare(g_hwFeatureNames[i]) == 0)
{
bool isBaseline = false;
for (int k = 0; k < baseline_count; k++)
{
if (baseline_features[k] == i)
{
isBaseline = true;
break;
}
}
if (isBaseline)
{
if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'. This has very limited effect, because code optimizations for this feature are executed unconditionally in the most cases.\n", getHWFeatureNameSafe(i));
}
if (!have[i])
{
if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n", getHWFeatureNameSafe(i));
}
have[i] = false;
found = true;
break;
}
}
if (!found)
{
if (dump) fprintf(stderr, "OPENCV: Trying to disable unknown CPU feature: '%s'.\n", feature.c_str());
}
}
}
2015-12-15 20:55:43 +08:00
}
bool have[MAX_FEATURE+1];
};
2016-09-07 23:02:36 +08:00
static HWFeatures featuresEnabled(true), featuresDisabled = HWFeatures(false);
2015-12-15 20:55:43 +08:00
static HWFeatures* currentFeatures = &featuresEnabled;
bool checkHardwareSupport(int feature)
{
CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE );
2015-12-15 20:55:43 +08:00
return currentFeatures->have[feature];
}
String getHardwareFeatureName(int feature)
{
const char* name = getHWFeatureName(feature);
return name ? String(name) : String();
}
2015-12-15 20:55:43 +08:00
volatile bool useOptimizedFlag = true;
void setUseOptimized( bool flag )
{
2015-12-15 20:55:43 +08:00
useOptimizedFlag = flag;
currentFeatures = flag ? &featuresEnabled : &featuresDisabled;
ipp::setUseIPP(flag);
2015-06-20 01:52:14 +08:00
#ifdef HAVE_OPENCL
ocl::setUseOpenCL(flag);
2015-06-20 01:52:14 +08:00
#endif
#ifdef HAVE_TEGRA_OPTIMIZATION
::tegra::setUseTegra(flag);
#endif
}
bool useOptimized(void)
{
2015-12-15 20:55:43 +08:00
return useOptimizedFlag;
}
int64 getTickCount(void)
{
2017-07-25 18:23:44 +08:00
#if defined _WIN32 || defined WINCE
LARGE_INTEGER counter;
QueryPerformanceCounter( &counter );
return (int64)counter.QuadPart;
#elif defined __linux || defined __linux__
struct timespec tp;
clock_gettime(CLOCK_MONOTONIC, &tp);
return (int64)tp.tv_sec*1000000000 + tp.tv_nsec;
2011-05-31 23:22:22 +08:00
#elif defined __MACH__ && defined __APPLE__
return (int64)mach_absolute_time();
#else
struct timeval tv;
struct timezone tz;
gettimeofday( &tv, &tz );
return (int64)tv.tv_sec*1000000 + tv.tv_usec;
#endif
}
double getTickFrequency(void)
{
2017-07-25 18:23:44 +08:00
#if defined _WIN32 || defined WINCE
LARGE_INTEGER freq;
QueryPerformanceFrequency(&freq);
return (double)freq.QuadPart;
#elif defined __linux || defined __linux__
return 1e9;
2011-05-31 23:22:22 +08:00
#elif defined __MACH__ && defined __APPLE__
static double freq = 0;
if( freq == 0 )
{
mach_timebase_info_data_t sTimebaseInfo;
mach_timebase_info(&sTimebaseInfo);
freq = sTimebaseInfo.denom*1e9/sTimebaseInfo.numer;
}
return freq;
#else
return 1e6;
#endif
}
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__ || defined __ppc__)
#if defined(__i386__)
int64 getCPUTickCount(void)
{
int64 x;
__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
return x;
}
#elif defined(__x86_64__)
int64 getCPUTickCount(void)
{
unsigned hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return (int64)lo | ((int64)hi << 32);
}
#elif defined(__ppc__)
int64 getCPUTickCount(void)
{
int64 result = 0;
unsigned upper, lower, tmp;
__asm__ volatile(
"0: \n"
"\tmftbu %0 \n"
"\tmftb %1 \n"
"\tmftbu %2 \n"
"\tcmpw %2,%0 \n"
"\tbne 0b \n"
: "=r"(upper),"=r"(lower),"=r"(tmp)
);
return lower | ((int64)upper << 32);
}
#else
#error "RDTSC not defined"
#endif
2017-07-25 18:23:44 +08:00
#elif defined _MSC_VER && defined _WIN32 && defined _M_IX86
int64 getCPUTickCount(void)
{
__asm _emit 0x0f;
__asm _emit 0x31;
}
#else
2014-03-21 19:27:56 +08:00
//#ifdef HAVE_IPP
//int64 getCPUTickCount(void)
//{
// return ippGetCpuClocks();
//}
//#else
int64 getCPUTickCount(void)
{
return getTickCount();
}
2014-03-21 19:27:56 +08:00
//#endif
#endif
const String& getBuildInformation()
{
static String build_info =
#include "version_string.inc"
;
return build_info;
}
String format( const char* fmt, ... )
{
2014-03-25 03:07:00 +08:00
AutoBuffer<char, 1024> buf;
2014-03-25 03:07:00 +08:00
for ( ; ; )
{
2014-03-25 03:07:00 +08:00
va_list va;
va_start(va, fmt);
2017-05-23 03:24:17 +08:00
int bsize = static_cast<int>(buf.size());
int len = cv_vsnprintf((char *)buf, bsize, fmt, va);
va_end(va);
CV_Assert(len >= 0 && "Check format string for errors");
if (len >= bsize)
2014-03-25 03:07:00 +08:00
{
buf.resize(len + 1);
2014-03-25 03:07:00 +08:00
continue;
}
2017-05-23 03:24:17 +08:00
buf[bsize - 1] = 0;
2014-03-25 03:07:00 +08:00
return String((char *)buf, len);
}
}
String tempfile( const char* suffix )
{
String fname;
#ifndef WINRT
const char *temp_dir = getenv("OPENCV_TEMP_PATH");
#endif
2017-07-25 18:23:44 +08:00
#if defined _WIN32
#ifdef WINRT
RoInitialize(RO_INIT_MULTITHREADED);
std::wstring temp_dir = GetTempPathWinRT();
std::wstring temp_file = GetTempFileNameWinRT(L"ocv");
if (temp_file.empty())
return String();
temp_file = temp_dir.append(std::wstring(L"\\")).append(temp_file);
DeleteFileW(temp_file.c_str());
char aname[MAX_PATH];
size_t copied = wcstombs(aname, temp_file.c_str(), MAX_PATH);
CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
fname = String(aname);
RoUninitialize();
#else
char temp_dir2[MAX_PATH] = { 0 };
char temp_file[MAX_PATH] = { 0 };
if (temp_dir == 0 || temp_dir[0] == 0)
{
::GetTempPathA(sizeof(temp_dir2), temp_dir2);
temp_dir = temp_dir2;
}
if(0 == ::GetTempFileNameA(temp_dir, "ocv", 0, temp_file))
return String();
DeleteFileA(temp_file);
fname = temp_file;
#endif
# else
# ifdef __ANDROID__
//char defaultTemplate[] = "/mnt/sdcard/__opencv_temp.XXXXXX";
char defaultTemplate[] = "/data/local/tmp/__opencv_temp.XXXXXX";
# else
char defaultTemplate[] = "/tmp/__opencv_temp.XXXXXX";
# endif
if (temp_dir == 0 || temp_dir[0] == 0)
fname = defaultTemplate;
else
{
fname = temp_dir;
char ech = fname[fname.size() - 1];
if(ech != '/' && ech != '\\')
fname = fname + "/";
fname = fname + "__opencv_temp.XXXXXX";
}
const int fd = mkstemp((char*)fname.c_str());
if (fd == -1) return String();
close(fd);
remove(fname.c_str());
# endif
if (suffix)
{
if (suffix[0] != '.')
return fname + "." + suffix;
else
return fname + suffix;
}
return fname;
}
static ErrorCallback customErrorCallback = 0;
static void* customErrorCallbackData = 0;
static bool breakOnError = false;
bool setBreakOnError(bool value)
{
bool prevVal = breakOnError;
breakOnError = value;
return prevVal;
}
int cv_snprintf(char* buf, int len, const char* fmt, ...)
2017-05-23 03:24:17 +08:00
{
va_list va;
va_start(va, fmt);
int res = cv_vsnprintf(buf, len, fmt, va);
2017-05-23 03:24:17 +08:00
va_end(va);
return res;
}
int cv_vsnprintf(char* buf, int len, const char* fmt, va_list args)
{
#if defined _MSC_VER
if (len <= 0) return len == 0 ? 1024 : -1;
int res = _vsnprintf_s(buf, len, _TRUNCATE, fmt, args);
// ensure null terminating on VS
if (res >= 0 && res < len)
{
buf[res] = 0;
return res;
}
else
{
buf[len - 1] = 0; // truncate happened
return res >= len ? res : (len * 2);
}
2017-05-23 03:24:17 +08:00
#else
return vsnprintf(buf, len, fmt, args);
2017-05-23 03:24:17 +08:00
#endif
}
void error( const Exception& exc )
{
if (customErrorCallback != 0)
customErrorCallback(exc.code, exc.func.c_str(), exc.err.c_str(),
exc.file.c_str(), exc.line, customErrorCallbackData);
else
{
const char* errorStr = cvErrorStr(exc.code);
char buf[1 << 12];
2017-05-23 03:24:17 +08:00
cv_snprintf(buf, sizeof(buf),
"OpenCV(%s) Error: %s (%s) in %s, file %s, line %d",
CV_VERSION,
errorStr, exc.err.c_str(), exc.func.size() > 0 ?
2017-05-23 03:24:17 +08:00
exc.func.c_str() : "unknown function", exc.file.c_str(), exc.line);
fprintf( stderr, "%s\n", buf );
fflush( stderr );
# ifdef __ANDROID__
__android_log_print(ANDROID_LOG_ERROR, "cv::error()", "%s", buf);
# endif
}
if(breakOnError)
{
static volatile int* p = 0;
*p = 0;
}
2017-11-28 17:53:40 +08:00
CV_THROW(exc);
}
void error(int _code, const String& _err, const char* _func, const char* _file, int _line)
{
error(cv::Exception(_code, _err, _func, _file, _line));
}
2017-05-23 03:24:17 +08:00
ErrorCallback
redirectError( ErrorCallback errCallback, void* userdata, void** prevUserdata)
{
if( prevUserdata )
*prevUserdata = customErrorCallbackData;
ErrorCallback prevCallback = customErrorCallback;
customErrorCallback = errCallback;
customErrorCallbackData = userdata;
return prevCallback;
}
}
CV_IMPL int cvCheckHardwareSupport(int feature)
{
CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE );
2015-12-15 20:55:43 +08:00
return cv::currentFeatures->have[feature];
}
CV_IMPL int cvUseOptimized( int flag )
{
2015-12-15 20:55:43 +08:00
int prevMode = cv::useOptimizedFlag;
cv::setUseOptimized( flag != 0 );
return prevMode;
}
CV_IMPL int64 cvGetTickCount(void)
{
return cv::getTickCount();
}
CV_IMPL double cvGetTickFrequency(void)
{
return cv::getTickFrequency()*1e-6;
}
CV_IMPL CvErrorCallback
cvRedirectError( CvErrorCallback errCallback, void* userdata, void** prevUserdata)
{
return cv::redirectError(errCallback, userdata, prevUserdata);
}
CV_IMPL int cvNulDevReport( int, const char*, const char*,
const char*, int, void* )
{
return 0;
}
CV_IMPL int cvStdErrReport( int, const char*, const char*,
const char*, int, void* )
{
return 0;
}
CV_IMPL int cvGuiBoxReport( int, const char*, const char*,
const char*, int, void* )
{
return 0;
}
CV_IMPL int cvGetErrInfo( const char**, const char**, const char**, int* )
{
return 0;
}
CV_IMPL const char* cvErrorStr( int status )
{
static char buf[256];
switch (status)
{
case CV_StsOk : return "No Error";
case CV_StsBackTrace : return "Backtrace";
case CV_StsError : return "Unspecified error";
case CV_StsInternal : return "Internal error";
case CV_StsNoMem : return "Insufficient memory";
case CV_StsBadArg : return "Bad argument";
case CV_StsNoConv : return "Iterations do not converge";
case CV_StsAutoTrace : return "Autotrace call";
case CV_StsBadSize : return "Incorrect size of input array";
case CV_StsNullPtr : return "Null pointer";
case CV_StsDivByZero : return "Division by zero occurred";
case CV_BadStep : return "Image step is wrong";
case CV_StsInplaceNotSupported : return "Inplace operation is not supported";
case CV_StsObjectNotFound : return "Requested object was not found";
case CV_BadDepth : return "Input image depth is not supported by function";
case CV_StsUnmatchedFormats : return "Formats of input arguments do not match";
case CV_StsUnmatchedSizes : return "Sizes of input arguments do not match";
case CV_StsOutOfRange : return "One of arguments\' values is out of range";
case CV_StsUnsupportedFormat : return "Unsupported format or combination of formats";
case CV_BadCOI : return "Input COI is not supported";
case CV_BadNumChannels : return "Bad number of channels";
case CV_StsBadFlag : return "Bad flag (parameter or structure field)";
case CV_StsBadPoint : return "Bad parameter of type CvPoint";
case CV_StsBadMask : return "Bad type of mask argument";
case CV_StsParseError : return "Parsing error";
case CV_StsNotImplemented : return "The function/feature is not implemented";
case CV_StsBadMemBlock : return "Memory block has been corrupted";
case CV_StsAssert : return "Assertion failed";
2013-07-24 17:55:18 +08:00
case CV_GpuNotSupported : return "No CUDA support";
case CV_GpuApiCallError : return "Gpu API call";
case CV_OpenGlNotSupported : return "No OpenGL support";
case CV_OpenGlApiCallError : return "OpenGL API call";
};
sprintf(buf, "Unknown %s code %d", status >= 0 ? "status":"error", status);
return buf;
}
CV_IMPL int cvGetErrMode(void)
{
return 0;
}
CV_IMPL int cvSetErrMode(int)
{
return 0;
}
CV_IMPL int cvGetErrStatus(void)
{
return 0;
}
CV_IMPL void cvSetErrStatus(int)
{
}
CV_IMPL void cvError( int code, const char* func_name,
const char* err_msg,
const char* file_name, int line )
{
cv::error(cv::Exception(code, err_msg, func_name, file_name, line));
}
/* function, which converts int to int */
CV_IMPL int
cvErrorFromIppStatus( int status )
{
switch (status)
{
case CV_BADSIZE_ERR: return CV_StsBadSize;
case CV_BADMEMBLOCK_ERR: return CV_StsBadMemBlock;
case CV_NULLPTR_ERR: return CV_StsNullPtr;
case CV_DIV_BY_ZERO_ERR: return CV_StsDivByZero;
case CV_BADSTEP_ERR: return CV_BadStep;
case CV_OUTOFMEM_ERR: return CV_StsNoMem;
case CV_BADARG_ERR: return CV_StsBadArg;
case CV_NOTDEFINED_ERR: return CV_StsError;
case CV_INPLACE_NOT_SUPPORTED_ERR: return CV_StsInplaceNotSupported;
case CV_NOTFOUND_ERR: return CV_StsObjectNotFound;
case CV_BADCONVERGENCE_ERR: return CV_StsNoConv;
case CV_BADDEPTH_ERR: return CV_BadDepth;
case CV_UNMATCHED_FORMATS_ERR: return CV_StsUnmatchedFormats;
case CV_UNSUPPORTED_COI_ERR: return CV_BadCOI;
case CV_UNSUPPORTED_CHANNELS_ERR: return CV_BadNumChannels;
case CV_BADFLAG_ERR: return CV_StsBadFlag;
case CV_BADRANGE_ERR: return CV_StsBadArg;
case CV_BADCOEF_ERR: return CV_StsBadArg;
case CV_BADFACTOR_ERR: return CV_StsBadArg;
case CV_BADPOINT_ERR: return CV_StsBadPoint;
default:
return CV_StsError;
}
}
namespace cv {
bool __termination = false;
}
namespace cv
{
2017-07-25 18:23:44 +08:00
#if defined _WIN32 || defined WINCE
struct Mutex::Impl
{
Impl()
{
#if (_WIN32_WINNT >= 0x0600)
::InitializeCriticalSectionEx(&cs, 1000, 0);
#else
::InitializeCriticalSection(&cs);
#endif
refcount = 1;
}
~Impl() { DeleteCriticalSection(&cs); }
2012-08-17 22:28:50 +08:00
void lock() { EnterCriticalSection(&cs); }
bool trylock() { return TryEnterCriticalSection(&cs) != 0; }
void unlock() { LeaveCriticalSection(&cs); }
2012-08-17 22:28:50 +08:00
CRITICAL_SECTION cs;
int refcount;
};
2012-10-17 07:18:30 +08:00
#else
struct Mutex::Impl
{
2013-11-22 17:00:37 +08:00
Impl()
{
pthread_mutexattr_t attr;
pthread_mutexattr_init(&attr);
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
pthread_mutex_init(&mt, &attr);
pthread_mutexattr_destroy(&attr);
refcount = 1;
}
~Impl() { pthread_mutex_destroy(&mt); }
2012-08-17 22:28:50 +08:00
2013-11-22 17:00:37 +08:00
void lock() { pthread_mutex_lock(&mt); }
bool trylock() { return pthread_mutex_trylock(&mt) == 0; }
void unlock() { pthread_mutex_unlock(&mt); }
2012-08-17 22:28:50 +08:00
2013-11-22 17:00:37 +08:00
pthread_mutex_t mt;
int refcount;
};
#endif
Mutex::Mutex()
{
impl = new Mutex::Impl;
}
2012-08-17 22:28:50 +08:00
Mutex::~Mutex()
{
if( CV_XADD(&impl->refcount, -1) == 1 )
delete impl;
impl = 0;
}
2012-08-17 22:28:50 +08:00
Mutex::Mutex(const Mutex& m)
{
impl = m.impl;
CV_XADD(&impl->refcount, 1);
}
Mutex& Mutex::operator = (const Mutex& m)
{
if (this != &m)
{
CV_XADD(&m.impl->refcount, 1);
if( CV_XADD(&impl->refcount, -1) == 1 )
delete impl;
impl = m.impl;
}
return *this;
}
2012-08-17 22:28:50 +08:00
void Mutex::lock() { impl->lock(); }
void Mutex::unlock() { impl->unlock(); }
2012-08-17 22:28:50 +08:00
bool Mutex::trylock() { return impl->trylock(); }
2013-12-11 22:49:13 +08:00
//////////////////////////////// thread-local storage ////////////////////////////////
2017-07-25 18:23:44 +08:00
#ifdef _WIN32
#ifdef _MSC_VER
#pragma warning(disable:4505) // unreferenced local function has been removed
#endif
#ifndef TLS_OUT_OF_INDEXES
#define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF)
#endif
#endif
// TLS platform abstraction layer
class TlsAbstraction
2013-12-11 22:49:13 +08:00
{
public:
TlsAbstraction();
~TlsAbstraction();
void* GetData() const;
void SetData(void *pData);
2013-12-11 22:49:13 +08:00
private:
2017-07-25 18:23:44 +08:00
#ifdef _WIN32
#ifndef WINRT
DWORD tlsKey;
2013-12-30 16:31:00 +08:00
#endif
2017-07-25 18:23:44 +08:00
#else // _WIN32
pthread_key_t tlsKey;
#endif
};
2013-12-11 22:49:13 +08:00
2017-07-25 18:23:44 +08:00
#ifdef _WIN32
#ifdef WINRT
static __declspec( thread ) void* tlsData = NULL; // using C++11 thread attribute for local thread data
TlsAbstraction::TlsAbstraction() {}
TlsAbstraction::~TlsAbstraction() {}
void* TlsAbstraction::GetData() const
{
return tlsData;
}
void TlsAbstraction::SetData(void *pData)
{
tlsData = pData;
}
#else //WINRT
TlsAbstraction::TlsAbstraction()
{
tlsKey = TlsAlloc();
CV_Assert(tlsKey != TLS_OUT_OF_INDEXES);
}
TlsAbstraction::~TlsAbstraction()
{
TlsFree(tlsKey);
}
void* TlsAbstraction::GetData() const
{
return TlsGetValue(tlsKey);
}
void TlsAbstraction::SetData(void *pData)
{
CV_Assert(TlsSetValue(tlsKey, pData) == TRUE);
}
#endif
2017-07-25 18:23:44 +08:00
#else // _WIN32
TlsAbstraction::TlsAbstraction()
{
CV_Assert(pthread_key_create(&tlsKey, NULL) == 0);
}
TlsAbstraction::~TlsAbstraction()
{
CV_Assert(pthread_key_delete(tlsKey) == 0);
}
void* TlsAbstraction::GetData() const
{
return pthread_getspecific(tlsKey);
}
void TlsAbstraction::SetData(void *pData)
{
CV_Assert(pthread_setspecific(tlsKey, pData) == 0);
}
#endif
2013-12-11 22:49:13 +08:00
// Per-thread data structure
struct ThreadData
{
ThreadData()
2013-12-11 22:49:13 +08:00
{
idx = 0;
slots.reserve(32);
2013-12-11 22:49:13 +08:00
}
std::vector<void*> slots; // Data array for a thread
size_t idx; // Thread index in TLS storage. This is not OS thread ID!
};
2013-12-11 22:49:13 +08:00
// Main TLS storage class
class TlsStorage
{
public:
TlsStorage() :
tlsSlotsSize(0)
2013-12-11 22:49:13 +08:00
{
tlsSlots.reserve(32);
threads.reserve(32);
2013-12-11 22:49:13 +08:00
}
~TlsStorage()
2013-12-11 22:49:13 +08:00
{
for(size_t i = 0; i < threads.size(); i++)
2013-12-11 22:49:13 +08:00
{
if(threads[i])
{
/* Current architecture doesn't allow proper global objects release, so this check can cause crashes
// Check if all slots were properly cleared
for(size_t j = 0; j < threads[i]->slots.size(); j++)
{
CV_Assert(threads[i]->slots[j] == 0);
}
*/
delete threads[i];
}
2013-12-11 22:49:13 +08:00
}
threads.clear();
2013-12-11 22:49:13 +08:00
}
void releaseThread()
2013-12-11 22:49:13 +08:00
{
AutoLock guard(mtxGlobalAccess);
ThreadData *pTD = (ThreadData*)tls.GetData();
for(size_t i = 0; i < threads.size(); i++)
{
if(pTD == threads[i])
{
threads[i] = 0;
break;
}
}
tls.SetData(0);
delete pTD;
2013-12-11 22:49:13 +08:00
}
// Reserve TLS storage index
size_t reserveSlot()
2013-12-11 22:49:13 +08:00
{
AutoLock guard(mtxGlobalAccess);
CV_Assert(tlsSlotsSize == tlsSlots.size());
// Find unused slots
for(size_t slot = 0; slot < tlsSlotsSize; slot++)
{
if(!tlsSlots[slot])
{
tlsSlots[slot] = 1;
return slot;
}
}
// Create new slot
tlsSlots.push_back(1); tlsSlotsSize++;
return tlsSlotsSize - 1;
2013-12-11 22:49:13 +08:00
}
// Release TLS storage index and pass associated data to caller
2017-02-16 01:20:38 +08:00
void releaseSlot(size_t slotIdx, std::vector<void*> &dataVec, bool keepSlot = false)
2013-12-11 22:49:13 +08:00
{
AutoLock guard(mtxGlobalAccess);
CV_Assert(tlsSlotsSize == tlsSlots.size());
CV_Assert(tlsSlotsSize > slotIdx);
2013-12-11 22:49:13 +08:00
for(size_t i = 0; i < threads.size(); i++)
2013-12-11 22:49:13 +08:00
{
2016-01-14 19:38:37 +08:00
if(threads[i])
{
2016-01-14 19:38:37 +08:00
std::vector<void*>& thread_slots = threads[i]->slots;
if (thread_slots.size() > slotIdx && thread_slots[slotIdx])
{
dataVec.push_back(thread_slots[slotIdx]);
2017-02-16 01:20:38 +08:00
thread_slots[slotIdx] = NULL;
2016-01-14 19:38:37 +08:00
}
}
2013-12-11 22:49:13 +08:00
}
2017-02-16 01:20:38 +08:00
if (!keepSlot)
tlsSlots[slotIdx] = 0;
2013-12-11 22:49:13 +08:00
}
// Get data by TLS storage index
void* getData(size_t slotIdx) const
2013-12-11 22:49:13 +08:00
{
#ifndef CV_THREAD_SANITIZER
CV_Assert(tlsSlotsSize > slotIdx);
#endif
2013-12-11 22:49:13 +08:00
ThreadData* threadData = (ThreadData*)tls.GetData();
if(threadData && threadData->slots.size() > slotIdx)
return threadData->slots[slotIdx];
return NULL;
2013-12-11 22:49:13 +08:00
}
// Gather data from threads by TLS storage index
void gather(size_t slotIdx, std::vector<void*> &dataVec)
{
AutoLock guard(mtxGlobalAccess);
CV_Assert(tlsSlotsSize == tlsSlots.size());
CV_Assert(tlsSlotsSize > slotIdx);
for(size_t i = 0; i < threads.size(); i++)
{
2016-01-14 19:38:37 +08:00
if(threads[i])
{
std::vector<void*>& thread_slots = threads[i]->slots;
if (thread_slots.size() > slotIdx && thread_slots[slotIdx])
dataVec.push_back(thread_slots[slotIdx]);
}
}
}
// Set data to storage index
void setData(size_t slotIdx, void* pData)
2013-12-11 22:49:13 +08:00
{
#ifndef CV_THREAD_SANITIZER
CV_Assert(tlsSlotsSize > slotIdx);
#endif
ThreadData* threadData = (ThreadData*)tls.GetData();
if(!threadData)
2013-12-11 22:49:13 +08:00
{
threadData = new ThreadData;
tls.SetData((void*)threadData);
{
AutoLock guard(mtxGlobalAccess);
threadData->idx = threads.size();
threads.push_back(threadData);
}
2013-12-11 22:49:13 +08:00
}
if(slotIdx >= threadData->slots.size())
{
AutoLock guard(mtxGlobalAccess); // keep synchronization with gather() calls
threadData->slots.resize(slotIdx + 1, NULL);
}
threadData->slots[slotIdx] = pData;
2013-12-11 22:49:13 +08:00
}
private:
TlsAbstraction tls; // TLS abstraction layer instance
Mutex mtxGlobalAccess; // Shared objects operation guard
size_t tlsSlotsSize; // equal to tlsSlots.size() in synchronized sections
// without synchronization this counter doesn't desrease - it is used for slotIdx sanity checks
std::vector<int> tlsSlots; // TLS keys state
std::vector<ThreadData*> threads; // Array for all allocated data. Thread data pointers are placed here to allow data cleanup
2013-12-11 22:49:13 +08:00
};
// Create global TLS storage object
static TlsStorage &getTlsStorage()
{
CV_SINGLETON_LAZY_INIT_REF(TlsStorage, new TlsStorage())
}
2013-12-11 22:49:13 +08:00
TLSDataContainer::TLSDataContainer()
{
key_ = (int)getTlsStorage().reserveSlot(); // Reserve key from TLS storage
}
2013-12-11 22:49:13 +08:00
TLSDataContainer::~TLSDataContainer()
{
CV_Assert(key_ == -1); // Key must be released in child object
2013-12-11 22:49:13 +08:00
}
void TLSDataContainer::gatherData(std::vector<void*> &data) const
{
getTlsStorage().gather(key_, data);
}
void TLSDataContainer::release()
2013-12-11 22:49:13 +08:00
{
std::vector<void*> data;
data.reserve(32);
getTlsStorage().releaseSlot(key_, data); // Release key and get stored data for proper destruction
2017-02-16 01:20:38 +08:00
key_ = -1;
for(size_t i = 0; i < data.size(); i++) // Delete all associated data
deleteDataInstance(data[i]);
}
void TLSDataContainer::cleanup()
{
std::vector<void*> data;
data.reserve(32);
getTlsStorage().releaseSlot(key_, data, true); // Extract stored data with removal from TLS tables
for(size_t i = 0; i < data.size(); i++) // Delete all associated data
deleteDataInstance(data[i]);
2013-12-11 22:49:13 +08:00
}
void* TLSDataContainer::getData() const
2013-12-11 22:49:13 +08:00
{
2017-05-25 23:59:01 +08:00
CV_Assert(key_ != -1 && "Can't fetch data from terminated TLS container.");
void* pData = getTlsStorage().getData(key_); // Check if data was already allocated
if(!pData)
2013-12-11 22:49:13 +08:00
{
// Create new data instance and save it to TLS storage
pData = createDataInstance();
getTlsStorage().setData(key_, pData);
2013-12-11 22:49:13 +08:00
}
return pData;
2013-12-11 22:49:13 +08:00
}
2015-02-04 18:03:27 +08:00
TLSData<CoreTLSData>& getCoreTlsData()
{
CV_SINGLETON_LAZY_INIT_REF(TLSData<CoreTLSData>, new TLSData<CoreTLSData>())
2015-02-04 18:03:27 +08:00
}
2017-07-25 18:23:44 +08:00
#if defined CVAPI_EXPORTS && defined _WIN32 && !defined WINCE
#ifdef WINRT
#pragma warning(disable:4447) // Disable warning 'main' signature found without threading model
#endif
extern "C"
BOOL WINAPI DllMain(HINSTANCE, DWORD fdwReason, LPVOID lpReserved);
extern "C"
BOOL WINAPI DllMain(HINSTANCE, DWORD fdwReason, LPVOID lpReserved)
{
if (fdwReason == DLL_THREAD_DETACH || fdwReason == DLL_PROCESS_DETACH)
{
if (lpReserved != NULL) // called after ExitProcess() call
{
cv::__termination = true;
}
else
{
// Not allowed to free resources if lpReserved is non-null
// http://msdn.microsoft.com/en-us/library/windows/desktop/ms682583.aspx
cv::getTlsStorage().releaseThread();
}
}
return TRUE;
}
#endif
2015-02-04 18:03:27 +08:00
2017-05-25 23:59:01 +08:00
namespace {
static int g_threadNum = 0;
class ThreadID {
public:
const int id;
ThreadID() :
id(CV_XADD(&g_threadNum, 1))
{
#ifdef OPENCV_WITH_ITT
__itt_thread_set_name(cv::format("OpenCVThread-%03d", id).c_str());
#endif
}
};
static TLSData<ThreadID>& getThreadIDTLS()
{
CV_SINGLETON_LAZY_INIT_REF(TLSData<ThreadID>, new TLSData<ThreadID>());
}
} // namespace
int utils::getThreadID() { return getThreadIDTLS().get()->id; }
bool utils::getConfigurationParameterBool(const char* name, bool defaultValue)
{
#ifdef NO_GETENV
const char* envValue = NULL;
#else
const char* envValue = getenv(name);
#endif
if (envValue == NULL)
{
return defaultValue;
}
cv::String value = envValue;
if (value == "1" || value == "True" || value == "true" || value == "TRUE")
{
return true;
}
if (value == "0" || value == "False" || value == "false" || value == "FALSE")
{
return false;
}
CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
}
size_t utils::getConfigurationParameterSizeT(const char* name, size_t defaultValue)
{
#ifdef NO_GETENV
const char* envValue = NULL;
#else
const char* envValue = getenv(name);
#endif
if (envValue == NULL)
{
return defaultValue;
}
cv::String value = envValue;
size_t pos = 0;
for (; pos < value.size(); pos++)
{
if (!isdigit(value[pos]))
break;
}
cv::String valueStr = value.substr(0, pos);
cv::String suffixStr = value.substr(pos, value.length() - pos);
int v = atoi(valueStr.c_str());
if (suffixStr.length() == 0)
return v;
else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
return v * 1024 * 1024;
else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
return v * 1024;
CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
}
cv::String utils::getConfigurationParameterString(const char* name, const char* defaultValue)
{
#ifdef NO_GETENV
const char* envValue = NULL;
#else
const char* envValue = getenv(name);
#endif
if (envValue == NULL)
{
return defaultValue;
}
cv::String value = envValue;
return value;
}
#ifdef CV_COLLECT_IMPL_DATA
ImplCollector& getImplData()
{
CV_SINGLETON_LAZY_INIT_REF(ImplCollector, new ImplCollector())
}
void setImpl(int flags)
{
cv::AutoLock lock(getImplData().mutex);
getImplData().implFlags = flags;
getImplData().implCode.clear();
getImplData().implFun.clear();
}
void addImpl(int flag, const char* func)
{
cv::AutoLock lock(getImplData().mutex);
getImplData().implFlags |= flag;
if(func) // use lazy collection if name was not specified
{
size_t index = getImplData().implCode.size();
if(!index || (getImplData().implCode[index-1] != flag || getImplData().implFun[index-1].compare(func))) // avoid duplicates
{
getImplData().implCode.push_back(flag);
getImplData().implFun.push_back(func);
}
}
}
int getImpl(std::vector<int> &impl, std::vector<String> &funName)
{
cv::AutoLock lock(getImplData().mutex);
impl = getImplData().implCode;
funName = getImplData().implFun;
return getImplData().implFlags; // return actual flags for lazy collection
}
bool useCollection()
{
return getImplData().useCollection;
}
void setUseCollection(bool flag)
{
cv::AutoLock lock(getImplData().mutex);
getImplData().useCollection = flag;
}
#endif
namespace instr
{
bool useInstrumentation()
{
#ifdef ENABLE_INSTRUMENTATION
return getInstrumentStruct().useInstr;
#else
return false;
#endif
}
void setUseInstrumentation(bool flag)
{
#ifdef ENABLE_INSTRUMENTATION
getInstrumentStruct().useInstr = flag;
#else
CV_UNUSED(flag);
#endif
}
InstrNode* getTrace()
{
#ifdef ENABLE_INSTRUMENTATION
return &getInstrumentStruct().rootNode;
#else
return NULL;
#endif
}
void resetTrace()
{
#ifdef ENABLE_INSTRUMENTATION
getInstrumentStruct().rootNode.removeChilds();
getInstrumentTLSStruct().pCurrentNode = &getInstrumentStruct().rootNode;
#endif
}
void setFlags(FLAGS modeFlags)
{
#ifdef ENABLE_INSTRUMENTATION
getInstrumentStruct().flags = modeFlags;
#else
CV_UNUSED(modeFlags);
#endif
}
FLAGS getFlags()
{
#ifdef ENABLE_INSTRUMENTATION
return (FLAGS)getInstrumentStruct().flags;
#else
return (FLAGS)0;
#endif
}
NodeData::NodeData(const char* funName, const char* fileName, int lineNum, void* retAddress, bool alwaysExpand, cv::instr::TYPE instrType, cv::instr::IMPL implType)
{
m_funName = funName;
m_instrType = instrType;
m_implType = implType;
m_fileName = fileName;
m_lineNum = lineNum;
m_retAddress = retAddress;
m_alwaysExpand = alwaysExpand;
m_threads = 1;
m_counter = 0;
m_ticksTotal = 0;
m_funError = false;
}
NodeData::NodeData(NodeData &ref)
{
*this = ref;
}
NodeData& NodeData::operator=(const NodeData &right)
{
this->m_funName = right.m_funName;
this->m_instrType = right.m_instrType;
this->m_implType = right.m_implType;
this->m_fileName = right.m_fileName;
this->m_lineNum = right.m_lineNum;
this->m_retAddress = right.m_retAddress;
this->m_alwaysExpand = right.m_alwaysExpand;
this->m_threads = right.m_threads;
this->m_counter = right.m_counter;
this->m_ticksTotal = right.m_ticksTotal;
this->m_funError = right.m_funError;
return *this;
}
NodeData::~NodeData()
{
}
bool operator==(const NodeData& left, const NodeData& right)
{
if(left.m_lineNum == right.m_lineNum && left.m_funName == right.m_funName && left.m_fileName == right.m_fileName)
{
if(left.m_retAddress == right.m_retAddress || !(cv::instr::getFlags()&cv::instr::FLAGS_EXPAND_SAME_NAMES || left.m_alwaysExpand))
return true;
}
return false;
}
#ifdef ENABLE_INSTRUMENTATION
InstrStruct& getInstrumentStruct()
{
static InstrStruct instr;
return instr;
}
InstrTLSStruct& getInstrumentTLSStruct()
{
return *getInstrumentStruct().tlsStruct.get();
}
InstrNode* getCurrentNode()
{
return getInstrumentTLSStruct().pCurrentNode;
}
IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, void *retAddress, bool alwaysExpand, TYPE instrType, IMPL implType)
{
m_disabled = false;
m_regionTicks = 0;
InstrStruct *pStruct = &getInstrumentStruct();
if(pStruct->useInstr)
{
InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
// Disable in case of failure
if(!pTLS->pCurrentNode)
{
m_disabled = true;
return;
}
int depth = pTLS->pCurrentNode->getDepth();
if(pStruct->maxDepth && pStruct->maxDepth <= depth)
{
m_disabled = true;
return;
}
NodeData payload(funName, fileName, lineNum, retAddress, alwaysExpand, instrType, implType);
Node<NodeData>* pChild = NULL;
if(pStruct->flags&FLAGS_MAPPING)
{
// Critical section
cv::AutoLock guard(pStruct->mutexCreate); // Guard from concurrent child creation
pChild = pTLS->pCurrentNode->findChild(payload);
if(!pChild)
{
pChild = new Node<NodeData>(payload);
pTLS->pCurrentNode->addChild(pChild);
}
}
else
{
pChild = pTLS->pCurrentNode->findChild(payload);
if(!pChild)
{
m_disabled = true;
return;
}
}
pTLS->pCurrentNode = pChild;
m_regionTicks = getTickCount();
}
}
IntrumentationRegion::~IntrumentationRegion()
{
InstrStruct *pStruct = &getInstrumentStruct();
if(pStruct->useInstr)
{
if(!m_disabled)
{
InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
(pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN ||
pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER))
{
cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation
}
uint64 ticks = (getTickCount() - m_regionTicks);
{
cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
pTLS->pCurrentNode->m_payload.m_counter++;
pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks;
pTLS->pCurrentNode->m_payload.m_tls.get()->m_ticksTotal += ticks;
}
pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
}
}
}
#endif
}
2014-04-17 19:21:30 +08:00
namespace ipp
{
#ifdef HAVE_IPP
2016-08-12 14:50:42 +08:00
struct IPPInitSingleton
{
public:
2016-08-12 14:50:42 +08:00
IPPInitSingleton()
{
useIPP = true;
useIPP_NE = false;
ippStatus = 0;
funcname = NULL;
filename = NULL;
linen = 0;
cpuFeatures = 0;
ippFeatures = 0;
ippTopFeatures = 0;
pIppLibInfo = NULL;
ippStatus = ippGetCpuFeatures(&cpuFeatures, NULL);
if(ippStatus < 0)
{
std::cerr << "ERROR: IPP cannot detect CPU features, IPP was disabled " << std::endl;
useIPP = false;
return;
}
ippFeatures = cpuFeatures;
const char* pIppEnv = getenv("OPENCV_IPP");
cv::String env = pIppEnv;
if(env.size())
{
#if IPP_VERSION_X100 >= 201703
const Ipp64u minorFeatures = ippCPUID_MOVBE|ippCPUID_AES|ippCPUID_CLMUL|ippCPUID_ABR|ippCPUID_RDRAND|ippCPUID_F16C|
ippCPUID_ADCOX|ippCPUID_RDSEED|ippCPUID_PREFETCHW|ippCPUID_SHA|ippCPUID_MPX|ippCPUID_AVX512CD|ippCPUID_AVX512ER|
ippCPUID_AVX512PF|ippCPUID_AVX512BW|ippCPUID_AVX512DQ|ippCPUID_AVX512VL|ippCPUID_AVX512VBMI;
#elif IPP_VERSION_X100 >= 201700
const Ipp64u minorFeatures = ippCPUID_MOVBE|ippCPUID_AES|ippCPUID_CLMUL|ippCPUID_ABR|ippCPUID_RDRAND|ippCPUID_F16C|
ippCPUID_ADCOX|ippCPUID_RDSEED|ippCPUID_PREFETCHW|ippCPUID_SHA|ippCPUID_AVX512CD|ippCPUID_AVX512ER|
ippCPUID_AVX512PF|ippCPUID_AVX512BW|ippCPUID_AVX512DQ|ippCPUID_AVX512VL|ippCPUID_AVX512VBMI;
#else
const Ipp64u minorFeatures = 0;
#endif
env = env.toLowerCase();
if(env.substr(0, 2) == "ne")
{
useIPP_NE = true;
env = env.substr(3, env.size());
}
if(env == "disabled")
{
std::cerr << "WARNING: IPP was disabled by OPENCV_IPP environment variable" << std::endl;
useIPP = false;
}
else if(env == "sse42")
ippFeatures = minorFeatures|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42;
else if(env == "avx2")
ippFeatures = minorFeatures|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2;
#if IPP_VERSION_X100 >= 201700
#if defined (_M_AMD64) || defined (__x86_64__)
else if(env == "avx512")
ippFeatures = minorFeatures|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2|ippCPUID_AVX512F;
#endif
#endif
else
std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << ". Correct values are: disabled, sse42, avx2, avx512 (Intel64 only)" << std::endl;
// Trim unsupported features
ippFeatures &= cpuFeatures;
}
// Disable AVX1 since we don't track regressions for it. SSE42 will be used instead
if(cpuFeatures&ippCPUID_AVX && !(cpuFeatures&ippCPUID_AVX2))
ippFeatures &= ~((Ipp64u)ippCPUID_AVX);
// IPP integrations in OpenCV support only SSE4.2, AVX2 and AVX-512 optimizations.
if(!(
#if IPP_VERSION_X100 >= 201700
cpuFeatures&ippCPUID_AVX512F ||
#endif
cpuFeatures&ippCPUID_AVX2 ||
cpuFeatures&ippCPUID_SSE42
))
{
useIPP = false;
return;
}
if(ippFeatures == cpuFeatures)
IPP_INITIALIZER(0)
else
IPP_INITIALIZER(ippFeatures)
ippFeatures = ippGetEnabledCpuFeatures();
// Detect top level optimizations to make comparison easier for optimizations dependent conditions
#if IPP_VERSION_X100 >= 201700
if(ippFeatures&ippCPUID_AVX512F)
{
if((ippFeatures&ippCPUID_AVX512_SKX) == ippCPUID_AVX512_SKX)
ippTopFeatures = ippCPUID_AVX512_SKX;
else if((ippFeatures&ippCPUID_AVX512_KNL) == ippCPUID_AVX512_KNL)
ippTopFeatures = ippCPUID_AVX512_KNL;
else
ippTopFeatures = ippCPUID_AVX512F; // Unknown AVX512 configuration
}
else
#endif
if(ippFeatures&ippCPUID_AVX2)
ippTopFeatures = ippCPUID_AVX2;
else if(ippFeatures&ippCPUID_SSE42)
ippTopFeatures = ippCPUID_SSE42;
pIppLibInfo = ippiGetLibVersion();
}
public:
bool useIPP;
bool useIPP_NE;
int ippStatus; // 0 - all is ok, -1 - IPP functions failed
const char *funcname;
const char *filename;
int linen;
Ipp64u ippFeatures;
Ipp64u cpuFeatures;
Ipp64u ippTopFeatures;
const IppLibraryVersion *pIppLibInfo;
};
2016-08-12 14:50:42 +08:00
static IPPInitSingleton& getIPPSingleton()
{
2016-08-12 14:50:42 +08:00
CV_SINGLETON_LAZY_INIT_REF(IPPInitSingleton, new IPPInitSingleton())
}
#endif
#if OPENCV_ABI_COMPATIBILITY > 300
unsigned long long getIppFeatures()
#else
int getIppFeatures()
#endif
{
#ifdef HAVE_IPP
#if OPENCV_ABI_COMPATIBILITY > 300
2016-08-12 14:50:42 +08:00
return getIPPSingleton().ippFeatures;
#else
return (int)getIPPSingleton().ippFeatures;
#endif
#else
return 0;
#endif
}
2014-04-16 22:50:23 +08:00
unsigned long long getIppTopFeatures();
unsigned long long getIppTopFeatures()
{
#ifdef HAVE_IPP
return getIPPSingleton().ippTopFeatures;
#else
return 0;
#endif
}
2014-04-16 22:50:23 +08:00
void setIppStatus(int status, const char * const _funcname, const char * const _filename, int _line)
2014-04-16 19:34:18 +08:00
{
#ifdef HAVE_IPP
2016-08-12 14:50:42 +08:00
getIPPSingleton().ippStatus = status;
getIPPSingleton().funcname = _funcname;
getIPPSingleton().filename = _filename;
getIPPSingleton().linen = _line;
#else
CV_UNUSED(status); CV_UNUSED(_funcname); CV_UNUSED(_filename); CV_UNUSED(_line);
#endif
2014-04-16 19:34:18 +08:00
}
int getIppStatus()
{
#ifdef HAVE_IPP
2016-08-12 14:50:42 +08:00
return getIPPSingleton().ippStatus;
#else
return 0;
#endif
2014-04-16 22:50:23 +08:00
}
String getIppErrorLocation()
{
#ifdef HAVE_IPP
2016-08-12 14:50:42 +08:00
return format("%s:%d %s", getIPPSingleton().filename ? getIPPSingleton().filename : "", getIPPSingleton().linen, getIPPSingleton().funcname ? getIPPSingleton().funcname : "");
#else
return String();
#endif
2014-04-16 19:34:18 +08:00
}
String getIppVersion()
{
#ifdef HAVE_IPP
const IppLibraryVersion *pInfo = getIPPSingleton().pIppLibInfo;
if(pInfo)
return format("%s %s %s", pInfo->Name, pInfo->Version, pInfo->BuildDate);
else
return String("error");
#else
return String("disabled");
#endif
}
bool useIPP()
{
#ifdef HAVE_IPP
2015-02-04 18:03:27 +08:00
CoreTLSData* data = getCoreTlsData().get();
if(data->useIPP < 0)
{
2016-08-12 14:50:42 +08:00
data->useIPP = getIPPSingleton().useIPP;
}
return (data->useIPP > 0);
#else
return false;
#endif
}
void setUseIPP(bool flag)
{
2015-02-04 18:03:27 +08:00
CoreTLSData* data = getCoreTlsData().get();
#ifdef HAVE_IPP
2016-08-12 14:50:42 +08:00
data->useIPP = (getIPPSingleton().useIPP)?flag:false;
#else
(void)flag;
data->useIPP = false;
#endif
}
bool useIPP_NE()
{
#ifdef HAVE_IPP
CoreTLSData* data = getCoreTlsData().get();
if(data->useIPP_NE < 0)
{
data->useIPP_NE = getIPPSingleton().useIPP_NE;
}
return (data->useIPP_NE > 0);
#else
return false;
#endif
}
void setUseIPP_NE(bool flag)
{
CoreTLSData* data = getCoreTlsData().get();
#ifdef HAVE_IPP
data->useIPP_NE = (getIPPSingleton().useIPP_NE)?flag:false;
#else
(void)flag;
data->useIPP_NE = false;
#endif
}
2014-04-17 19:21:30 +08:00
} // namespace ipp
} // namespace cv
#ifdef HAVE_TEGRA_OPTIMIZATION
namespace tegra {
bool useTegra()
{
cv::CoreTLSData* data = cv::getCoreTlsData().get();
if (data->useTegra < 0)
{
const char* pTegraEnv = getenv("OPENCV_TEGRA");
if (pTegraEnv && (cv::String(pTegraEnv) == "disabled"))
data->useTegra = false;
else
data->useTegra = true;
}
return (data->useTegra > 0);
}
void setUseTegra(bool flag)
{
cv::CoreTLSData* data = cv::getCoreTlsData().get();
data->useTegra = flag;
}
} // namespace tegra
#endif
/* End of file. */