mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 03:00:14 +08:00
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
cca4c47781
@ -171,6 +171,8 @@ elseif(MSVC)
|
||||
set(OpenCV_RUNTIME vc15)
|
||||
elseif(MSVC_VERSION MATCHES "^192[0-9]$")
|
||||
set(OpenCV_RUNTIME vc16)
|
||||
elseif(MSVC_VERSION MATCHES "^193[0-9]$")
|
||||
set(OpenCV_RUNTIME vc17)
|
||||
else()
|
||||
message(WARNING "OpenCV does not recognize MSVC_VERSION \"${MSVC_VERSION}\". Cannot set OpenCV_RUNTIME")
|
||||
endif()
|
||||
|
@ -137,6 +137,20 @@ elseif(MSVC)
|
||||
set(OpenCV_RUNTIME vc14) # selecting previous compatible runtime version
|
||||
endif()
|
||||
endif()
|
||||
elseif(MSVC_VERSION MATCHES "^193[0-9]$")
|
||||
set(OpenCV_RUNTIME vc17)
|
||||
check_one_config(has_VS2022)
|
||||
if(NOT has_VS2022)
|
||||
set(OpenCV_RUNTIME vc16)
|
||||
check_one_config(has_VS2019)
|
||||
if(NOT has_VS2019)
|
||||
set(OpenCV_RUNTIME vc15) # selecting previous compatible runtime version
|
||||
check_one_config(has_VS2017)
|
||||
if(NOT has_VS2017)
|
||||
set(OpenCV_RUNTIME vc14) # selecting previous compatible runtime version
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
elseif(MINGW)
|
||||
set(OpenCV_RUNTIME mingw)
|
||||
|
@ -57,8 +57,8 @@
|
||||
#endif
|
||||
|
||||
#if defined __unix__ || defined __APPLE__ || defined __GLIBC__ \
|
||||
|| defined __HAIKU__ || defined __EMSCRIPTEN__ || defined __FreeBSD__ \
|
||||
|| defined __OpenBSD__
|
||||
|| defined __HAIKU__ || defined __EMSCRIPTEN__ \
|
||||
|| defined __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
|
@ -55,6 +55,18 @@
|
||||
|
||||
#include <opencv2/core/utils/filesystem.private.hpp>
|
||||
|
||||
// ThreadSanitizer auto-detection.
// If OPENCV_WITH_THREAD_SANITIZER was not defined beforehand, it is set to 1 when the
// compiler is clang and reports __has_feature(thread_sanitizer); <atomic> is included
// in that case only.
#ifndef OPENCV_WITH_THREAD_SANITIZER
|
||||
// __has_feature is only evaluated after checking that it is defined
#if defined(__clang__) && defined(__has_feature)
|
||||
#if __has_feature(thread_sanitizer)
|
||||
#define OPENCV_WITH_THREAD_SANITIZER 1
|
||||
#include <atomic> // assume C++11
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
// Fallback: the macro is always defined after this point, with value 0 when no
// sanitizer was detected above and no external definition was supplied.
#ifndef OPENCV_WITH_THREAD_SANITIZER
|
||||
#define OPENCV_WITH_THREAD_SANITIZER 0
|
||||
#endif
|
||||
|
||||
namespace cv {
|
||||
|
||||
static void _initSystem()
|
||||
@ -1383,64 +1395,62 @@ namespace details {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Per-type "already destroyed" flag for a class that derives from DisposedSingletonMark<T>.
// The destructor sets the static flag and isDisposed() reports it, so the flag can still
// be queried after the derived object is gone.
// NOTE(review): `mark` is only declared here; its initial value must come from a
// separate definition/specialization for each T.
template <class T>
|
||||
class DisposedSingletonMark
|
||||
{
|
||||
private:
|
||||
// shared by every object of the same T instantiation
static bool mark;
|
||||
protected:
|
||||
// constructor and destructor are protected: usable by derived classes only
DisposedSingletonMark() {}
|
||||
~DisposedSingletonMark()
|
||||
{
|
||||
// never reset to false anywhere in this class
mark = true;
|
||||
}
|
||||
public:
|
||||
// Returns true once the destructor has run for any object of this T instantiation.
static bool isDisposed() { return mark; }
|
||||
};
|
||||
|
||||
// TLS platform abstraction layer
|
||||
class TlsAbstraction : public DisposedSingletonMark<TlsAbstraction>
|
||||
class TlsAbstraction
|
||||
{
|
||||
public:
|
||||
TlsAbstraction();
|
||||
~TlsAbstraction();
|
||||
void* getData() const
|
||||
~TlsAbstraction()
|
||||
{
|
||||
if (isDisposed()) // guard: static initialization order fiasco
|
||||
return NULL;
|
||||
return getData_();
|
||||
}
|
||||
void setData(void *pData)
|
||||
{
|
||||
if (isDisposed()) // guard: static initialization order fiasco
|
||||
return;
|
||||
return setData_(pData);
|
||||
// TlsAbstraction singleton should not be released
|
||||
// There is no reliable way to avoid problems caused by static initialization order fiasco
|
||||
// NB: Do NOT use logging here
|
||||
fprintf(stderr, "OpenCV FATAL: TlsAbstraction::~TlsAbstraction() call is not expected\n");
|
||||
fflush(stderr);
|
||||
}
|
||||
|
||||
void* getData() const;
|
||||
void setData(void *pData);
|
||||
|
||||
void releaseSystemResources();
|
||||
|
||||
private:
|
||||
void* getData_() const;
|
||||
void setData_(void *pData);
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef WINRT
|
||||
DWORD tlsKey;
|
||||
bool disposed;
|
||||
#endif
|
||||
#else // _WIN32
|
||||
pthread_key_t tlsKey;
|
||||
#if OPENCV_WITH_THREAD_SANITIZER
|
||||
std::atomic<bool> disposed;
|
||||
#else
|
||||
bool disposed;
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
template<> bool DisposedSingletonMark<TlsAbstraction>::mark = false;
|
||||
|
||||
static TlsAbstraction& getTlsAbstraction_()
|
||||
class TlsAbstractionReleaseGuard
|
||||
{
|
||||
static TlsAbstraction g_tls; // disposed in atexit() handlers (required for unregistering our callbacks)
|
||||
return g_tls;
|
||||
}
|
||||
TlsAbstraction& tls_;
|
||||
public:
|
||||
TlsAbstractionReleaseGuard(TlsAbstraction& tls) : tls_(tls)
|
||||
{
|
||||
/* nothing */
|
||||
}
|
||||
~TlsAbstractionReleaseGuard()
|
||||
{
|
||||
tls_.releaseSystemResources();
|
||||
}
|
||||
};
|
||||
|
||||
// TODO use reference
|
||||
static TlsAbstraction* getTlsAbstraction()
|
||||
{
|
||||
static TlsAbstraction* instance = &getTlsAbstraction_();
|
||||
return DisposedSingletonMark<TlsAbstraction>::isDisposed() ? NULL : instance;
|
||||
static TlsAbstraction *g_tls = new TlsAbstraction(); // memory leak is intended here to avoid disposing of TLS container
|
||||
static TlsAbstractionReleaseGuard g_tlsReleaseGuard(*g_tls);
|
||||
return g_tls;
|
||||
}
|
||||
|
||||
|
||||
@ -1448,15 +1458,15 @@ static TlsAbstraction* getTlsAbstraction()
|
||||
#ifdef WINRT
|
||||
static __declspec( thread ) void* tlsData = NULL; // using C++11 thread attribute for local thread data
|
||||
TlsAbstraction::TlsAbstraction() {}
|
||||
TlsAbstraction::~TlsAbstraction()
|
||||
void TlsAbstraction::releaseSystemResources()
|
||||
{
|
||||
cv::__termination = true; // DllMain is missing in static builds
|
||||
}
|
||||
void* TlsAbstraction::getData_() const
|
||||
void* TlsAbstraction::getData() const
|
||||
{
|
||||
return tlsData;
|
||||
}
|
||||
void TlsAbstraction::setData_(void *pData)
|
||||
void TlsAbstraction::setData(void *pData)
|
||||
{
|
||||
tlsData = pData;
|
||||
}
|
||||
@ -1465,6 +1475,7 @@ void TlsAbstraction::setData_(void *pData)
|
||||
static void NTAPI opencv_fls_destructor(void* pData);
|
||||
#endif // CV_USE_FLS
|
||||
TlsAbstraction::TlsAbstraction()
|
||||
: disposed(false)
|
||||
{
|
||||
#ifndef CV_USE_FLS
|
||||
tlsKey = TlsAlloc();
|
||||
@ -1473,9 +1484,10 @@ TlsAbstraction::TlsAbstraction()
|
||||
#endif // CV_USE_FLS
|
||||
CV_Assert(tlsKey != TLS_OUT_OF_INDEXES);
|
||||
}
|
||||
TlsAbstraction::~TlsAbstraction()
|
||||
void TlsAbstraction::releaseSystemResources()
|
||||
{
|
||||
cv::__termination = true; // DllMain is missing in static builds
|
||||
disposed = true;
|
||||
#ifndef CV_USE_FLS
|
||||
TlsFree(tlsKey);
|
||||
#else // CV_USE_FLS
|
||||
@ -1483,16 +1495,20 @@ TlsAbstraction::~TlsAbstraction()
|
||||
#endif // CV_USE_FLS
|
||||
tlsKey = TLS_OUT_OF_INDEXES;
|
||||
}
|
||||
void* TlsAbstraction::getData_() const
|
||||
void* TlsAbstraction::getData() const
|
||||
{
|
||||
if (disposed)
|
||||
return NULL;
|
||||
#ifndef CV_USE_FLS
|
||||
return TlsGetValue(tlsKey);
|
||||
#else // CV_USE_FLS
|
||||
return FlsGetValue(tlsKey);
|
||||
#endif // CV_USE_FLS
|
||||
}
|
||||
void TlsAbstraction::setData_(void *pData)
|
||||
void TlsAbstraction::setData(void *pData)
|
||||
{
|
||||
if (disposed)
|
||||
return; // no-op
|
||||
#ifndef CV_USE_FLS
|
||||
CV_Assert(TlsSetValue(tlsKey, pData) == TRUE);
|
||||
#else // CV_USE_FLS
|
||||
@ -1503,12 +1519,14 @@ void TlsAbstraction::setData_(void *pData)
|
||||
#else // _WIN32
|
||||
static void opencv_tls_destructor(void* pData);
|
||||
TlsAbstraction::TlsAbstraction()
|
||||
: disposed(false)
|
||||
{
|
||||
CV_Assert(pthread_key_create(&tlsKey, opencv_tls_destructor) == 0);
|
||||
}
|
||||
TlsAbstraction::~TlsAbstraction()
|
||||
void TlsAbstraction::releaseSystemResources()
|
||||
{
|
||||
cv::__termination = true; // DllMain is missing in static builds
|
||||
disposed = true;
|
||||
if (pthread_key_delete(tlsKey) != 0)
|
||||
{
|
||||
// Don't use logging here
|
||||
@ -1516,12 +1534,16 @@ TlsAbstraction::~TlsAbstraction()
|
||||
fflush(stderr);
|
||||
}
|
||||
}
|
||||
void* TlsAbstraction::getData_() const
|
||||
void* TlsAbstraction::getData() const
|
||||
{
|
||||
if (disposed)
|
||||
return NULL;
|
||||
return pthread_getspecific(tlsKey);
|
||||
}
|
||||
void TlsAbstraction::setData_(void *pData)
|
||||
void TlsAbstraction::setData(void *pData)
|
||||
{
|
||||
if (disposed)
|
||||
return; // no-op
|
||||
CV_Assert(pthread_setspecific(tlsKey, pData) == 0);
|
||||
}
|
||||
#endif
|
||||
@ -1549,6 +1571,7 @@ public:
|
||||
TlsStorage() :
|
||||
tlsSlotsSize(0)
|
||||
{
|
||||
(void)getTlsAbstraction(); // ensure singleton initialization (for correct order of atexit calls)
|
||||
tlsSlots.reserve(32);
|
||||
threads.reserve(32);
|
||||
g_isTlsStorageInitialized = true;
|
||||
@ -1786,11 +1809,11 @@ static void WINAPI opencv_fls_destructor(void* pData)
|
||||
#endif // CV_USE_FLS
|
||||
#endif // _WIN32
|
||||
|
||||
static TlsAbstraction* const g_force_initialization_of_TlsAbstraction
|
||||
static TlsStorage* const g_force_initialization_of_TlsStorage
|
||||
#if defined __GNUC__
|
||||
__attribute__((unused))
|
||||
#endif
|
||||
= getTlsAbstraction();
|
||||
= &getTlsStorage();
|
||||
|
||||
|
||||
#else // OPENCV_DISABLE_THREAD_SUPPORT
|
||||
|
@ -48,6 +48,7 @@
|
||||
#include "../ie_ngraph.hpp"
|
||||
#include "../op_vkcom.hpp"
|
||||
|
||||
#include <opencv2/core/utils/configuration.private.hpp>
|
||||
#include <opencv2/core/utils/logger.hpp>
|
||||
|
||||
#include "opencv2/core/hal/hal.hpp"
|
||||
@ -1736,7 +1737,26 @@ public:
|
||||
config.pad = pad;
|
||||
config.stride = stride;
|
||||
config.dilation = dilation;
|
||||
if (inputs[0].dims != 4 && inputs[0].dims != umat_blobs[0].dims)
|
||||
{
|
||||
static bool bypassCheck = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_CONVOLUTION_IGNORE_INPUT_DIMS_4_CHECK", false);
|
||||
if (!bypassCheck)
|
||||
{
|
||||
CV_LOG_ERROR(NULL, "DNN/OpenCL: Unsupported configuration: inputs[0].dims=" << inputs[0].dims << " umat_blobs[0].dims=" << umat_blobs[0].dims
|
||||
<< ". Consider reporting complete reproducer to https://github.com/opencv/opencv/issues/20833."
|
||||
<< " You can skip this check temporary through OPENCV_OCL4DNN_CONVOLUTION_IGNORE_INPUT_DIMS_4_CHECK=1"
|
||||
);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
config.group = inputs[0].size[1] / umat_blobs[0].size[1];
|
||||
if (config.group < 1) // config.group == 0 causes div by zero in ocl4dnn code
|
||||
{
|
||||
CV_LOG_WARNING(NULL, "DNN/OpenCL: Unsupported config.group=" << config.group
|
||||
<< ". Consider reporting complete reproducer to https://github.com/opencv/opencv/issues/20833"
|
||||
);
|
||||
return false;
|
||||
}
|
||||
config.bias_term = umat_blobs.size() == 2;
|
||||
config.use_half = use_half;
|
||||
|
||||
|
@ -222,8 +222,6 @@ class OCL4DNNConvSpatial
|
||||
bool createDWConvKernel(int32_t blockWidth,
|
||||
int32_t blockHeight,
|
||||
int32_t blockDepth);
|
||||
void CreateSubBuffer(const UMat& buffer, UMat& sub_buffer,
|
||||
int32_t offset, int32_t size, bool write_only);
|
||||
bool convolve(const UMat &bottom, UMat &top,
|
||||
const UMat &weight, const UMat &bias,
|
||||
int32_t numImages,
|
||||
|
@ -167,6 +167,7 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
|
||||
channels_ = config.in_shape[dims - spatial_dims - 1];
|
||||
num_output_ = config.out_shape[dims - spatial_dims - 1];
|
||||
group_ = config.group;
|
||||
CV_CheckGT(group_, 0, ""); // avoid div by zero below
|
||||
|
||||
fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
|
||||
fused_eltwise_ = false;
|
||||
@ -218,14 +219,7 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
|
||||
#endif
|
||||
if (!use_cache_path_)
|
||||
{
|
||||
static int warn_ = 0;
|
||||
if (!warn_)
|
||||
{
|
||||
std::cerr
|
||||
<< "OpenCV(ocl4dnn): Kernel configuration cache directory doesn't exist: " << cache_path_ << std::endl
|
||||
<< std::endl;
|
||||
warn_ = true;
|
||||
}
|
||||
CV_LOG_ONCE_ERROR(NULL, "OpenCV(ocl4dnn): Kernel configuration cache directory doesn't exist: " << cache_path_);
|
||||
}
|
||||
}
|
||||
|
||||
@ -418,7 +412,6 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
|
||||
addDef("CHANNELS", channels_ / group_);
|
||||
addDef("APPLY_BIAS", bias_term_);
|
||||
addDef("OUTPUT_Z", M_);
|
||||
addDef("ZPAR", 1);
|
||||
setFusionDefine(fused_activ_, fused_eltwise_);
|
||||
|
||||
src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
|
||||
@ -672,8 +665,7 @@ void interleaveMatrix(Dtype* mem_dst, const Dtype *mem,
|
||||
int r, int c, int interleavedRows, int nonInterleavedRows,
|
||||
int blockWidth, int rowAlignment )
|
||||
{
|
||||
CHECK_EQ(interleavedRows % 2, 0) <<
|
||||
"interleaveMatrix only supports even values for interleavedRows.";
|
||||
CV_Check(interleavedRows, interleavedRows % 2 == 0, "interleaveMatrix only supports even values for interleavedRows.");
|
||||
|
||||
size_t memSize = r * c * sizeof(float);
|
||||
size_t dstSize = memSize *
|
||||
@ -685,9 +677,12 @@ void interleaveMatrix(Dtype* mem_dst, const Dtype *mem,
|
||||
const int yStride = c * 2;
|
||||
const Dtype *pSrc = mem;
|
||||
Dtype* pDst = mem_dst;
|
||||
for (int y = 0; y < r;) {
|
||||
for (int rows = 0; rows < interleavedRows; rows += 2) {
|
||||
if ( y >= r ) break;
|
||||
for (int y = 0; y < r;)
|
||||
{
|
||||
for (int rows = 0; rows < interleavedRows; rows += 2)
|
||||
{
|
||||
if (y >= r)
|
||||
break;
|
||||
if ((c % xStride) == 0) {
|
||||
for (int x = 0; x < c / xStride; x++) {
|
||||
memcpy(pDst + x * xStride * 2, // NOLINT
|
||||
@ -712,11 +707,14 @@ void interleaveMatrix(Dtype* mem_dst, const Dtype *mem,
|
||||
y += 2;
|
||||
}
|
||||
|
||||
for (int rows = 0; rows < nonInterleavedRows; rows++) {
|
||||
if (y >= r) break;
|
||||
for (int rows = 0; rows < nonInterleavedRows; rows++)
|
||||
{
|
||||
if (y >= r)
|
||||
break;
|
||||
const int stride = rowAlignment;
|
||||
int remaining = c;
|
||||
for (int x = 0; x < c; x += stride) {
|
||||
for (int x = 0; x < c; x += stride)
|
||||
{
|
||||
if (remaining >= stride) {
|
||||
memcpy(pDst + x * 2, pSrc + x, stride * sizeof(Dtype)); // NOLINT
|
||||
remaining -=stride;
|
||||
@ -852,34 +850,6 @@ bool OCL4DNNConvSpatial<float>::createBasicKernel(int32_t blockWidth,
|
||||
return false;
|
||||
}
|
||||
|
||||
template<>
|
||||
void OCL4DNNConvSpatial<float>::CreateSubBuffer(const UMat& buffer, UMat& sub_buffer,
|
||||
int32_t offset, int32_t size, bool write_only)
|
||||
{
|
||||
cl_mem sub_mem;
|
||||
cl_buffer_region region;
|
||||
cl_int err;
|
||||
size_t element_size = (use_half_) ? sizeof(short) : sizeof(float);
|
||||
|
||||
region.origin = offset * element_size + buffer.offset;
|
||||
region.size = size * element_size;
|
||||
sub_mem = clCreateSubBuffer((cl_mem)buffer.handle(ACCESS_READ),
|
||||
write_only ? CL_MEM_WRITE_ONLY : CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err);
|
||||
if (err)
|
||||
{
|
||||
std::cout << "Failed to create sub buffer." << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
int step = element_size, rows = size, cols = 1;
|
||||
ocl::convertFromBuffer(sub_mem, step, rows, cols,
|
||||
(use_half_) ? CV_16SC1 : CV_32FC1, sub_buffer);
|
||||
|
||||
//decrease ocl mem refcount
|
||||
clReleaseMemObject(sub_mem);
|
||||
}
|
||||
|
||||
template<>
|
||||
bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
|
||||
const UMat &weight, const UMat &bias,
|
||||
@ -938,7 +908,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
|
||||
kernel.set(argIdx++, (uint16_t)output_h_);
|
||||
if (!kernel.run_(3, config->global_work_size, config->local_work_size, false))
|
||||
{
|
||||
std::cout << "IDLF kernel run failed." << std::endl;
|
||||
CV_LOG_ERROR(NULL, "DNN/OpenCL: IDLF kernel run failed");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -1012,7 +982,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
|
||||
|
||||
if (!kernel.run_(3, global_size, config->local_work_size, false))
|
||||
{
|
||||
std::cout << "GEMM like kernel run failed." << std::endl;
|
||||
CV_LOG_ERROR(NULL, "DNN/OpenCL: GEMM like kernel run failed");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -1115,14 +1085,9 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
|
||||
{
|
||||
queue = cv::ocl::Queue::getDefault();
|
||||
}
|
||||
catch (const cv::Exception&)
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
static int warn_ = 0;
|
||||
if (!warn_)
|
||||
{
|
||||
std::cout << "OpenCV(ocl4dnn): Can't get OpenCL default queue for auto-tuning." << std::endl;
|
||||
warn_ = true;
|
||||
}
|
||||
CV_LOG_ONCE_ERROR(NULL, "OpenCV(ocl4dnn): Can't get OpenCL default queue for auto-tuning: " << e.what());
|
||||
return 1e6;
|
||||
}
|
||||
|
||||
@ -1326,9 +1291,9 @@ ocl::Program OCL4DNNConvSpatial<Dtype>::compileKernel()
|
||||
phash.insert(std::pair<std::string, ocl::Program>(kernel_name_, program));
|
||||
if (!program.ptr())
|
||||
{
|
||||
std::cout << "Failed to compile kernel: " << kernel_name_
|
||||
<< ", buildflags: " << options
|
||||
<< ", errmsg: " << errmsg << std::endl;
|
||||
CV_LOG_WARNING(NULL, "DNN/OpenCL: Failed to compile kernel: " << kernel_name_
|
||||
<< ", buildflags: '" << options << "', errmsg: '" << errmsg << "'"
|
||||
);
|
||||
}
|
||||
return program;
|
||||
}
|
||||
@ -1754,7 +1719,8 @@ void OCL4DNNConvSpatial<float>::setupConvolution(const UMat &bottom,
|
||||
fastestTime = kernelQueue[x]->executionTime;
|
||||
}
|
||||
}
|
||||
if (fastestKernel < 0) break;
|
||||
if (fastestKernel < 0)
|
||||
break;
|
||||
// Test fastest kernel
|
||||
bool verified = verifyResult(bottom, top, weight, bias, numImages, kernelQueue[fastestKernel], verifyTop);
|
||||
if (verified == true) {
|
||||
@ -1913,17 +1879,18 @@ bool OCL4DNNConvSpatial<Dtype>::setupKernelByConfig(int x, int y, int z, int typ
|
||||
{
|
||||
if (z == 1)
|
||||
z = 16;
|
||||
CHECK_EQ(z == 16 || z == 8, true) << "invalid SIMD size" << std::endl;
|
||||
CV_Check(z, z == 16 || z == 8, "DNN/OpenCL: IDLF - invalid SIMD size");
|
||||
}
|
||||
kernelQueue.clear();
|
||||
createConvolutionKernel(type, x, y, z);
|
||||
if (kernelQueue.size() != 1) {
|
||||
std::cerr << "Failed setup kernel by config:"
|
||||
if (kernelQueue.size() != 1)
|
||||
{
|
||||
CV_LOG_ERROR(NULL, "DNN/OpenCL: Failed setup kernel by config: "
|
||||
<< " x = " << x
|
||||
<< " y = " << y
|
||||
<< " z = " << z
|
||||
<< " type = " << type
|
||||
<< std::endl;
|
||||
);
|
||||
return false;
|
||||
}
|
||||
bestKernelConfig = kernelQueue[0];
|
||||
@ -1955,13 +1922,9 @@ bool OCL4DNNConvSpatial<Dtype>::loadTunedConfig()
|
||||
{
|
||||
if (cache_path_.empty())
|
||||
{
|
||||
static int warn_ = 0;
|
||||
if (!warn_)
|
||||
{
|
||||
std::cout << "OpenCV(ocl4dnn): consider to specify kernel configuration cache directory " << std::endl
|
||||
<< " via OPENCV_OCL4DNN_CONFIG_PATH parameter." << std::endl;
|
||||
warn_ = true;
|
||||
}
|
||||
CV_LOG_ONCE_WARNING(NULL, "OpenCV(ocl4dnn): consider to specify kernel configuration cache directory "
|
||||
"through OPENCV_OCL4DNN_CONFIG_PATH parameter."
|
||||
);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -161,23 +161,15 @@ __kernel void ConvolveBasic(
|
||||
const int out_idx = get_global_id(0); // 1D task layout: [output_width * output_height * OUTPUT_Z]
|
||||
const int plane_size = output_width * output_height;
|
||||
const int out_plane_idx = out_idx % plane_size;
|
||||
const int outputZ = out_idx / plane_size;
|
||||
const int outputZ = out_idx / plane_size; // kernelNum
|
||||
const int outputY = out_plane_idx / output_width;
|
||||
const int outputX = out_plane_idx % output_width;
|
||||
const int kernelNum = outputZ * ZPAR;
|
||||
if (kernelNum < OUTPUT_Z)
|
||||
if (outputZ < OUTPUT_Z)
|
||||
{
|
||||
Dtype sum[ZPAR];
|
||||
for (int kern = 0; kern < ZPAR; kern++)
|
||||
{
|
||||
sum[kern] = 0.0f;
|
||||
}
|
||||
Dtype sum = 0.0f;
|
||||
const int org_y = outputY * STRIDE_Y - pad_h;
|
||||
const int org_x = outputX * STRIDE_X - pad_w;
|
||||
const int currentKernelOffset = kernel_offset + kernelNum*KERNEL_HEIGHT*KERNEL_WIDTH*CHANNELS;
|
||||
#if APPLY_BIAS
|
||||
const int biasIndex = bias_offset + kernelNum;
|
||||
#endif
|
||||
const int currentKernelOffset = kernel_offset + outputZ*KERNEL_HEIGHT*KERNEL_WIDTH*CHANNELS;
|
||||
const int local_image_offset = org_y * input_width + org_x;
|
||||
const int imageSize = input_width * input_height;
|
||||
__global Dtype* image_dataPtr = (image_data + (image_offset + local_image_offset));
|
||||
@ -186,17 +178,13 @@ __kernel void ConvolveBasic(
|
||||
{
|
||||
for (int y = 0; y < KERNEL_HEIGHT; y++)
|
||||
{
|
||||
int y_ = org_y + y * DILATION_Y;
|
||||
for (int x = 0; x < KERNEL_WIDTH; x++)
|
||||
{
|
||||
int y_ = org_y + y * DILATION_Y;
|
||||
int x_ = org_x + x * DILATION_X;
|
||||
if (!(y_ >= 0 && y_ < input_height && x_ >= 0 && x_ < input_width))
|
||||
if (y_ >= 0 && y_ < input_height && x_ >= 0 && x_ < input_width)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
for (int kern = 0; kern < ZPAR; kern++)
|
||||
{
|
||||
sum[kern] += image_dataPtr[x * DILATION_X] * kernel_dataPtr[kern*KERNEL_HEIGHT*KERNEL_WIDTH*CHANNELS + x];
|
||||
sum = mad(image_dataPtr[x * DILATION_X], kernel_dataPtr[x], sum);
|
||||
}
|
||||
}
|
||||
image_dataPtr += input_width * DILATION_Y;
|
||||
@ -205,18 +193,13 @@ __kernel void ConvolveBasic(
|
||||
image_dataPtr += imageSize - input_width*KERNEL_HEIGHT*DILATION_Y;
|
||||
}
|
||||
|
||||
for (int kern = 0; kern < ZPAR; kern++)
|
||||
{
|
||||
if (kernelNum + kern < OUTPUT_Z)
|
||||
{
|
||||
int offset = convolved_image_offset + (kernelNum+kern)*output_height*output_width + outputY*output_width + outputX;
|
||||
int offset = convolved_image_offset + out_idx;
|
||||
#if APPLY_BIAS
|
||||
ACTIVATION_FUNCTION(convolved_image, offset, sum[kern] + bias[biasIndex + kern], biasIndex + kern);
|
||||
int biasIndex = bias_offset + outputZ;
|
||||
ACTIVATION_FUNCTION(convolved_image, offset, sum + bias[biasIndex], biasIndex);
|
||||
#else
|
||||
ACTIVATION_FUNCTION(convolved_image, offset, sum[kern], kernelNum + kern);
|
||||
ACTIVATION_FUNCTION(convolved_image, offset, sum, outputZ);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -131,12 +131,17 @@ static void
|
||||
HarrisResponses(const Mat& img, const std::vector<Rect>& layerinfo,
|
||||
std::vector<KeyPoint>& pts, int blockSize, float harris_k)
|
||||
{
|
||||
CV_Assert( img.type() == CV_8UC1 && blockSize*blockSize <= 2048 );
|
||||
CV_CheckTypeEQ(img.type(), CV_8UC1, "");
|
||||
CV_CheckGT(blockSize, 0, "");
|
||||
CV_CheckLE(blockSize*blockSize, 2048, "");
|
||||
|
||||
size_t ptidx, ptsize = pts.size();
|
||||
|
||||
const uchar* ptr00 = img.ptr<uchar>();
|
||||
int step = (int)(img.step/img.elemSize1());
|
||||
size_t size_t_step = img.step;
|
||||
CV_CheckLE(size_t_step * blockSize + blockSize + 1, (size_t)INT_MAX, ""); // ofs computation, step+1
|
||||
int step = static_cast<int>(size_t_step);
|
||||
|
||||
int r = blockSize/2;
|
||||
|
||||
float scale = 1.f/((1 << 2) * blockSize * 255.f);
|
||||
@ -154,7 +159,7 @@ HarrisResponses(const Mat& img, const std::vector<Rect>& layerinfo,
|
||||
int y0 = cvRound(pts[ptidx].pt.y);
|
||||
int z = pts[ptidx].octave;
|
||||
|
||||
const uchar* ptr0 = ptr00 + (y0 - r + layerinfo[z].y)*step + x0 - r + layerinfo[z].x;
|
||||
const uchar* ptr0 = ptr00 + (y0 - r + layerinfo[z].y)*size_t_step + (x0 - r + layerinfo[z].x);
|
||||
int a = 0, b = 0, c = 0;
|
||||
|
||||
for( int k = 0; k < blockSize*blockSize; k++ )
|
||||
|
@ -141,5 +141,31 @@ TEST(Features2D_ORB, regression_16197)
|
||||
ASSERT_NO_THROW(orbPtr->detectAndCompute(img, noArray(), kps, fv));
|
||||
}
|
||||
|
||||
// https://github.com/opencv/opencv-python/issues/537
|
||||
// Regression test: ORB detectAndCompute on a 25000x25000 single-channel 8-bit image
// must not throw.
BIGDATA_TEST(Features2D_ORB, regression_opencv_python_537) // memory usage: ~3 Gb
|
||||
{
|
||||
applyTestTag(
|
||||
CV_TEST_TAG_LONG,
|
||||
CV_TEST_TAG_DEBUG_VERYLONG,
|
||||
CV_TEST_TAG_MEMORY_6GB
|
||||
);
|
||||
|
||||
// all-zero image; the lines drawn below are its only content
const int width = 25000;
|
||||
const int height = 25000;
|
||||
Mat img(Size(width, height), CV_8UC1, Scalar::all(0));
|
||||
|
||||
const int border = 23, num_lines = 23;
|
||||
for (int i = 0; i < num_lines; i++)
|
||||
{
|
||||
cv::Point2i point1(border + i * 100, border + i * 100);
|
||||
// NOTE(review): the y coordinate multiplies (`border * i * 100`) where the x coordinate
// subtracts (`border - i * 100`); it evaluates to 25000 - 2300*i, which is negative for
// i >= 11. Possibly a typo - confirm the intended end points before changing it.
cv::Point2i point2(width - border - i * 100, height - border * i * 100);
|
||||
cv::line(img, point1, point2, 255, 1, LINE_AA);
|
||||
}
|
||||
|
||||
Ptr<ORB> orbPtr = ORB::create(31);
|
||||
std::vector<KeyPoint> kps;
|
||||
Mat fv;
|
||||
// only the absence of an exception is checked; keypoints and descriptors are not inspected
ASSERT_NO_THROW(orbPtr->detectAndCompute(img, noArray(), kps, fv));
|
||||
}
|
||||
|
||||
}} // namespace
|
||||
|
@ -860,7 +860,7 @@ bool GStreamerCapture::open(const String &filename_, const cv::VideoCaptureParam
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_WARN("Error opening file: " << filename << " (" << err->message << ")");
|
||||
CV_WARN("Error opening file: " << filename << " (" << (err ? err->message : "<unknown reason>") << ")");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -868,9 +868,9 @@ bool GStreamerCapture::open(const String &filename_, const cv::VideoCaptureParam
|
||||
{
|
||||
GSafePtr<GError> err;
|
||||
uridecodebin.attach(gst_parse_launch(filename, err.getRef()));
|
||||
if (err)
|
||||
if (!uridecodebin)
|
||||
{
|
||||
CV_WARN("Error opening bin: " << err->message);
|
||||
CV_WARN("Error opening bin: " << (err ? err->message : "<unknown reason>"));
|
||||
return false;
|
||||
}
|
||||
manualpipeline = true;
|
||||
@ -2073,7 +2073,7 @@ void handleMessage(GstElement * pipeline)
|
||||
gst_message_parse_error(msg, err.getRef(), debug.getRef());
|
||||
GSafePtr<gchar> name; name.attach(gst_element_get_name(GST_MESSAGE_SRC (msg)));
|
||||
CV_WARN("Embedded video playback halted; module " << name.get() <<
|
||||
" reported: " << err->message);
|
||||
" reported: " << (err ? err->message : "<unknown reason>"));
|
||||
CV_LOG_DEBUG(NULL, "GStreamer debug: " << debug.get());
|
||||
|
||||
gst_element_set_state(GST_ELEMENT(pipeline), GST_STATE_NULL);
|
||||
|
@ -51,7 +51,7 @@
|
||||
OpenCV-TLS-getTlsStorage
|
||||
Memcheck:Leak
|
||||
...
|
||||
fun:_ZN2cvL13getTlsStorageEv
|
||||
fun:_ZN2cv*L13getTlsStorageEv
|
||||
}
|
||||
|
||||
{
|
||||
@ -159,7 +159,7 @@
|
||||
OpenCV-DNN-getLayerFactoryMutex
|
||||
Memcheck:Leak
|
||||
...
|
||||
fun:_ZN2cv3dnn*L20getLayerFactoryMutexEv
|
||||
fun:_ZN2cv3dnn*20getLayerFactoryMutexEv
|
||||
}
|
||||
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user