mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 06:03:15 +08:00
Merge pull request #12565 from dkurt:dnn_non_intel_gpu
* Remove isIntel check from deep learning layers
* Remove fp16->fp32 fallbacks where it's not necessary
* Fix Kernel::run to prevent localsize > globalsize
This commit is contained in:
parent
c8f3579f93
commit
24ab751547
@ -59,7 +59,7 @@ CV_EXPORTS_W void finish();
|
||||
CV_EXPORTS bool haveSVM();
|
||||
|
||||
class CV_EXPORTS Context;
|
||||
class CV_EXPORTS Device;
|
||||
class CV_EXPORTS_W_SIMPLE Device;
|
||||
class CV_EXPORTS Kernel;
|
||||
class CV_EXPORTS Program;
|
||||
class CV_EXPORTS ProgramSource;
|
||||
@ -67,14 +67,14 @@ class CV_EXPORTS Queue;
|
||||
class CV_EXPORTS PlatformInfo;
|
||||
class CV_EXPORTS Image2D;
|
||||
|
||||
class CV_EXPORTS Device
|
||||
class CV_EXPORTS_W_SIMPLE Device
|
||||
{
|
||||
public:
|
||||
Device();
|
||||
CV_WRAP Device();
|
||||
explicit Device(void* d);
|
||||
Device(const Device& d);
|
||||
Device& operator = (const Device& d);
|
||||
~Device();
|
||||
CV_WRAP ~Device();
|
||||
|
||||
void set(void* d);
|
||||
|
||||
@ -89,24 +89,24 @@ public:
|
||||
TYPE_ALL = 0xFFFFFFFF
|
||||
};
|
||||
|
||||
String name() const;
|
||||
String extensions() const;
|
||||
bool isExtensionSupported(const String& extensionName) const;
|
||||
String version() const;
|
||||
String vendorName() const;
|
||||
String OpenCL_C_Version() const;
|
||||
String OpenCLVersion() const;
|
||||
int deviceVersionMajor() const;
|
||||
int deviceVersionMinor() const;
|
||||
String driverVersion() const;
|
||||
CV_WRAP String name() const;
|
||||
CV_WRAP String extensions() const;
|
||||
CV_WRAP bool isExtensionSupported(const String& extensionName) const;
|
||||
CV_WRAP String version() const;
|
||||
CV_WRAP String vendorName() const;
|
||||
CV_WRAP String OpenCL_C_Version() const;
|
||||
CV_WRAP String OpenCLVersion() const;
|
||||
CV_WRAP int deviceVersionMajor() const;
|
||||
CV_WRAP int deviceVersionMinor() const;
|
||||
CV_WRAP String driverVersion() const;
|
||||
void* ptr() const;
|
||||
|
||||
int type() const;
|
||||
CV_WRAP int type() const;
|
||||
|
||||
int addressBits() const;
|
||||
bool available() const;
|
||||
bool compilerAvailable() const;
|
||||
bool linkerAvailable() const;
|
||||
CV_WRAP int addressBits() const;
|
||||
CV_WRAP bool available() const;
|
||||
CV_WRAP bool compilerAvailable() const;
|
||||
CV_WRAP bool linkerAvailable() const;
|
||||
|
||||
enum
|
||||
{
|
||||
@ -119,21 +119,21 @@ public:
|
||||
FP_SOFT_FLOAT=(1 << 6),
|
||||
FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
|
||||
};
|
||||
int doubleFPConfig() const;
|
||||
int singleFPConfig() const;
|
||||
int halfFPConfig() const;
|
||||
CV_WRAP int doubleFPConfig() const;
|
||||
CV_WRAP int singleFPConfig() const;
|
||||
CV_WRAP int halfFPConfig() const;
|
||||
|
||||
bool endianLittle() const;
|
||||
bool errorCorrectionSupport() const;
|
||||
CV_WRAP bool endianLittle() const;
|
||||
CV_WRAP bool errorCorrectionSupport() const;
|
||||
|
||||
enum
|
||||
{
|
||||
EXEC_KERNEL=(1 << 0),
|
||||
EXEC_NATIVE_KERNEL=(1 << 1)
|
||||
};
|
||||
int executionCapabilities() const;
|
||||
CV_WRAP int executionCapabilities() const;
|
||||
|
||||
size_t globalMemCacheSize() const;
|
||||
CV_WRAP size_t globalMemCacheSize() const;
|
||||
|
||||
enum
|
||||
{
|
||||
@ -141,38 +141,38 @@ public:
|
||||
READ_ONLY_CACHE=1,
|
||||
READ_WRITE_CACHE=2
|
||||
};
|
||||
int globalMemCacheType() const;
|
||||
int globalMemCacheLineSize() const;
|
||||
size_t globalMemSize() const;
|
||||
CV_WRAP int globalMemCacheType() const;
|
||||
CV_WRAP int globalMemCacheLineSize() const;
|
||||
CV_WRAP size_t globalMemSize() const;
|
||||
|
||||
size_t localMemSize() const;
|
||||
CV_WRAP size_t localMemSize() const;
|
||||
enum
|
||||
{
|
||||
NO_LOCAL_MEM=0,
|
||||
LOCAL_IS_LOCAL=1,
|
||||
LOCAL_IS_GLOBAL=2
|
||||
};
|
||||
int localMemType() const;
|
||||
bool hostUnifiedMemory() const;
|
||||
CV_WRAP int localMemType() const;
|
||||
CV_WRAP bool hostUnifiedMemory() const;
|
||||
|
||||
bool imageSupport() const;
|
||||
CV_WRAP bool imageSupport() const;
|
||||
|
||||
bool imageFromBufferSupport() const;
|
||||
CV_WRAP bool imageFromBufferSupport() const;
|
||||
uint imagePitchAlignment() const;
|
||||
uint imageBaseAddressAlignment() const;
|
||||
|
||||
/// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value)
|
||||
bool intelSubgroupsSupport() const;
|
||||
CV_WRAP bool intelSubgroupsSupport() const;
|
||||
|
||||
size_t image2DMaxWidth() const;
|
||||
size_t image2DMaxHeight() const;
|
||||
CV_WRAP size_t image2DMaxWidth() const;
|
||||
CV_WRAP size_t image2DMaxHeight() const;
|
||||
|
||||
size_t image3DMaxWidth() const;
|
||||
size_t image3DMaxHeight() const;
|
||||
size_t image3DMaxDepth() const;
|
||||
CV_WRAP size_t image3DMaxWidth() const;
|
||||
CV_WRAP size_t image3DMaxHeight() const;
|
||||
CV_WRAP size_t image3DMaxDepth() const;
|
||||
|
||||
size_t imageMaxBufferSize() const;
|
||||
size_t imageMaxArraySize() const;
|
||||
CV_WRAP size_t imageMaxBufferSize() const;
|
||||
CV_WRAP size_t imageMaxArraySize() const;
|
||||
|
||||
enum
|
||||
{
|
||||
@ -181,53 +181,53 @@ public:
|
||||
VENDOR_INTEL=2,
|
||||
VENDOR_NVIDIA=3
|
||||
};
|
||||
int vendorID() const;
|
||||
CV_WRAP int vendorID() const;
|
||||
// FIXIT
|
||||
// dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform.
|
||||
// This method should use platform name instead of vendor name.
|
||||
// After fix restore code in arithm.cpp: ocl_compare()
|
||||
inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
|
||||
inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
|
||||
inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
|
||||
CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
|
||||
CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
|
||||
CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
|
||||
|
||||
int maxClockFrequency() const;
|
||||
int maxComputeUnits() const;
|
||||
int maxConstantArgs() const;
|
||||
size_t maxConstantBufferSize() const;
|
||||
CV_WRAP int maxClockFrequency() const;
|
||||
CV_WRAP int maxComputeUnits() const;
|
||||
CV_WRAP int maxConstantArgs() const;
|
||||
CV_WRAP size_t maxConstantBufferSize() const;
|
||||
|
||||
size_t maxMemAllocSize() const;
|
||||
size_t maxParameterSize() const;
|
||||
CV_WRAP size_t maxMemAllocSize() const;
|
||||
CV_WRAP size_t maxParameterSize() const;
|
||||
|
||||
int maxReadImageArgs() const;
|
||||
int maxWriteImageArgs() const;
|
||||
int maxSamplers() const;
|
||||
CV_WRAP int maxReadImageArgs() const;
|
||||
CV_WRAP int maxWriteImageArgs() const;
|
||||
CV_WRAP int maxSamplers() const;
|
||||
|
||||
size_t maxWorkGroupSize() const;
|
||||
int maxWorkItemDims() const;
|
||||
CV_WRAP size_t maxWorkGroupSize() const;
|
||||
CV_WRAP int maxWorkItemDims() const;
|
||||
void maxWorkItemSizes(size_t*) const;
|
||||
|
||||
int memBaseAddrAlign() const;
|
||||
CV_WRAP int memBaseAddrAlign() const;
|
||||
|
||||
int nativeVectorWidthChar() const;
|
||||
int nativeVectorWidthShort() const;
|
||||
int nativeVectorWidthInt() const;
|
||||
int nativeVectorWidthLong() const;
|
||||
int nativeVectorWidthFloat() const;
|
||||
int nativeVectorWidthDouble() const;
|
||||
int nativeVectorWidthHalf() const;
|
||||
CV_WRAP int nativeVectorWidthChar() const;
|
||||
CV_WRAP int nativeVectorWidthShort() const;
|
||||
CV_WRAP int nativeVectorWidthInt() const;
|
||||
CV_WRAP int nativeVectorWidthLong() const;
|
||||
CV_WRAP int nativeVectorWidthFloat() const;
|
||||
CV_WRAP int nativeVectorWidthDouble() const;
|
||||
CV_WRAP int nativeVectorWidthHalf() const;
|
||||
|
||||
int preferredVectorWidthChar() const;
|
||||
int preferredVectorWidthShort() const;
|
||||
int preferredVectorWidthInt() const;
|
||||
int preferredVectorWidthLong() const;
|
||||
int preferredVectorWidthFloat() const;
|
||||
int preferredVectorWidthDouble() const;
|
||||
int preferredVectorWidthHalf() const;
|
||||
CV_WRAP int preferredVectorWidthChar() const;
|
||||
CV_WRAP int preferredVectorWidthShort() const;
|
||||
CV_WRAP int preferredVectorWidthInt() const;
|
||||
CV_WRAP int preferredVectorWidthLong() const;
|
||||
CV_WRAP int preferredVectorWidthFloat() const;
|
||||
CV_WRAP int preferredVectorWidthDouble() const;
|
||||
CV_WRAP int preferredVectorWidthHalf() const;
|
||||
|
||||
size_t printfBufferSize() const;
|
||||
size_t profilingTimerResolution() const;
|
||||
CV_WRAP size_t printfBufferSize() const;
|
||||
CV_WRAP size_t profilingTimerResolution() const;
|
||||
|
||||
static const Device& getDefault();
|
||||
CV_WRAP static const Device& getDefault();
|
||||
|
||||
protected:
|
||||
struct Impl;
|
||||
|
@ -3078,7 +3078,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
||||
dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
|
||||
CV_Assert( val > 0 );
|
||||
total *= _globalsize[i];
|
||||
if (_globalsize[i] == 1)
|
||||
if (_globalsize[i] == 1 && !_localsize)
|
||||
val = 1;
|
||||
globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
|
||||
}
|
||||
|
@ -145,6 +145,21 @@ OCL_INSTANTIATE_TEST_CASE_P(Core, Gemm, ::testing::Combine(
|
||||
testing::Values(CV_32FC1, CV_32FC2, CV_64FC1, CV_64FC2),
|
||||
Bool(), Bool(), Bool(), Bool()));
|
||||
|
||||
// Test for non-Intel GPUs to check CL_INVALID_WORK_GROUP_SIZE when localsize > globalsize
// Small-matrix GEMM check: the same product is computed once under OCL_OFF and
// once under OCL_ON, and the two results are compared.
// NOTE(review): OCL_OFF / OCL_ON are defined outside this view; presumably they
// run the wrapped statement with OpenCL disabled / enabled - confirm.
|
||||
OCL_TEST(Gemm, small)
|
||||
{
|
||||
// Inputs: A is 2x3, B is 4x3; both outputs (uC as UMat, C as Mat) are 2x4, all CV_32F.
UMat A(2, 3, CV_32F), B(4, 3, CV_32F), uC(2, 4, CV_32F);
|
||||
Mat C(2, 4, CV_32F);
|
||||
|
||||
// Fill both inputs with random values drawn between -1 and 1.
randu(A, -1, 1);
|
||||
randu(B, -1, 1);
|
||||
|
||||
// alpha = 1, no third matrix (noArray()), beta = 0. GEMM_2_T makes cv::gemm use
// the second operand transposed, so this is A (2x3) * B^T (3x4) -> 2x4.
// C receives the result of the OCL_OFF run, uC the result of the OCL_ON run.
OCL_OFF(cv::gemm(A, B, 1, noArray(), 0, C, GEMM_2_T));
|
||||
OCL_ON(cv::gemm(A, B, 1, noArray(), 0, uC, GEMM_2_T));
|
||||
|
||||
// The infinity-norm distance between the two results must not exceed 1e-5.
EXPECT_LE(cvtest::norm(C, uC, cv::NORM_INF), 1e-5);
|
||||
}
|
||||
|
||||
} } // namespace opencv_test::ocl
|
||||
|
||||
#endif // HAVE_OPENCL
|
||||
|
@ -1078,12 +1078,22 @@ struct Net::Impl
|
||||
}
|
||||
#else
|
||||
{
|
||||
if (!DNN_OPENCL_ALLOW_ALL_DEVICES
|
||||
&& !(ocl::Device::getDefault().isIntel() && ocl::Device::getDefault().type() == ocl::Device::TYPE_GPU) // Current implementation is only valid for Intel GPU (#11494)
|
||||
)
|
||||
if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
|
||||
{
|
||||
CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with Intel GPUs only), switching to CPU.");
|
||||
preferableTarget = DNN_TARGET_CPU;
|
||||
// Current implementation is only valid for GPU (#11494)
|
||||
if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
|
||||
{
|
||||
CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
|
||||
preferableTarget = DNN_TARGET_CPU;
|
||||
}
|
||||
else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
|
||||
{
|
||||
CV_LOG_WARNING(NULL,
|
||||
"DNN: OpenCL target with fp16 precision is not supported "
|
||||
"with current OpenCL device (tested with Intel GPUs only), "
|
||||
"switching to OpenCL with fp32 precision.");
|
||||
preferableTarget = DNN_TARGET_OPENCL;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -230,8 +230,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -95,16 +95,9 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
@ -237,16 +237,9 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
@ -1529,8 +1529,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr));
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -137,12 +137,6 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
@ -415,8 +415,7 @@ public:
|
||||
|
||||
if (_bboxesNormalized)
|
||||
{
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
}
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -354,8 +354,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -135,16 +135,9 @@ public:
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
outputs_arr.isUMatVector() &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
outputs_arr.isUMatVector(),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
@ -389,8 +389,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -148,8 +148,7 @@ public:
|
||||
|
||||
CV_Assert(inputs_arr.total() == outputs_arr.total());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -184,8 +184,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -99,19 +99,21 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
if (paddingType == "constant")
|
||||
{
|
||||
outputs[0].setTo(paddingValue);
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
std::vector<float> paddingValue_fp32(1, paddingValue);
|
||||
std::vector<int16_t> paddingValue_fp16(1);
|
||||
convertFp16(paddingValue_fp32, paddingValue_fp16);
|
||||
outputs[0].setTo(paddingValue_fp16[0]);
|
||||
}
|
||||
else
|
||||
outputs[0].setTo(paddingValue);
|
||||
inputs[0].copyTo(outputs[0](dstRanges));
|
||||
}
|
||||
else if (paddingType == "reflect")
|
||||
|
@ -304,8 +304,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -402,8 +402,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -196,8 +196,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -160,8 +160,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -233,16 +233,9 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
@ -92,8 +92,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -239,16 +239,9 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
@ -187,8 +187,7 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
|
@ -83,12 +83,6 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
@ -69,9 +69,6 @@ bool OCL4DNNLRN<Dtype>::Forward(const UMat& bottom, UMat& top)
|
||||
{
|
||||
bool ret = true;
|
||||
|
||||
if (!ocl::Device::getDefault().intelSubgroupsSupport())
|
||||
return false;
|
||||
|
||||
switch (lrn_type_)
|
||||
{
|
||||
case LRNParameter_NormRegion_ACROSS_CHANNELS:
|
||||
|
@ -114,6 +114,6 @@ __kernel void clip(const int nthreads,
|
||||
for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
|
||||
{
|
||||
Dtype4 vec = vload4(index, dst);
|
||||
vstore4(clamp(vec, 0, 1), index, dst);
|
||||
vstore4(clamp(vec, 0.0f, 1.0f), index, dst);
|
||||
}
|
||||
}
|
||||
|
@ -295,7 +295,7 @@ TEST_P(Test_ONNX_nets, TinyYolov2)
|
||||
TEST_P(Test_ONNX_nets, CNN_MNIST)
|
||||
{
|
||||
// output range: [-1952; 6574]
|
||||
const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 3.82 : 4.3e-4;
|
||||
const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 3.82 : 4.4e-4;
|
||||
const double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 13.5 : 2e-3;
|
||||
|
||||
testONNXModels("cnn_mnist", pb, l1, lInf);
|
||||
@ -341,7 +341,7 @@ TEST_P(Test_ONNX_nets, Inception_v2)
|
||||
TEST_P(Test_ONNX_nets, DenseNet121)
|
||||
{
|
||||
// output range: [-87; 138]
|
||||
const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.12 : 1.88e-5;
|
||||
const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.12 : 2.2e-5;
|
||||
const double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.74 : 1.23e-4;
|
||||
testONNXModels("densenet121", pb, l1, lInf);
|
||||
}
|
||||
|
@ -95,7 +95,7 @@ if haveInfEngine:
|
||||
if cv.ocl.haveOpenCL() and cv.ocl.useOpenCL():
|
||||
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL])
|
||||
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL_FP16])
|
||||
if haveInfEngine: # FIXIT Check Intel iGPU only
|
||||
if haveInfEngine and cv.ocl_Device.getDefault().isIntel():
|
||||
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL])
|
||||
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16])
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user