diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 888477e154..5e6b247f8c 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -59,7 +59,7 @@ CV_EXPORTS_W void finish(); CV_EXPORTS bool haveSVM(); class CV_EXPORTS Context; -class CV_EXPORTS Device; +class CV_EXPORTS_W_SIMPLE Device; class CV_EXPORTS Kernel; class CV_EXPORTS Program; class CV_EXPORTS ProgramSource; @@ -67,14 +67,14 @@ class CV_EXPORTS Queue; class CV_EXPORTS PlatformInfo; class CV_EXPORTS Image2D; -class CV_EXPORTS Device +class CV_EXPORTS_W_SIMPLE Device { public: - Device(); + CV_WRAP Device(); explicit Device(void* d); Device(const Device& d); Device& operator = (const Device& d); - ~Device(); + CV_WRAP ~Device(); void set(void* d); @@ -89,24 +89,24 @@ public: TYPE_ALL = 0xFFFFFFFF }; - String name() const; - String extensions() const; - bool isExtensionSupported(const String& extensionName) const; - String version() const; - String vendorName() const; - String OpenCL_C_Version() const; - String OpenCLVersion() const; - int deviceVersionMajor() const; - int deviceVersionMinor() const; - String driverVersion() const; + CV_WRAP String name() const; + CV_WRAP String extensions() const; + CV_WRAP bool isExtensionSupported(const String& extensionName) const; + CV_WRAP String version() const; + CV_WRAP String vendorName() const; + CV_WRAP String OpenCL_C_Version() const; + CV_WRAP String OpenCLVersion() const; + CV_WRAP int deviceVersionMajor() const; + CV_WRAP int deviceVersionMinor() const; + CV_WRAP String driverVersion() const; void* ptr() const; - int type() const; + CV_WRAP int type() const; - int addressBits() const; - bool available() const; - bool compilerAvailable() const; - bool linkerAvailable() const; + CV_WRAP int addressBits() const; + CV_WRAP bool available() const; + CV_WRAP bool compilerAvailable() const; + CV_WRAP bool linkerAvailable() const; enum { @@ -119,21 +119,21 @@ public: FP_SOFT_FLOAT=(1 << 6), FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7) }; - int doubleFPConfig() const; - int singleFPConfig() const; - int halfFPConfig() const; + CV_WRAP int doubleFPConfig() const; + CV_WRAP int singleFPConfig() const; + CV_WRAP int halfFPConfig() const; - bool endianLittle() const; - bool errorCorrectionSupport() const; + CV_WRAP bool endianLittle() const; + CV_WRAP bool errorCorrectionSupport() const; enum { EXEC_KERNEL=(1 << 0), EXEC_NATIVE_KERNEL=(1 << 1) }; - int executionCapabilities() const; + CV_WRAP int executionCapabilities() const; - size_t globalMemCacheSize() const; + CV_WRAP size_t globalMemCacheSize() const; enum { @@ -141,38 +141,38 @@ public: READ_ONLY_CACHE=1, READ_WRITE_CACHE=2 }; - int globalMemCacheType() const; - int globalMemCacheLineSize() const; - size_t globalMemSize() const; + CV_WRAP int globalMemCacheType() const; + CV_WRAP int globalMemCacheLineSize() const; + CV_WRAP size_t globalMemSize() const; - size_t localMemSize() const; + CV_WRAP size_t localMemSize() const; enum { NO_LOCAL_MEM=0, LOCAL_IS_LOCAL=1, LOCAL_IS_GLOBAL=2 }; - int localMemType() const; - bool hostUnifiedMemory() const; + CV_WRAP int localMemType() const; + CV_WRAP bool hostUnifiedMemory() const; - bool imageSupport() const; + CV_WRAP bool imageSupport() const; - bool imageFromBufferSupport() const; + CV_WRAP bool imageFromBufferSupport() const; uint imagePitchAlignment() const; uint imageBaseAddressAlignment() const; /// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value) - bool intelSubgroupsSupport() const; + CV_WRAP bool intelSubgroupsSupport() const; - size_t image2DMaxWidth() const; - size_t image2DMaxHeight() const; + CV_WRAP size_t image2DMaxWidth() const; + CV_WRAP size_t image2DMaxHeight() const; - size_t image3DMaxWidth() const; - size_t image3DMaxHeight() const; - size_t image3DMaxDepth() const; + CV_WRAP size_t image3DMaxWidth() const; + CV_WRAP size_t image3DMaxHeight() const; + CV_WRAP size_t image3DMaxDepth() const; - size_t imageMaxBufferSize() const; - size_t imageMaxArraySize() const; + CV_WRAP size_t imageMaxBufferSize() const; + CV_WRAP size_t imageMaxArraySize() const; enum { @@ -181,53 +181,53 @@ public: VENDOR_INTEL=2, VENDOR_NVIDIA=3 }; - int vendorID() const; + CV_WRAP int vendorID() const; // FIXIT // dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform. // This method should use platform name instead of vendor name. // After fix restore code in arithm.cpp: ocl_compare() - inline bool isAMD() const { return vendorID() == VENDOR_AMD; } - inline bool isIntel() const { return vendorID() == VENDOR_INTEL; } - inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; } + CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; } + CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; } + CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; } - int maxClockFrequency() const; - int maxComputeUnits() const; - int maxConstantArgs() const; - size_t maxConstantBufferSize() const; + CV_WRAP int maxClockFrequency() const; + CV_WRAP int maxComputeUnits() const; + CV_WRAP int maxConstantArgs() const; + CV_WRAP size_t maxConstantBufferSize() const; - size_t maxMemAllocSize() const; - size_t maxParameterSize() const; + CV_WRAP size_t maxMemAllocSize() const; + CV_WRAP size_t maxParameterSize() const; - int maxReadImageArgs() const; - int maxWriteImageArgs() const; - int maxSamplers() const; + CV_WRAP int maxReadImageArgs() const; + CV_WRAP int maxWriteImageArgs() const; + CV_WRAP int maxSamplers() const; - size_t maxWorkGroupSize() const; - int maxWorkItemDims() const; + CV_WRAP size_t maxWorkGroupSize() const; + CV_WRAP int maxWorkItemDims() const; void maxWorkItemSizes(size_t*) const; - int memBaseAddrAlign() const; + CV_WRAP int memBaseAddrAlign() const; - int nativeVectorWidthChar() const; - int nativeVectorWidthShort() const; - int nativeVectorWidthInt() const; - int nativeVectorWidthLong() const; - int nativeVectorWidthFloat() const; - int nativeVectorWidthDouble() const; - int nativeVectorWidthHalf() const; + CV_WRAP int nativeVectorWidthChar() const; + CV_WRAP int nativeVectorWidthShort() const; + CV_WRAP int nativeVectorWidthInt() const; + CV_WRAP int nativeVectorWidthLong() const; + CV_WRAP int nativeVectorWidthFloat() const; + CV_WRAP int nativeVectorWidthDouble() const; + CV_WRAP int nativeVectorWidthHalf() const; - int preferredVectorWidthChar() const; - int preferredVectorWidthShort() const; - int preferredVectorWidthInt() const; - int preferredVectorWidthLong() const; - int preferredVectorWidthFloat() const; - int preferredVectorWidthDouble() const; - int preferredVectorWidthHalf() const; + CV_WRAP int preferredVectorWidthChar() const; + CV_WRAP int preferredVectorWidthShort() const; + CV_WRAP int preferredVectorWidthInt() const; + CV_WRAP int preferredVectorWidthLong() const; + CV_WRAP int preferredVectorWidthFloat() const; + CV_WRAP int preferredVectorWidthDouble() const; + CV_WRAP int preferredVectorWidthHalf() const; - size_t printfBufferSize() const; - size_t profilingTimerResolution() const; + CV_WRAP size_t printfBufferSize() const; + CV_WRAP size_t profilingTimerResolution() const; - static const Device& getDefault(); + CV_WRAP static const Device& getDefault(); protected: struct Impl; diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 96e5881a29..e5d047fc88 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -3078,7 +3078,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[], dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1; CV_Assert( val > 0 ); total *= _globalsize[i]; - if (_globalsize[i] == 1) + if (_globalsize[i] == 1 && !_localsize) val = 1; globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val; } diff --git a/modules/core/test/ocl/test_gemm.cpp b/modules/core/test/ocl/test_gemm.cpp index f8aebead91..825b506780 100644 --- a/modules/core/test/ocl/test_gemm.cpp +++ b/modules/core/test/ocl/test_gemm.cpp @@ -145,6 +145,21 @@ OCL_INSTANTIATE_TEST_CASE_P(Core, Gemm, ::testing::Combine( testing::Values(CV_32FC1, CV_32FC2, CV_64FC1, CV_64FC2), Bool(), Bool(), Bool(), Bool())); +// Test for non-Intel GPUs to check CL_INVALID_WORK_GROUP_SIZE when localsize > globalsize +OCL_TEST(Gemm, small) +{ + UMat A(2, 3, CV_32F), B(4, 3, CV_32F), uC(2, 4, CV_32F); + Mat C(2, 4, CV_32F); + + randu(A, -1, 1); + randu(B, -1, 1); + + OCL_OFF(cv::gemm(A, B, 1, noArray(), 0, C, GEMM_2_T)); + OCL_ON(cv::gemm(A, B, 1, noArray(), 0, uC, GEMM_2_T)); + + EXPECT_LE(cvtest::norm(C, uC, cv::NORM_INF), 1e-5); +} + } } // namespace opencv_test::ocl #endif // HAVE_OPENCL diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 16fb06cca5..8e5f478b00 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1078,12 +1078,22 @@ struct Net::Impl } #else { - if (!DNN_OPENCL_ALLOW_ALL_DEVICES - && !(ocl::Device::getDefault().isIntel() && ocl::Device::getDefault().type() == ocl::Device::TYPE_GPU) // Current implementation is only valid for Intel GPU (#11494) - ) + if (!DNN_OPENCL_ALLOW_ALL_DEVICES) { - CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with Intel GPUs only), switching to CPU."); - preferableTarget = DNN_TARGET_CPU; + // Current implementation is only valid for GPU (#11494) + if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU) + { + CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU."); + preferableTarget = DNN_TARGET_CPU; + } + else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel()) + { + CV_LOG_WARNING(NULL, + "DNN: OpenCL target with fp16 precision is not supported " + "with current OpenCL device (tested with Intel GPUs only), " + "switching to OpenCL with fp32 precision."); + preferableTarget = DNN_TARGET_OPENCL; + } } } #endif diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index 6762ff8546..c0e8159532 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -230,8 +230,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp index 8f8e66d761..b85621f9a0 100644 --- a/modules/dnn/src/layers/blank_layer.cpp +++ b/modules/dnn/src/layers/blank_layer.cpp @@ -95,16 +95,9 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) - if (inputs_arr.depth() == CV_16S) - { - forward_fallback(inputs_arr, outputs_arr, internals_arr); - return; - } - std::vector inputs, outputs; inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index 92e5421db9..76468b12f6 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -237,16 +237,9 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) - if (inputs_arr.depth() == CV_16S) - { - forward_fallback(inputs_arr, outputs_arr, internals_arr); - return; - } - std::vector inputs, outputs; inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index a948c6ef9d..38d56180c7 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -1529,8 +1529,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)); if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/crop_layer.cpp b/modules/dnn/src/layers/crop_layer.cpp index f1c41c4036..fb878bb5e6 100644 --- a/modules/dnn/src/layers/crop_layer.cpp +++ b/modules/dnn/src/layers/crop_layer.cpp @@ -137,12 +137,6 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - if (inputs_arr.depth() == CV_16S) - { - forward_fallback(inputs_arr, outputs_arr, internals_arr); - return; - } - std::vector inputs, outputs; inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 58c332ad53..8b3b9449d8 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -415,8 +415,7 @@ public: if (_bboxesNormalized) { - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) } if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index 9c6a681467..341aea268f 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -354,8 +354,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/flatten_layer.cpp b/modules/dnn/src/layers/flatten_layer.cpp index bda9ba46a1..632cb7aace 100644 --- a/modules/dnn/src/layers/flatten_layer.cpp +++ b/modules/dnn/src/layers/flatten_layer.cpp @@ -135,16 +135,9 @@ public: CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - outputs_arr.isUMatVector() && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + outputs_arr.isUMatVector(), forward_ocl(inputs_arr, outputs_arr, internals_arr)) - if (inputs_arr.depth() == CV_16S) - { - forward_fallback(inputs_arr, outputs_arr, internals_arr); - return; - } - std::vector inputs, outputs; inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index d2c609cf77..930ce2a4ce 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -389,8 +389,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp index 5af2359bb8..cc2955bbc0 100644 --- a/modules/dnn/src/layers/lrn_layer.cpp +++ b/modules/dnn/src/layers/lrn_layer.cpp @@ -148,8 +148,7 @@ public: CV_Assert(inputs_arr.total() == outputs_arr.total()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp index 694d3d1039..b3ca64f24a 100644 --- a/modules/dnn/src/layers/normalize_bbox_layer.cpp +++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp @@ -184,8 +184,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp index 7aa12d7748..b837d4ccd5 100644 --- a/modules/dnn/src/layers/padding_layer.cpp +++ b/modules/dnn/src/layers/padding_layer.cpp @@ -99,19 +99,21 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - if (inputs_arr.depth() == CV_16S) - { - forward_fallback(inputs_arr, outputs_arr, internals_arr); - return; - } - std::vector inputs, outputs; inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); if (paddingType == "constant") { - outputs[0].setTo(paddingValue); + if (inputs_arr.depth() == CV_16S) + { + std::vector paddingValue_fp32(1, paddingValue); + std::vector paddingValue_fp16(1); + convertFp16(paddingValue_fp32, paddingValue_fp16); + outputs[0].setTo(paddingValue_fp16[0]); + } + else + outputs[0].setTo(paddingValue); inputs[0].copyTo(outputs[0](dstRanges)); } else if (paddingType == "reflect") diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp index 65e4f049e3..ac294a9a10 100644 --- a/modules/dnn/src/layers/permute_layer.cpp +++ b/modules/dnn/src/layers/permute_layer.cpp @@ -304,8 +304,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp index fde41201d5..fbe631fff4 100644 --- a/modules/dnn/src/layers/prior_box_layer.cpp +++ b/modules/dnn/src/layers/prior_box_layer.cpp @@ -402,8 +402,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp index 2d74443e08..f721d409a9 100644 --- a/modules/dnn/src/layers/region_layer.cpp +++ b/modules/dnn/src/layers/region_layer.cpp @@ -196,8 +196,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/reorg_layer.cpp b/modules/dnn/src/layers/reorg_layer.cpp index 6f0d55cd2f..a98f690e65 100644 --- a/modules/dnn/src/layers/reorg_layer.cpp +++ b/modules/dnn/src/layers/reorg_layer.cpp @@ -160,8 +160,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index d56507e0f6..ec1f8cf4a8 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -233,16 +233,9 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) - if (inputs_arr.depth() == CV_16S) - { - forward_fallback(inputs_arr, outputs_arr, internals_arr); - return; - } - std::vector inputs, outputs; inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); diff --git a/modules/dnn/src/layers/shuffle_channel_layer.cpp b/modules/dnn/src/layers/shuffle_channel_layer.cpp index c4c04786b1..44987f6390 100644 --- a/modules/dnn/src/layers/shuffle_channel_layer.cpp +++ b/modules/dnn/src/layers/shuffle_channel_layer.cpp @@ -92,8 +92,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index e24842f9de..4818d9dfc7 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -239,16 +239,9 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) - if (inputs_arr.depth() == CV_16S) - { - forward_fallback(inputs_arr, outputs_arr, internals_arr); - return; - } - std::vector inputs, outputs; inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index 9f7a0ac920..f1872a0026 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -187,8 +187,7 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16S) diff --git a/modules/dnn/src/layers/split_layer.cpp b/modules/dnn/src/layers/split_layer.cpp index 2fe5df1509..b0ea1aed01 100644 --- a/modules/dnn/src/layers/split_layer.cpp +++ b/modules/dnn/src/layers/split_layer.cpp @@ -83,12 +83,6 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - if (inputs_arr.depth() == CV_16S) - { - forward_fallback(inputs_arr, outputs_arr, internals_arr); - return; - } - std::vector inputs, outputs; inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp index b0fcfa9f0b..8f946251e4 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp @@ -69,9 +69,6 @@ bool OCL4DNNLRN::Forward(const UMat& bottom, UMat& top) { bool ret = true; - if (!ocl::Device::getDefault().intelSubgroupsSupport()) - return false; - switch (lrn_type_) { case LRNParameter_NormRegion_ACROSS_CHANNELS: diff --git a/modules/dnn/src/opencl/prior_box.cl b/modules/dnn/src/opencl/prior_box.cl index d898a13ffd..a3f1161f30 100644 --- a/modules/dnn/src/opencl/prior_box.cl +++ b/modules/dnn/src/opencl/prior_box.cl @@ -114,6 +114,6 @@ __kernel void clip(const int nthreads, for (int index = get_global_id(0); index < nthreads; index += get_global_size(0)) { Dtype4 vec = vload4(index, dst); - vstore4(clamp(vec, 0, 1), index, dst); + vstore4(clamp(vec, 0.0f, 1.0f), index, dst); } } diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 85405803d6..9415bea754 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -295,7 +295,7 @@ TEST_P(Test_ONNX_nets, TinyYolov2) TEST_P(Test_ONNX_nets, CNN_MNIST) { // output range: [-1952; 6574] - const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 3.82 : 4.3e-4; + const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 3.82 : 4.4e-4; const double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 13.5 : 2e-3; testONNXModels("cnn_mnist", pb, l1, lInf); @@ -341,7 +341,7 @@ TEST_P(Test_ONNX_nets, Inception_v2) TEST_P(Test_ONNX_nets, DenseNet121) { // output range: [-87; 138] - const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.12 : 1.88e-5; + const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.12 : 2.2e-5; const double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.74 : 1.23e-4; testONNXModels("densenet121", pb, l1, lInf); } diff --git a/modules/python/test/test_dnn.py b/modules/python/test/test_dnn.py index a1b55f4358..544ecbd20e 100644 --- a/modules/python/test/test_dnn.py +++ b/modules/python/test/test_dnn.py @@ -95,7 +95,7 @@ if haveInfEngine: if cv.ocl.haveOpenCL() and cv.ocl.useOpenCL(): dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL]) dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL_FP16]) - if haveInfEngine: # FIXIT Check Intel iGPU only + if haveInfEngine and cv.ocl_Device.getDefault().isIntel(): dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL]) dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16])