diff --git a/CMakeLists.txt b/CMakeLists.txt index 64d89ed60b..caa8f83c99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,6 +74,10 @@ if(POLICY CMP0077) cmake_policy(SET CMP0077 NEW) # CMake 3.13+: option() honors normal variables. endif() +if(POLICY CMP0146) + cmake_policy(SET CMP0146 OLD) # CMake 3.27+: use CMake FindCUDA if available. +endif() + # # Configure OpenCV CMake hooks # diff --git a/modules/dnn/perf/perf_layer.cpp b/modules/dnn/perf/perf_layer.cpp index d5a9bb34af..261bc5c3ca 100644 --- a/modules/dnn/perf/perf_layer.cpp +++ b/modules/dnn/perf/perf_layer.cpp @@ -643,4 +643,69 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(D INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); + +typedef TestBaseWithParam > > Layer_FullyConnected; +PERF_TEST_P_(Layer_FullyConnected, fc) +{ + std::vector inpShape; + inpShape.reserve(4); + for (int i = 0; i < 4; ++i) { + int dim = get<0>(GetParam())[i]; + if (dim == 0) + break; + inpShape.push_back(dim); + } + Mat input(inpShape, CV_32F); + randn(input, 0, 1); + + int axis = input.dims - 1; + int outDims = get<1>(GetParam()); + bool isMatMul = get<2>(GetParam()); + int backendId = get<0>(get<3>(GetParam())); + int targetId = get<1>(get<3>(GetParam())); + + std::vector weightShape; + if (isMatMul) { + weightShape = inpShape; + weightShape[weightShape.size() - 2] = outDims; + } else { + weightShape = {outDims, (int)input.total(axis, input.dims)}; + } + Mat weights(weightShape, CV_32F); + randn(weights, 0, 1); + + LayerParams lp; + lp.set("axis", input.dims - 1); + lp.set("is_matmul", weights.dims > 2); + lp.set("bias_term", false); + lp.set("transB", true); + lp.set("num_output", (int)weights.total(0, weights.dims - 1)); + lp.blobs.resize(1, weights); + + Net net; + net.addLayerToPrev("matmul", "InnerProduct", lp); + + net.setInput(input); + net.setPreferableBackend(backendId); + net.setPreferableTarget(targetId); + + // warmup + Mat output = net.forward(); + + TEST_CYCLE() + { + net.forward(); + } + SANITY_CHECK_NOTHING(); +} +INSTANTIATE_TEST_CASE_P(/**/, Layer_FullyConnected, Combine( + Values( // input size + Vec4i(5, 512, 384), + Vec4i(5, 16, 512, 128) + ), + Values(256, 512, 1024), // output dimension + testing::Bool(), // is_matmul + dnnBackendsAndTargets() +)); + } // namespace diff --git a/modules/dnn/src/cuda/activations.cu b/modules/dnn/src/cuda/activations.cu index e12457a164..e983c95a91 100644 --- a/modules/dnn/src/cuda/activations.cu +++ b/modules/dnn/src/cuda/activations.cu @@ -248,6 +248,11 @@ void selu(const Stream& stream, Span output, View input, T alpha, T gamma) generic_op>(stream, output, input, {alpha, gamma}); } +template +void gelu(const Stream& stream, Span output, View input) { + generic_op>(stream, output, input); +} + template void sign(const Stream& stream, Span output, View input) { generic_op>(stream, output, input); @@ -324,6 +329,7 @@ template void tan<__half>(const Stream&, Span<__half>, View<__half>); template void celu<__half>(const Stream&, Span<__half>, View<__half>, __half); template void hardsigmoid<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); +template void gelu<__half>(const Stream&, Span<__half>, View<__half>); template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half); template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half); template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); @@ -366,6 +372,7 @@ template void tan(const Stream&, Span, View); template void celu(const Stream&, Span, View, float); template void hardsigmoid(const Stream&, Span, View, float, float); template void selu(const Stream&, Span, View, float, float); +template void gelu(const Stream&, Span, View); template void thresholdedrelu(const Stream&, Span, View, float); template void power(const Stream&, Span, View, float, float, float); template void exp(const Stream&, Span, View, float, float); diff --git a/modules/dnn/src/cuda/functors.hpp b/modules/dnn/src/cuda/functors.hpp index 83a949f8e7..3e487cd98a 100644 --- a/modules/dnn/src/cuda/functors.hpp +++ b/modules/dnn/src/cuda/functors.hpp @@ -588,6 +588,21 @@ struct SeluFunctor { T alpha, gamma; }; +template +struct GeluFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() { } + }; + + CUDA4DNN_DEVICE GeluFunctor() { } + CUDA4DNN_DEVICE GeluFunctor(const Params& params) { } + + CUDA4DNN_DEVICE T operator()(T value) { + using csl::device::erf; + return static_cast(0.5f) * value * (static_cast(1.f) + erf(value * static_cast(M_SQRT1_2))); + } +}; + template struct ThresholdedReluFunctor { struct Params { diff --git a/modules/dnn/src/cuda4dnn/kernels/activations.hpp b/modules/dnn/src/cuda4dnn/kernels/activations.hpp index 6958b93d5e..fad549a083 100644 --- a/modules/dnn/src/cuda4dnn/kernels/activations.hpp +++ b/modules/dnn/src/cuda4dnn/kernels/activations.hpp @@ -114,6 +114,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { template void selu(const csl::Stream& stream, csl::Span output, csl::View input, T alpha, T gamma); + template + void gelu(const csl::Stream& stream, csl::Span output, csl::View input); + template void thresholdedrelu(const csl::Stream& stream, csl::Span output, csl::View input, T alpha); diff --git a/modules/dnn/src/cuda4dnn/primitives/activation.hpp b/modules/dnn/src/cuda4dnn/primitives/activation.hpp index 564202e8c0..c10f9014a5 100644 --- a/modules/dnn/src/cuda4dnn/primitives/activation.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/activation.hpp @@ -537,6 +537,20 @@ namespace cv { namespace dnn { namespace cuda4dnn { const T alpha, gamma; }; + template + class GeluOp final : public BaseOp { + public: + GeluOp(csl::Stream stream_) : stream(std::move(stream_)) { } + + void calculate(csl::TensorSpan output, csl::TensorView input) const + { + kernels::gelu(stream, output, input); + } + + private: + csl::Stream stream; + }; + template class ThresholdedReluOp final : public BaseOp { public: diff --git a/modules/dnn/src/cuda4dnn/primitives/normalize_bbox.hpp b/modules/dnn/src/cuda4dnn/primitives/normalize_bbox.hpp index f067dddaa7..91ff33f817 100644 --- a/modules/dnn/src/cuda4dnn/primitives/normalize_bbox.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/normalize_bbox.hpp @@ -111,7 +111,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { * or there might be several weights * or we don't have to scale */ - if (weight != 1.0) + if (weight != static_cast(1.0f)) { kernels::scale1_with_bias1(stream, output, input, weight, 1.0); } diff --git a/modules/dnn/src/cuda4dnn/primitives/region.hpp b/modules/dnn/src/cuda4dnn/primitives/region.hpp index d22d44214e..3af05155fe 100644 --- a/modules/dnn/src/cuda4dnn/primitives/region.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/region.hpp @@ -121,7 +121,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { new_coords ); - if (nms_iou_threshold > 0) { + if (nms_iou_threshold > static_cast(0.0f)) { auto output_mat = output_wrapper->getMutableHostMat(); CV_Assert(output_mat.type() == CV_32F); for (int i = 0; i < input.get_axis_size(0); i++) { diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index a49976de74..140d4b0d2f 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -446,66 +446,6 @@ void InfEngineNgraphNet::addOutput(const Ptr& node) requestedOutputs.insert({name, node.get()}); } -void InfEngineNgraphNet::setNodePtr(std::shared_ptr* ptr) { - all_nodes.emplace((*ptr)->get_friendly_name(), ptr); -} - - void InfEngineNgraphNet::release() - { - // FIXIT release should not be conditional, release ALL - for (auto& node : components.back()) { -#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - if (!(ngraph::op::is_parameter(node) || ngraph::op::is_output(node) || ngraph::op::is_constant(node)) ) { -#else - if (!(node->is_parameter() || node->is_output() || node->is_constant()) ) { -#endif - auto it = all_nodes.find(node->get_friendly_name()); - if (it != all_nodes.end()) { - it->second->reset(); - all_nodes.erase(it); - } - } - } - } - -void InfEngineNgraphNet::dfs(std::shared_ptr& node, - std::vector>& comp, - std::unordered_map& used) { - used[node->get_friendly_name()] = true; - comp.push_back(node); - auto inputs = node->get_users(); - for (size_t i = 0; i < node->get_input_size(); ++i) { - inputs.push_back(node->input_value(i).get_node()->shared_from_this()); - } - - for (auto& to : inputs) { - if (!used[to->get_friendly_name()]) { - dfs(to, comp, used); - } - } -} - -int InfEngineNgraphNet::getNumComponents() -{ - if (!components.empty()) { - return components.size(); - } - std::unordered_map used; - auto inputs = ngraph_function->get_ordered_ops(); - for (auto& node : inputs) { - used.emplace(node->get_friendly_name(), false); - } - - for (auto& node : inputs) { - if (!used[node->get_friendly_name()]) { - std::vector> current_comp; - dfs(node, current_comp, used); - components.push_back(current_comp); - } - } - return components.size(); -} - void InfEngineNgraphNet::createNet(Target targetId) { if (!hasNetOwner) { @@ -524,46 +464,7 @@ void InfEngineNgraphNet::createNet(Target targetId) { } CV_Assert_N(!inputs_vec.empty(), !outs.empty()); ngraph_function = std::make_shared(outs, inputs_vec); - - int num_comp = getNumComponents(); - CV_LOG_DEBUG(NULL, "DNN/IE: number of subgraphs: " << num_comp); - if (num_comp > 1) { - for (int i = num_comp - 1; i >= 0; --i) { - ngraph::ResultVector outputs; - ngraph::ParameterVector inps; - for (auto& node : components.back()) { -#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - if (ngraph::op::is_parameter(node)) { -#else - if (node->is_parameter()) { -#endif - CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +input[" << inps.size() << "] = '" << node->get_friendly_name() << "'"); - auto parameter = std::dynamic_pointer_cast(node); - inps.push_back(parameter); - } -#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - else if (ngraph::op::is_output(node)) { -#else - else if (node->is_output()) { -#endif - CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +output[" << outputs.size() << "] = '" << node->get_friendly_name() << "'"); - auto result = std::dynamic_pointer_cast(node); - outputs.push_back(result); - } - } - CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << ": nodes=" << components.back().size() << " inputs=" << inps.size() << " outputs=" << outputs.size()); - isInit = false; - CV_Assert_N(!inps.empty(), !outputs.empty()); - ngraph_function = std::make_shared(outputs, inps); - release(); - components.pop_back(); - init(targetId); - } - } else { - release(); - components.clear(); - init(targetId); - } + init(targetId); } } diff --git a/modules/dnn/src/ie_ngraph.hpp b/modules/dnn/src/ie_ngraph.hpp index 09afc7f117..7bb0ac09df 100644 --- a/modules/dnn/src/ie_ngraph.hpp +++ b/modules/dnn/src/ie_ngraph.hpp @@ -50,22 +50,14 @@ public: void addBlobs(const std::vector >& ptrs); void createNet(Target targetId); - void setNodePtr(std::shared_ptr* ptr); void reset(); //private: detail::NetImplBase& netImpl_; - void release(); - int getNumComponents(); - void dfs(std::shared_ptr& node, std::vector>& comp, - std::unordered_map& used); - ngraph::ParameterVector inputs_vec; std::shared_ptr ngraph_function; - std::vector>> components; - std::unordered_map* > all_nodes; InferenceEngine::ExecutableNetwork netExec; #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 61d4f44432..26f483a770 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -221,7 +221,7 @@ public: { return backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_CUDA && !_groupByClasses) || - (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && !_locPredTransposed && _bboxesNormalized); + backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; } bool getMemoryShapes(const std::vector &inputs, @@ -1006,9 +1006,30 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { CV_Assert(nodes.size() == 3); - auto& box_logits = nodes[0].dynamicCast()->node; - auto& class_preds = nodes[1].dynamicCast()->node; - auto& proposals = nodes[2].dynamicCast()->node; + auto box_logits = nodes[0].dynamicCast()->node; + auto class_preds = nodes[1].dynamicCast()->node; + auto proposals = nodes[2].dynamicCast()->node; + + if (_locPredTransposed) { + // Convert box predictions from yxYX to xyXY + box_logits = std::make_shared(box_logits, + std::make_shared(ngraph::element::i32, ngraph::Shape{3}, std::vector{0, -1, 2}), + true + ); + int axis = 2; + box_logits = std::make_shared(box_logits, + std::make_shared(ngraph::element::i32, ngraph::Shape{1}, &axis), + ngraph::op::v1::Reverse::Mode::INDEX + ); + } + + auto shape = std::make_shared(ngraph::element::i32, ngraph::Shape{2}, std::vector{0, -1}); + box_logits = std::make_shared(box_logits, shape, true); + class_preds = std::make_shared(class_preds, shape, true); + proposals = std::make_shared(proposals, + std::make_shared(ngraph::element::i32, ngraph::Shape{3}, std::vector{0, _varianceEncodedInTarget ? 1 : 2, -1}), + true + ); ngraph::op::DetectionOutputAttrs attrs; attrs.num_classes = _numClasses; diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 2a34b9400b..3bcd53f95c 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -821,7 +821,7 @@ struct GeluFunctor : public BaseDefaultFunctor bool supportBackend(int backendId, int) { - return backendId == DNN_BACKEND_OPENCV; + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA; } inline float calculate(float x) const @@ -829,6 +829,13 @@ struct GeluFunctor : public BaseDefaultFunctor return 0.5f * x * (1.0f + erf(x * M_SQRT1_2)); } +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream); + } +#endif + int64 getFLOPSPerElement() const { return 100; } }; diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index e0fdac1039..9cdb31023c 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -180,15 +180,12 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { bool tranAorB = transA || transB; -#ifdef HAVE_INF_ENGINE - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - return axis == 1 && !tranAorB; -#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !tranAorB) || (backendId == DNN_BACKEND_WEBNN && axis == 1 && !tranAorB) || backendId == DNN_BACKEND_CANN || + backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !tranAorB); } @@ -630,8 +627,10 @@ public: if(input_wrapper->getRank() == inp2Dim) return make_cuda_node(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), oriMat, biasMat_, transA, transB); - else + else { + CV_LOG_INFO(NULL, "DNN/CUDA: no implementation for MatMul with rank " << input_wrapper->getRank()); return Ptr(); + } } auto flatten_start_axis = normalize_axis(axis, input_wrapper->getRank()); @@ -800,17 +799,26 @@ public: if (nodes.size() == 2) { auto& inp2 = nodes[1].dynamicCast()->node; - matmul = std::make_shared(ieInpNode, inp2, false, false); + matmul = std::make_shared(ieInpNode, inp2, transA, transB); } else { - std::vector data = {(int64_t)ieInpNode->get_shape()[0], (int64_t)blobs[0].size[1]}; - auto new_shape = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, data.data()); - auto inp = std::make_shared(ieInpNode, new_shape, true); + std::vector shape(1 + normalize_axis(axis, ieInpNode->get_shape().size()), 0); + shape[shape.size() - 1] = -1; + auto inp = std::make_shared( + ieInpNode, + std::make_shared(ngraph::element::i32, ngraph::Shape{shape.size()}, shape.data()), + true + ); - std::vector weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]}; + std::vector weight_shape; + if (isMatMul) { + weight_shape = getShape(oriMat); + } else { + weight_shape = {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]}; + } auto ieWeights = std::make_shared(ngraph::element::f32, weight_shape, blobs[0].data); - matmul = std::make_shared(inp, ieWeights, false, true); + matmul = std::make_shared(inp, ieWeights, transA, transB); } if (bias) { diff --git a/modules/dnn/src/layers/max_unpooling_layer.cpp b/modules/dnn/src/layers/max_unpooling_layer.cpp index fd47c4c919..6a599408e1 100644 --- a/modules/dnn/src/layers/max_unpooling_layer.cpp +++ b/modules/dnn/src/layers/max_unpooling_layer.cpp @@ -13,6 +13,7 @@ Implementation of Batch Normalization layer. #include "layers_common.hpp" #include "../op_cuda.hpp" #include "../op_halide.hpp" +#include "../ie_ngraph.hpp" #include #include @@ -41,6 +42,7 @@ public: { return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || + backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || (backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height); } @@ -181,6 +183,50 @@ public: #endif // HAVE_HALIDE return Ptr(); } + +#ifdef HAVE_DNN_NGRAPH + virtual Ptr initNgraph(const std::vector >& inputs, + const std::vector >& nodes) CV_OVERRIDE + { + auto features = nodes[0].dynamicCast()->node; + auto indices = nodes[1].dynamicCast()->node; + + std::vector inpShapes(nodes.size()); + std::vector outShapes, internals; + for (int i = 0; i < nodes.size(); ++i) { + std::vector shape = nodes[i].dynamicCast()->node->get_shape(); + inpShapes[i] = std::vector(shape.begin(), shape.end()); + } + getMemoryShapes(inpShapes, 1, outShapes, internals); + + Mat zeros = Mat::zeros(1, total(outShapes[0]), CV_32F); + auto zeroInp = std::make_shared(ngraph::element::f32, ngraph::Shape{zeros.total()}, zeros.data); + + int newShape = -1; + features = std::make_shared( + features, + std::make_shared(ngraph::element::i32, ngraph::Shape{1}, &newShape), + true + ); + indices = std::make_shared( + indices, + std::make_shared(ngraph::element::i32, ngraph::Shape{1}, &newShape), + true + ); + if (indices->get_element_type() != ngraph::element::i32 && indices->get_element_type() != ngraph::element::i64) { + indices = std::make_shared(indices, ngraph::element::i64); + } + + int axis = 0; + std::shared_ptr unpool = std::make_shared(zeroInp, indices, features, + std::make_shared(ngraph::element::i32, ngraph::Shape{1}, &axis)); + + auto shape = std::make_shared(ngraph::element::i32, ngraph::Shape{outShapes[0].size()}, outShapes[0].data()); + unpool = std::make_shared(unpool, shape, true); + + return Ptr(new InfEngineNgraphNode(unpool)); + } +#endif // HAVE_DNN_NGRAPH }; Ptr MaxUnpoolLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index c58405507e..5caaa36ba0 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -209,7 +209,7 @@ public: #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - return !computeMaxIdx && type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()); + return type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()); } #endif if (backendId == DNN_BACKEND_OPENCV) @@ -613,9 +613,17 @@ public: return Ptr(new InfEngineNgraphNode(reduce_sum)); } else if (type == MAX) { - auto max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), - ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), - rounding_type, pad_type); + std::shared_ptr max_pool; + if (computeMaxIdx) { + std::vector dilations(kernel_size.size(), 1); + max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), ngraph::Strides(dilations), + ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), + rounding_type, pad_type); + } else { + max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), + ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), + rounding_type, pad_type); + } return Ptr(new InfEngineNgraphNode(max_pool)); } else if (type == ROI) { diff --git a/modules/dnn/src/layers/reduce_layer.cpp b/modules/dnn/src/layers/reduce_layer.cpp index b983a791c5..77d8898df4 100644 --- a/modules/dnn/src/layers/reduce_layer.cpp +++ b/modules/dnn/src/layers/reduce_layer.cpp @@ -425,7 +425,7 @@ public: dtype* p_dst = dst.ptr(); size_t main_index = start / last_unreduced_dim; - size_t loop = start / last_unreduced_dim; + size_t loop = start % last_unreduced_dim; size_t origin = unprojected_steps[main_index] + loop * last_unreduced_step; for (int i = start; i < end; ++i) { Op accumulator(n_reduce, p_src[origin + projected_steps[0]]); diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index 02ac29de8d..607adb8aa1 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -410,7 +410,10 @@ public: } attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES; - if (alignCorners) { + CV_Assert(!halfPixelCenters || !alignCorners); + if (halfPixelCenters) { + attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::HALF_PIXEL; + } else if (alignCorners) { attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS; } @@ -427,7 +430,10 @@ public: } attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::sizes; - if (alignCorners) { + CV_Assert(!halfPixelCenters || !alignCorners); + if (halfPixelCenters) { + attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel; + } else if (alignCorners) { attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners; } diff --git a/modules/dnn/src/net_openvino.cpp b/modules/dnn/src/net_openvino.cpp index 5704cb9b64..e974ce34a3 100644 --- a/modules/dnn/src/net_openvino.cpp +++ b/modules/dnn/src/net_openvino.cpp @@ -476,13 +476,14 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) { int lid = ld.inputBlobsId[i].lid; int oid = ld.inputBlobsId[i].oid; - if (oid == 0 || lid == 0) - continue; auto ieInpNode = inputNodes[i].dynamicCast(); const auto& ngraph_input_node = ieInpNode->node; CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")"); + if ((oid == 0 && ngraph_input_node->get_output_size() == 1) || lid == 0) + continue; + // Handle parameters from other subnets. Output port is not used in this case #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) && @@ -549,7 +550,6 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) break; } } - ieNode->net->setNodePtr(&ieNode->node); net->addBlobs(ld.inputBlobsWrappers); net->addBlobs(ld.outputBlobsWrappers); diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 5cd22057ad..24e8b3f913 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -1385,13 +1385,19 @@ void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeP CV_Assert(constBlobs.find(node_proto.input(1)) != constBlobs.end()); Mat splitsBlob = getBlob(node_proto, 1); int splitSize = splitsBlob.total(); - - std::vector slicePoints(splitSize - 1, splitsBlob.at(0)); - for (int i = 1; i < splitSize - 1; ++i) + if (splitSize == 1) { - slicePoints[i] = slicePoints[i - 1] + splitsBlob.at(i); + layerParams.set("num_split", 1); + } + else + { + std::vector slicePoints(splitSize - 1, splitsBlob.at(0)); + for (int i = 1; i < splitSize - 1; ++i) + { + slicePoints[i] = slicePoints[i - 1] + splitsBlob.at(i); + } + layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); } - layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); } else { @@ -1965,9 +1971,11 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr } int transB = layerParams.get("transB", 0); + int secondInpDims; if (constBlobs.find(node_proto.input(1)) != constBlobs.end()) { Mat weights = getBlob(node_proto, 1); + secondInpDims = weights.dims; if (transA == 0) // optimized barnch, for now, we can only optimize the Gemm when transA = 0. { @@ -1993,7 +2001,10 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr } } else + { layerParams.set("transB", transB == 1); + secondInpDims = outShapes[node_proto.input(1)].size(); + } if (node_proto.input_size() == 3) { @@ -2002,7 +2013,7 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr } layerParams.set("bias_term", node_proto.input_size() == 3); - layerParams.set("is_matmul", true); + layerParams.set("is_matmul", secondInpDims > 2); addLayer(layerParams, node_proto); } @@ -2045,7 +2056,7 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node layerParams.blobs.push_back(transBlob); int numOutput = layerParams.blobs[0].total(0, secondInpDims - 1); layerParams.set("num_output", numOutput); - layerParams.set("is_matmul", true); + layerParams.set("is_matmul", secondInpDims > 2); } else secondInpDims = outShapes[node_proto.input(1)].size(); diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 809b959a21..708e353aac 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -731,21 +731,23 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif - double scoreDiff = 0.0; -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) - // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427: - // While validating node 'v1::Reshape bbox_pred_reshape (bbox_pred[0]:f32{1,84}, Constant_265242[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape': - // Requested output shape {1,6300,4,1} is incompatible with input shape {1, 84} + double scoreDiff = 0.0, iouDiff = 0.0; +#if defined(INF_ENGINE_RELEASE) if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - if (target == DNN_TARGET_OPENCL_FP16) - scoreDiff = 0.02; + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { + iouDiff = 0.02; + if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) { + scoreDiff = 0.04; + iouDiff = 0.06; + } + } #endif static Mat ref = (Mat_(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849, 0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953, 0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166); - testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff); + testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff); } TEST_P(Test_Caffe_nets, FasterRCNN_zf) @@ -766,9 +768,6 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf) ); #endif - if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || - backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); @@ -779,7 +778,14 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf) static Mat ref = (Mat_(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395, 0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762, 0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176); - testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref); + + double scoreDiff = 0.0, iouDiff = 0.0; + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { + scoreDiff = 0.02; + iouDiff = 0.13; + } + + testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff); } TEST_P(Test_Caffe_nets, RFCN) @@ -802,8 +808,8 @@ TEST_P(Test_Caffe_nets, RFCN) iouDiff = 0.12; } -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { scoreDiff = 0.1f; iouDiff = 0.2f; diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 2d61426769..2c734b4492 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -102,11 +102,14 @@ TEST(Test_Darknet, read_yolo_voc_stream) class Test_Darknet_layers : public DNNTestLayer { public: - void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true) + void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true, + double l1 = 0.0, double lInf = 0.0) { SCOPED_TRACE(name); Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy")); Mat ref = blobFromNPY(findDataFile("dnn/darknet/" + name + "_out.npy")); + l1 = l1 ? l1 : default_l1; + lInf = lInf ? lInf : default_lInf; std::string cfg = findDataFile("dnn/darknet/" + name + ".cfg"); std::string model = ""; @@ -120,7 +123,7 @@ public: net.setPreferableTarget(target); net.setInput(inp); Mat out = net.forward(); - normAssert(out, ref, "", default_l1, default_lInf); + normAssert(out, ref, "", l1, lInf); if (inp.size[0] == 1 && testBatchProcessing) // test handling of batch size { @@ -166,8 +169,8 @@ public: }*/ ASSERT_EQ(out2.dims, ref2.dims) << ref.dims; - normAssert(out2(ranges0), ref2, "", default_l1, default_lInf); - normAssert(out2(ranges1), ref2, "", default_l1, default_lInf); + normAssert(out2(ranges0), ref2, "", l1, lInf); + normAssert(out2(ranges1), ref2, "", l1, lInf); } } }; @@ -1046,7 +1049,7 @@ TEST_P(Test_Darknet_layers, region) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000) // accuracy on CPU, OpenCL // Expected: (normL1) <= (l1), actual: 0.000358148 vs 1e-05 // |ref| = 1.207319974899292 @@ -1116,7 +1119,12 @@ TEST_P(Test_Darknet_layers, connected) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16); - testDarknetLayer("connected", true); + double l1 = 0.0; + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + { + l1 = 3e-5; + } + testDarknetLayer("connected", true, true, l1); } TEST_P(Test_Darknet_layers, relu) diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index 6a7958ecee..d8a16d3efa 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -361,22 +361,9 @@ TEST_P(MaxPooling, Accuracy) Backend backendId = get<0>(get<5>(GetParam())); Target targetId = get<1>(get<5>(GetParam())); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2018050000) - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD - && inSize == Size(7, 6) && kernel == Size(3, 2) - && (stride == Size(1, 1) || stride == Size(2, 2)) - && (pad == Size(0, 1) || pad == Size(1, 1)) - ) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif - -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000) - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD - && (kernel == Size(2, 2) || kernel == Size(3, 2)) - && stride == Size(1, 1) && (pad == Size(0, 0) || pad == Size(0, 1)) - ) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif + // https://github.com/openvinotoolkit/openvino/issues/18731 + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && stride != Size(1, 1)) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD @@ -467,6 +454,11 @@ TEST_P(FullyConnected, Accuracy) { l1 = 0.01; } + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL) + { + l1 = 5e-3; + lInf = 7e-3; + } #endif if (targetId == DNN_TARGET_CUDA_FP16) l1 = 0.015; diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp index 135caa9064..c6667c7ad2 100644 --- a/modules/dnn/test/test_ie_models.cpp +++ b/modules/dnn/test/test_ie_models.cpp @@ -465,8 +465,8 @@ TEST_P(DNNTestHighLevelAPI, predict) const std::string modelPath = getOpenVINOModel(modelName, isFP16); ASSERT_FALSE(modelPath.empty()) << modelName; - std::string xmlPath = findDataFile(modelPath + ".xml"); - std::string binPath = findDataFile(modelPath + ".bin"); + std::string xmlPath = findDataFile(modelPath + ".xml", false); + std::string binPath = findDataFile(modelPath + ".bin", false); Model model(xmlPath, binPath); Mat frame = imread(findDataFile("dnn/googlenet_1.png")); diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 763d94b99c..c820283a94 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -215,7 +215,13 @@ TEST_P(Test_Caffe_layers, InnerProduct) if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16); - testLayerUsingCaffeModels("layer_inner_product", true); + double l1 = 0.0, lInf = 0.0; + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + { + l1 = 5e-3; + lInf = 2e-2; + } + testLayerUsingCaffeModels("layer_inner_product", true, true, l1, lInf); } TEST_P(Test_Caffe_layers, Pooling_max) diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index bd03551ab8..a19923bf28 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -447,14 +447,17 @@ TEST_P(Test_Model, DetectionOutput) { if (backend == DNN_BACKEND_OPENCV) scoreDiff = 4e-3; -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000) - else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - scoreDiff = 4e-2; -#endif else scoreDiff = 2e-2; iouDiff = 1.8e-1; } +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + scoreDiff = 0.05; + iouDiff = 0.08; + } +#endif testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean); diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp index e6a35dfab9..339746f5f2 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp @@ -579,9 +579,7 @@ CASE(test_dropout_default_mask_ratio) CASE(test_dropout_default_old) // no filter CASE(test_dropout_default_ratio) -#if SKIP_SET_1 - SKIP; -#endif + // no filter CASE(test_dropout_random_old) // no filter CASE(test_dynamicquantizelinear) diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 49908e7ff1..d695b1c202 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -52,7 +52,7 @@ public: } void testONNXModels(const String& basename, const Extension ext = npy, - const double l1 = 0, const float lInf = 0, const bool useSoftmax = false, + double l1 = 0, double lInf = 0, const bool useSoftmax = false, bool checkNoFallbacks = true, int numInps = 1) { String onnxmodel = _tf("models/" + basename + ".onnx", required); @@ -102,7 +102,12 @@ public: netSoftmax.setInput(ref); ref = netSoftmax.forward(); } - normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + { + l1 = std::max(l1, 1.4e-3); + lInf = std::max(lInf, 8e-3); + } + normAssert(ref, out, basename.c_str(), l1 ? l1 : default_l1, lInf ? lInf : default_lInf); if (checkNoFallbacks) expectNoFallbacksFromIE(net); } diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index e2dfbc706e..274fa8cee0 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -1816,6 +1816,11 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN) double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.2 : 2e-5; double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.018 : default_lInf; + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + scoreDiff = std::max(scoreDiff, 0.06); + iouDiff = std::max(iouDiff, 0.01); + } normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff); // Output size of masks is NxCxHxW where diff --git a/modules/dnn/test/test_tflite_importer.cpp b/modules/dnn/test/test_tflite_importer.cpp index c5bee0c086..19b3f3a94a 100644 --- a/modules/dnn/test/test_tflite_importer.cpp +++ b/modules/dnn/test/test_tflite_importer.cpp @@ -20,6 +20,14 @@ namespace opencv_test { namespace { using namespace cv; using namespace cv::dnn; +class Test_TFLite : public DNNTestLayer { +public: + void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 0, double lInf = 0); + void testModel(const std::string& modelName, const Mat& input, double l1 = 0, double lInf = 0); + void testModel(const std::string& modelName, const Size& inpSize, double l1 = 0, double lInf = 0); + void testLayer(const std::string& modelName, double l1 = 0, double lInf = 0); +}; + void testInputShapes(const Net& net, const std::vector& inps) { std::vector inLayerShapes; std::vector outLayerShapes; @@ -31,8 +39,14 @@ void testInputShapes(const Net& net, const std::vector& inps) { } } -void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4) +void Test_TFLite::testModel(Net& net, const std::string& modelName, const Mat& input, double l1, double lInf) { + l1 = l1 ? l1 : default_l1; + lInf = lInf ? lInf : default_lInf; + + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + testInputShapes(net, {input}); net.setInput(input); @@ -48,20 +62,20 @@ void testModel(Net& net, const std::string& modelName, const Mat& input, double } } -void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4) +void Test_TFLite::testModel(const std::string& modelName, const Mat& input, double l1, double lInf) { Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false)); testModel(net, modelName, input, l1, lInf); } -void testModel(const std::string& modelName, const Size& inpSize, double l1 = 1e-5, double lInf = 1e-4) +void Test_TFLite::testModel(const std::string& modelName, const Size& inpSize, double l1, double lInf) { Mat input = imread(findDataFile("cv/shared/lena.png")); input = blobFromImage(input, 1.0 / 255, inpSize, 0, true); testModel(modelName, input, l1, lInf); } -void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-4) +void Test_TFLite::testLayer(const std::string& modelName, double l1, double lInf) { Mat inp = blobFromNPY(findDataFile("dnn/tflite/" + modelName + "_inp.npy")); Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite")); @@ -69,29 +83,66 @@ void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e- } // https://google.github.io/mediapipe/solutions/face_mesh -TEST(Test_TFLite, face_landmark) +TEST_P(Test_TFLite, face_landmark) { - testModel("face_landmark", Size(192, 192), 2e-5, 2e-4); + if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16); + double l1 = 2e-5, lInf = 2e-4; + if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || + (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)) + { + l1 = 0.15; + lInf = 0.82; + } + testModel("face_landmark", Size(192, 192), l1, lInf); } // https://google.github.io/mediapipe/solutions/face_detection -TEST(Test_TFLite, face_detection_short_range) +TEST_P(Test_TFLite, face_detection_short_range) { - testModel("face_detection_short_range", Size(128, 128)); + double l1 = 0, lInf = 2e-4; + if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || + (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)) + { + l1 = 0.04; + lInf = 0.8; + } + testModel("face_detection_short_range", Size(128, 128), l1, lInf); } // https://google.github.io/mediapipe/solutions/selfie_segmentation -TEST(Test_TFLite, selfie_segmentation) +TEST_P(Test_TFLite, selfie_segmentation) { - testModel("selfie_segmentation", Size(256, 256)); + double l1 = 0, lInf = 0; + if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || + (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)) + { + l1 = 0.01; + lInf = 0.48; + } + testModel("selfie_segmentation", Size(256, 256), l1, lInf); } -TEST(Test_TFLite, max_unpooling) +TEST_P(Test_TFLite, max_unpooling) { + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) { + if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } + + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + // Due Max Unpoling is a numerically unstable operation and small difference between frameworks // might lead to positional difference of maximal elements in the tensor, this test checks // behavior of Max Unpooling layer only. Net net = readNet(findDataFile("dnn/tflite/hair_segmentation.tflite", false)); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); Mat input = imread(findDataFile("cv/shared/lena.png")); cvtColor(input, input, COLOR_BGR2RGBA); @@ -101,7 +152,15 @@ TEST(Test_TFLite, max_unpooling) net.setInput(input); std::vector > outs; - net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { + // TODO: seems like a bug with a retrieving intermediate tensors + net.forward(outs, {"conv2d_transpose_4", "p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); + outs.erase(outs.begin()); + } + else { + net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); + } + ASSERT_EQ(outs.size(), 4); ASSERT_EQ(outs[0].size(), 1); ASSERT_EQ(outs[1].size(), 2); @@ -117,6 +176,8 @@ TEST(Test_TFLite, max_unpooling) ASSERT_EQ(poolOut.size, poolIds.size); ASSERT_EQ(poolOut.size, unpoolInp.size); + ASSERT_EQ(countNonZero(poolInp), poolInp.total()); + for (int c = 0; c < 32; ++c) { float *poolInpData = poolInp.ptr(0, c); float *poolOutData = poolOut.ptr(0, c); @@ -135,15 +196,19 @@ TEST(Test_TFLite, max_unpooling) } } EXPECT_EQ(poolInpData[maxIdx], poolOutData[y * 64 + x]) << errMsg; - EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg; + if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { + EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg; + } EXPECT_EQ(unpoolOutData[maxIdx], unpoolInpData[y * 64 + x]) << errMsg; } } } } -TEST(Test_TFLite, EfficientDet_int8) { +TEST_P(Test_TFLite, EfficientDet_int8) { Net net = readNet(findDataFile("dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", false)); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); Mat img = imread(findDataFile("dnn/dog416.png")); Mat blob = blobFromImage(img, 1.0, Size(320, 320)); @@ -158,10 +223,18 @@ TEST(Test_TFLite, EfficientDet_int8) { normAssertDetections(ref, out, "", 0.5, 0.05, 0.1); } -TEST(Test_TFLite, replicate_by_pack) { - testLayer("replicate_by_pack"); +TEST_P(Test_TFLite, replicate_by_pack) { + double l1 = 0, lInf = 0; + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + { + l1 = 4e-4; + lInf = 2e-3; + } + testLayer("replicate_by_pack", l1, lInf); } +INSTANTIATE_TEST_CASE_P(/**/, Test_TFLite, dnnBackendsAndTargets()); + }} // namespace #endif // OPENCV_TEST_DNN_TFLITE diff --git a/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp b/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp index 4ba829df09..c418c0d496 100644 --- a/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp @@ -39,6 +39,12 @@ public: GAPI_WRAP PyParams& cfgAddExecutionProvider(ep::DirectML ep); + GAPI_WRAP + PyParams& cfgAddExecutionProvider(ep::CUDA ep); + + GAPI_WRAP + PyParams& cfgAddExecutionProvider(ep::TensorRT ep); + GAPI_WRAP PyParams& cfgDisableMemPattern(); diff --git a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp index 64b855acd7..ff5febcf90 100644 --- a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp @@ -32,6 +32,56 @@ namespace onnx { */ namespace ep { +/** + * @brief This structure provides functions + * that fill inference options for CUDA Execution Provider. + * Please follow https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#cuda-execution-provider + */ +struct GAPI_EXPORTS_W_SIMPLE CUDA { + // NB: Used from python. + /// @private -- Exclude this constructor from OpenCV documentation + GAPI_WRAP + CUDA() = default; + + /** @brief Class constructor. + + Constructs CUDA parameters based on device type information. + + @param dev_id Target device id to use. + */ + GAPI_WRAP + explicit CUDA(const int dev_id) + : device_id(dev_id) { + } + + int device_id; +}; + +/** + * @brief This structure provides functions + * that fill inference options for TensorRT Execution Provider. + * Please follow https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#tensorrt-execution-provider + */ +struct GAPI_EXPORTS_W_SIMPLE TensorRT { + // NB: Used from python. + /// @private -- Exclude this constructor from OpenCV documentation + GAPI_WRAP + TensorRT() = default; + + /** @brief Class constructor. + + Constructs TensorRT parameters based on device type information. + + @param dev_id Target device id to use. + */ + GAPI_WRAP + explicit TensorRT(const int dev_id) + : device_id(dev_id) { + } + + int device_id; +}; + /** * @brief This structure provides functions * that fill inference options for ONNX OpenVINO Execution Provider. @@ -143,7 +193,11 @@ public: DeviceDesc ddesc; }; -using EP = cv::util::variant; +using EP = cv::util::variant< cv::util::monostate + , OpenVINO + , DirectML + , CUDA + , TensorRT>; } // namespace ep @@ -431,6 +485,34 @@ public: return *this; } + /** @brief Adds execution provider for runtime. + + The function is used to add ONNX Runtime CUDA Execution Provider options. + + @param ep CUDA Execution Provider options. + @see cv::gapi::onnx::ep::CUDA. + + @return the reference on modified object. + */ + Params& cfgAddExecutionProvider(ep::CUDA&& ep) { + desc.execution_providers.emplace_back(std::move(ep)); + return *this; + } + + /** @brief Adds execution provider for runtime. + + The function is used to add ONNX Runtime TensorRT Execution Provider options. + + @param ep TensorRT Execution Provider options. + @see cv::gapi::onnx::ep::TensorRT. + + @return the reference on modified object. + */ + Params& cfgAddExecutionProvider(ep::TensorRT&& ep) { + desc.execution_providers.emplace_back(std::move(ep)); + return *this; + } + /** @brief Disables the memory pattern optimization. @return the reference on modified object. @@ -491,6 +573,16 @@ public: desc.execution_providers.emplace_back(std::move(ep)); } + /** @see onnx::Params::cfgAddExecutionProvider. */ + void cfgAddExecutionProvider(ep::CUDA&& ep) { + desc.execution_providers.emplace_back(std::move(ep)); + } + + /** @see onnx::Params::cfgAddExecutionProvider. */ + void cfgAddExecutionProvider(ep::TensorRT&& ep) { + desc.execution_providers.emplace_back(std::move(ep)); + } + /** @see onnx::Params::cfgDisableMemPattern. */ void cfgDisableMemPattern() { desc.disable_mem_pattern = true; diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index 60d5f85479..3269a7d470 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -31,6 +31,8 @@ using map_string_and_vector_float = std::map>; using map_int_and_double = std::map; using ep_OpenVINO = cv::gapi::onnx::ep::OpenVINO; using ep_DirectML = cv::gapi::onnx::ep::DirectML; +using ep_CUDA = cv::gapi::onnx::ep::CUDA; +using ep_TensorRT = cv::gapi::onnx::ep::TensorRT; // NB: Python wrapper generate T_U for T // This behavior is only observed for inputs diff --git a/modules/gapi/src/backends/onnx/bindings_onnx.cpp b/modules/gapi/src/backends/onnx/bindings_onnx.cpp index 6051c6bb4d..b41ec7b1b1 100644 --- a/modules/gapi/src/backends/onnx/bindings_onnx.cpp +++ b/modules/gapi/src/backends/onnx/bindings_onnx.cpp @@ -33,6 +33,18 @@ cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::DirectML e return *this; } +cv::gapi::onnx::PyParams& +cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::CUDA ep) { + m_priv->cfgAddExecutionProvider(std::move(ep)); + return *this; +} + +cv::gapi::onnx::PyParams& +cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::TensorRT ep) { + m_priv->cfgAddExecutionProvider(std::move(ep)); + return *this; +} + cv::gapi::onnx::PyParams& cv::gapi::onnx::PyParams::cfgDisableMemPattern() { m_priv->cfgDisableMemPattern(); diff --git a/modules/gapi/src/backends/onnx/gonnxbackend.cpp b/modules/gapi/src/backends/onnx/gonnxbackend.cpp index b90d4d6974..c552b8b0e6 100644 --- a/modules/gapi/src/backends/onnx/gonnxbackend.cpp +++ b/modules/gapi/src/backends/onnx/gonnxbackend.cpp @@ -145,9 +145,39 @@ public: void run(); }; +static void addCUDAExecutionProvider(Ort::SessionOptions *session_options, + const cv::gapi::onnx::ep::CUDA &cuda_ep) { + OrtCUDAProviderOptions options{}; + options.device_id = cuda_ep.device_id; + + try { + session_options->AppendExecutionProvider_CUDA(options); + } catch (const std::exception &e) { + std::stringstream ss; + ss << "ONNX Backend: Failed to enable CUDA" + << " Execution Provider: " << e.what(); + cv::util::throw_error(std::runtime_error(ss.str())); + } +} + +static void addTensorRTExecutionProvider(Ort::SessionOptions *session_options, + const cv::gapi::onnx::ep::TensorRT &trt_ep) { + OrtTensorRTProviderOptions options{}; + options.device_id = trt_ep.device_id; + + try { + session_options->AppendExecutionProvider_TensorRT(options); + } catch (const std::exception &e) { + std::stringstream ss; + ss << "ONNX Backend: Failed to enable TensorRT" + << " Execution Provider: " << e.what(); + cv::util::throw_error(std::runtime_error(ss.str())); + } +} + static void addOpenVINOExecutionProvider(Ort::SessionOptions *session_options, const cv::gapi::onnx::ep::OpenVINO &ov_ep) { - OrtOpenVINOProviderOptions options; + OrtOpenVINOProviderOptions options{}; options.device_type = ov_ep.device_type.c_str(); options.cache_dir = ov_ep.cache_dir.c_str(); options.num_of_threads = ov_ep.num_of_threads; @@ -181,6 +211,18 @@ static void addExecutionProvider(Ort::SessionOptions *session_options, addDMLExecutionProvider(session_options, dml_ep); break; } + case ep::EP::index_of(): { + GAPI_LOG_INFO(NULL, "CUDA Execution Provider is added."); + const auto &cuda_ep = cv::util::get(execution_provider); + addCUDAExecutionProvider(session_options, cuda_ep); + break; + } + case ep::EP::index_of(): { + GAPI_LOG_INFO(NULL, "TensorRT Execution Provider is added."); + const auto &trt_ep = cv::util::get(execution_provider); + addTensorRTExecutionProvider(session_options, trt_ep); + break; + } default: GAPI_LOG_INFO(NULL, "CPU Execution Provider is added."); break; diff --git a/modules/videoio/src/cap_v4l.cpp b/modules/videoio/src/cap_v4l.cpp index e3c53d7cdd..905c79e42f 100644 --- a/modules/videoio/src/cap_v4l.cpp +++ b/modules/videoio/src/cap_v4l.cpp @@ -260,6 +260,10 @@ typedef uint32_t __u32; #define V4L2_CID_IRIS_ABSOLUTE (V4L2_CID_CAMERA_CLASS_BASE+17) #endif +#ifndef v4l2_fourcc_be +#define v4l2_fourcc_be(a, b, c, d) (v4l2_fourcc(a, b, c, d) | (1U << 31)) +#endif + #ifndef V4L2_PIX_FMT_Y10 #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ') #endif diff --git a/modules/videoio/test/test_precomp.hpp b/modules/videoio/test/test_precomp.hpp index 9bd613d8f0..815264c494 100644 --- a/modules/videoio/test/test_precomp.hpp +++ b/modules/videoio/test/test_precomp.hpp @@ -65,7 +65,7 @@ inline std::string fourccToStringSafe(int fourcc) { std::string res = fourccToString(fourcc); // TODO: return hex values for invalid characters - std::transform(res.begin(), res.end(), res.begin(), [](uint8_t c) { return (c >= '0' && c <= 'z') ? c : (c == ' ' ? '_' : 'x'); }); + std::transform(res.begin(), res.end(), res.begin(), [](char c) -> char { return (c >= '0' && c <= 'z') ? c : (c == ' ' ? '_' : 'x'); }); return res; } diff --git a/modules/videoio/test/test_v4l2.cpp b/modules/videoio/test/test_v4l2.cpp index 5d56ac097c..1c4917bfca 100644 --- a/modules/videoio/test/test_v4l2.cpp +++ b/modules/videoio/test/test_v4l2.cpp @@ -22,6 +22,9 @@ #include // workarounds for older versions +#ifndef v4l2_fourcc_be +#define v4l2_fourcc_be(a, b, c, d) (v4l2_fourcc(a, b, c, d) | (1U << 31)) +#endif #ifndef V4L2_PIX_FMT_Y10 #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ') #endif