Enable Mask R-CNN with Inference Engine. Full coverage with nGraph

2025-07-22 12:17:04 +08:00 · 2020-02-16 22:12:14 +03:00 · 2020-02-16 22:12:14 +03:00 · f3eef792eb
commit f3eef792eb
parent a6f3a21256
5 changed files with 118 additions and 32 deletions
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@ -1294,13 +1294,15 @@ struct Net::Impl
 #endif
            clear();

+            this->blobsToKeep = blobsToKeep_;
+
            allocateLayers(blobsToKeep_);

            MapIdToLayerData::iterator it = layers.find(0);
            CV_Assert(it != layers.end());
            it->second.skip = netInputLayer->skip;

-            initBackend();
+            initBackend(blobsToKeep_);

            if (!netWasAllocated )
            {
@ -1313,7 +1315,6 @@ struct Net::Impl
            }

            netWasAllocated = true;
-            this->blobsToKeep = blobsToKeep_;

            if (DNN_NETWORK_DUMP > 0)
            {
@ -1440,7 +1441,7 @@ struct Net::Impl
        ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
    }

-    void initBackend()
+    void initBackend(const std::vector<LayerPin>& blobsToKeep_)
    {
        CV_TRACE_FUNCTION();
        if (preferableBackend == DNN_BACKEND_OPENCV)
@ -1450,7 +1451,7 @@ struct Net::Impl
        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
        {
 #ifdef HAVE_INF_ENGINE
-            initInfEngineBackend();
+            initInfEngineBackend(blobsToKeep_);
 #else
            CV_Assert(false && "This OpenCV version is built without Inference Engine API support");
 #endif
@ -1458,7 +1459,7 @@ struct Net::Impl
        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
        {
 #ifdef HAVE_DNN_NGRAPH
-            initNgraphBackend();
+            initNgraphBackend(blobsToKeep_);
 #else
            CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
 #endif
@ -1560,7 +1561,7 @@ struct Net::Impl
        }
    }

-    void initInfEngineBackend()
+    void initInfEngineBackend(const std::vector<LayerPin>& blobsToKeep_)
    {
        CV_TRACE_FUNCTION();
        CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, haveInfEngine());
@ -1750,6 +1751,15 @@ struct Net::Impl
            CV_Assert(!ieNode.empty());
            ieNode->net = net;

+            for (const auto& pin : blobsToKeep_)
+            {
+                if (pin.lid == ld.id)
+                {
+                    ieNode->net->addOutput(ieNode->layer.getName());
+                    break;
+                }
+            }
+
            // Convert weights in FP16 for specific targets.
            if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
                 preferableTarget == DNN_TARGET_MYRIAD ||
@ -1856,7 +1866,7 @@ struct Net::Impl
        }
    }

-    void initNgraphBackend()
+    void initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
    {
        CV_TRACE_FUNCTION();
        CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, haveInfEngine());
@ -2045,6 +2055,14 @@ struct Net::Impl
                // TF EAST_text_detection
                ieNode->net->setUnconnectedNodes(ieNode);
            }
+            for (const auto& pin : blobsToKeep_)
+            {
+                if (pin.lid == ld.id)
+                {
+                    ieNode->net->addOutput(ieNode->node->get_friendly_name());
+                    break;
+                }
+            }
            ieNode->net->setNodePtr(&ieNode->node);

            net->addBlobs(ld.inputBlobsWrappers);
--- a/modules/dnn/src/ie_ngraph.cpp
+++ b/modules/dnn/src/ie_ngraph.cpp
@ -231,11 +231,10 @@ void InfEngineNgraphNet::init(Target targetId)
                }
            }
        }
-    } else {
-        for (const auto& name : requestedOutputs)
-        {
-            cnn.addOutput(name);
-        }
+    }
+    for (const auto& name : requestedOutputs)
+    {
+        cnn.addOutput(name);
    }

    for (const auto& it : cnn.getInputsInfo())
--- a/modules/dnn/src/layers/crop_and_resize_layer.cpp
+++ b/modules/dnn/src/layers/crop_and_resize_layer.cpp
@ -5,6 +5,7 @@
 // Copyright (C) 2018, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 #include "../precomp.hpp"
+#include "../ie_ngraph.hpp"
 #include "layers_common.hpp"

 namespace cv { namespace dnn {
@ -20,6 +21,11 @@ public:
        outHeight = params.get<float>("height");
    }

+    // Backend capability query for this layer: the answer is true only when
+    // backendId equals DNN_BACKEND_OPENCV or DNN_BACKEND_INFERENCE_ENGINE_NGRAPH.
+    virtual bool supportBackend(int backendId) CV_OVERRIDE
+    {
+        if (backendId == DNN_BACKEND_OPENCV)
+            return true;
+        return backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+    }
+
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
@ -111,6 +117,41 @@ public:
        }
    }

+#ifdef HAVE_DNN_NGRAPH
+    // Builds the nGraph sub-graph for this layer.
+    //   inputs - backend wrappers of the layer inputs (not used here).
+    //   nodes  - nodes[0] is the tensor that is pooled from, nodes[1] carries the
+    //            ROIs as a 4D tensor (1x1xNx7).
+    // The ROIs are sliced along the last axis, flattened to Nx5 and passed with
+    // nodes[0] to a bilinear ROIPooling of size outHeight x outWidth.
+    // Returns the ROIPooling node wrapped in an InfEngineNgraphNode.
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        CV_Assert(nodes.size() >= 2);
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        auto rois = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
+
+        // The slice below addresses axis 3, so the ROI tensor has to be 4D.
+        const std::vector<size_t> roisDims = rois->get_shape();
+        CV_Assert(roisDims.size() == 4);
+
+        // Slice second input: from 1x1xNx7 to 1x1xNx5 (elements [2, 7) of the last axis).
+        // The bounds become i64 constants, so they are kept in int64_t storage:
+        // a size_t buffer handed over by pointer is misread where size_t is not 64 bits wide.
+        std::vector<int64_t> begin(roisDims.size(), 0);
+        std::vector<int64_t> end(roisDims.begin(), roisDims.end());
+        begin[3] = 2;
+        end[3] = 7;
+        const std::vector<int64_t> steps(roisDims.size(), 1);
+        const ngraph::Shape boundsShape{roisDims.size()};
+
+        auto lower_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, boundsShape, begin);
+        auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, boundsShape, end);
+        auto strides = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, boundsShape, steps);
+        auto slice = std::make_shared<ngraph::op::v1::StridedSlice>(rois,
+                                      lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
+
+        // Reshape rois from 4D to 2D: N x 5
+        const std::vector<int64_t> flatRoisShape = {end[2], 5};
+        auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, flatRoisShape);
+        auto reshape = std::make_shared<ngraph::op::v1::Reshape>(slice, shape, true);
+
+        auto roiPooling =
+            std::make_shared<ngraph::op::v0::ROIPooling>(input, reshape,
+                                                         ngraph::Shape{static_cast<size_t>(outHeight),
+                                                                       static_cast<size_t>(outWidth)},
+                                                         1.0f, "bilinear");
+
+        return Ptr<BackendNode>(new InfEngineNgraphNode(roiPooling));
+    }
+#endif  // HAVE_DNN_NGRAPH
+
 private:
    int outWidth, outHeight;
 };
--- a/modules/dnn/src/layers/scale_layer.cpp
+++ b/modules/dnn/src/layers/scale_layer.cpp
@ -53,7 +53,8 @@ public:
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && axis == 1);
+               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && axis == 1) ||
+               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && axis > 0);
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
@ -233,22 +234,26 @@ public:
        auto ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;

        std::vector<size_t> shape(ieInpNode->get_shape().size(), 1);
-        shape[1] = numChannels;
-        auto weight = hasWeights ?
-                    std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
-                                                           ngraph::Shape(shape), blobs[0].data) :
-                    std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
-                                                           ngraph::Shape(shape), std::vector<float>(numChannels, 1).data());
+        int cAxis = clamp(axis, shape.size());
+        shape[cAxis] = numChannels;

-        auto bias = hasBias ?
-                    std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
-                                                           ngraph::Shape(shape), blobs.back().data) :
-                    std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
-                                                           ngraph::Shape(shape), std::vector<float>(numChannels, 0).data());
-
-        auto scale_node = std::make_shared<ngraph::op::v1::Multiply>(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY);
-        auto scale_shift = std::make_shared<ngraph::op::v1::Add>(scale_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
-        return Ptr<BackendNode>(new InfEngineNgraphNode(scale_shift));
+        auto node = ieInpNode;
+        if (hasWeights)
+        {
+            auto weight = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
+                                                                 ngraph::Shape(shape), blobs[0].data);
+            node = std::make_shared<ngraph::op::v1::Multiply>(node, weight, ngraph::op::AutoBroadcastType::NUMPY);
+        }
+        if (hasBias || !hasWeights)
+        {
+            auto bias = hasBias ?
+                        std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
+                                                               ngraph::Shape(shape), blobs.back().data) :
+                        std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
+                                                               ngraph::Shape(shape), std::vector<float>(numChannels, 0).data());
+            node = std::make_shared<ngraph::op::v1::Add>(node, bias, ngraph::op::AutoBroadcastType::NUMPY);
+        }
+        return Ptr<BackendNode>(new InfEngineNgraphNode(node));
    }
 #endif  // HAVE_DNN_NGRAPH

--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@ -914,8 +914,16 @@ TEST(Test_TensorFlow, two_inputs)
    normAssert(out, firstInput + secondInput);
 }

-TEST(Test_TensorFlow, Mask_RCNN)
+TEST_P(Test_TensorFlow_nets, Mask_RCNN)
 {
+    static const double kMaskThreshold = 0.5;
+
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+
+    if (target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
+
    applyTestTag(CV_TEST_TAG_MEMORY_1GB, CV_TEST_TAG_DEBUG_VERYLONG);
    Mat img = imread(findDataFile("dnn/street.png"));
    std::string proto = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt");
@ -926,7 +934,8 @@ TEST(Test_TensorFlow, Mask_RCNN)
    Mat refMasks = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_masks.npy"));
    Mat blob = blobFromImage(img, 1.0f, Size(800, 800), Scalar(), true, false);

-    net.setPreferableBackend(DNN_BACKEND_OPENCV);
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);

    net.setInput(blob);

@ -940,7 +949,10 @@ TEST(Test_TensorFlow, Mask_RCNN)

    Mat outDetections = outs[0];
    Mat outMasks = outs[1];
-    normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5);
+
+    double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.019 : 2e-5;
+    double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.018 : default_lInf;
+    normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff);

    // Output size of masks is NxCxHxW where
    // N - number of detected boxes
@ -964,7 +976,18 @@ TEST(Test_TensorFlow, Mask_RCNN)
        outMasks(srcRanges).copyTo(masks(dstRanges));
    }
    cv::Range topRefMasks[] = {Range::all(), Range(0, numDetections), Range::all(), Range::all()};
-    normAssert(masks, refMasks(&topRefMasks[0]));
+    refMasks = refMasks(&topRefMasks[0]);
+
+    // make binary masks
+    cv::threshold(masks.reshape(1, 1), masks, kMaskThreshold, 1, THRESH_BINARY);
+    cv::threshold(refMasks.reshape(1, 1), refMasks, kMaskThreshold, 1, THRESH_BINARY);
+
+    double inter = cv::countNonZero(masks & refMasks);
+    double area = cv::countNonZero(masks | refMasks);
+    EXPECT_GE(inter / area, 0.99);
+
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        expectNoFallbacks(net);
 }

 }