Merge pull request #26394 from alexlyulkov:al/new-engine-tf-parser
Modified tensorflow parser for the new dnn engine #26394

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under the Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV.
- [x] The PR is proposed to the proper branch.
- [ ] There is a reference to the original bug report and related work.
- [x] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake.
parent b9914065e8 · commit 3672a14b42
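For context, a minimal sketch of the extended TensorFlow reader this PR introduces (the model/config paths are placeholders; the signature and the `ENGINE_AUTO` default follow the header changes in the diff below):

```cpp
#include <opencv2/dnn.hpp>

int main()
{
    using namespace cv::dnn;

    // Outputs that the new engine's graph analyzer may not discover on its own
    // can now be requested explicitly; ENGINE_AUTO selects the new engine when possible.
    std::vector<cv::String> extraOutputs = {"detection_out_final", "detection_masks"};
    Net net = readNetFromTensorflow("mask_rcnn.pb",    // placeholder model path
                                    "mask_rcnn.pbtxt", // placeholder config path
                                    ENGINE_AUTO,
                                    extraOutputs);
    return 0;
}
```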
@@ -1090,17 +1090,28 @@ CV__DNN_INLINE_NS_BEGIN
      * @param config path to the .pbtxt file that contains text graph definition in protobuf format.
      * Resulting Net object is built by text graph using weights from a binary one that
      * let us make it more flexible.
+     * @param engine select DNN engine to be used. With auto selection the new engine is used.
+     * @param extraOutputs specify model outputs explicitly, in addition to the outputs the graph analyzer finds.
+     * Please pay attention that the new DNN does not support non-CPU back-ends for now.
      * @returns Net object.
      */
-    CV_EXPORTS_W Net readNetFromTensorflow(CV_WRAP_FILE_PATH const String &model, CV_WRAP_FILE_PATH const String &config = String());
+    CV_EXPORTS_W Net readNetFromTensorflow(CV_WRAP_FILE_PATH const String &model,
+                                           CV_WRAP_FILE_PATH const String &config = String(),
+                                           int engine=ENGINE_AUTO,
+                                           const std::vector<String>& extraOutputs = std::vector<String>());

     /** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
      * @param bufferModel buffer containing the content of the pb file
      * @param bufferConfig buffer containing the content of the pbtxt file
+     * @param engine select DNN engine to be used. With auto selection the new engine is used.
+     * @param extraOutputs specify model outputs explicitly, in addition to the outputs the graph analyzer finds.
+     * Please pay attention that the new DNN does not support non-CPU back-ends for now.
      * @returns Net object.
      */
     CV_EXPORTS_W Net readNetFromTensorflow(const std::vector<uchar>& bufferModel,
-                                           const std::vector<uchar>& bufferConfig = std::vector<uchar>());
+                                           const std::vector<uchar>& bufferConfig = std::vector<uchar>(),
+                                           int engine=ENGINE_AUTO,
+                                           const std::vector<String>& extraOutputs = std::vector<String>());

     /** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
      * @details This is an overloaded member function, provided for convenience.
@@ -1109,9 +1120,14 @@ CV__DNN_INLINE_NS_BEGIN
      * @param lenModel length of bufferModel
      * @param bufferConfig buffer containing the content of the pbtxt file
      * @param lenConfig length of bufferConfig
+     * @param engine select DNN engine to be used. With auto selection the new engine is used.
+     * @param extraOutputs specify model outputs explicitly, in addition to the outputs the graph analyzer finds.
+     * Please pay attention that the new DNN does not support non-CPU back-ends for now.
      */
     CV_EXPORTS Net readNetFromTensorflow(const char *bufferModel, size_t lenModel,
-                                         const char *bufferConfig = NULL, size_t lenConfig = 0);
+                                         const char *bufferConfig = NULL, size_t lenConfig = 0,
+                                         int engine=ENGINE_AUTO,
+                                         const std::vector<String>& extraOutputs = std::vector<String>());

     /** @brief Reads a network model stored in <a href="https://www.tensorflow.org/lite">TFLite</a> framework's format.
      * @param model path to the .tflite file with binary flatbuffers description of the network architecture
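A similarly hedged sketch of the in-memory overloads declared above (the file path and the `readFileBytes` helper are illustrative only):

```cpp
#include <opencv2/dnn.hpp>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

// Illustrative helper: slurp a protobuf file into memory.
static std::vector<uchar> readFileBytes(const std::string& path)
{
    std::ifstream f(path, std::ios::binary);
    return std::vector<uchar>(std::istreambuf_iterator<char>(f),
                              std::istreambuf_iterator<char>());
}

int main()
{
    std::vector<uchar> modelBuf = readFileBytes("graph.pb"); // placeholder path

    // The buffer overload mirrors the file-based one: optional pbtxt config buffer,
    // engine selection, and explicit extra outputs.
    cv::dnn::Net net = cv::dnn::readNetFromTensorflow(modelBuf,
                                                      std::vector<uchar>(),
                                                      cv::dnn::ENGINE_AUTO);
    return 0;
}
```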
@@ -69,10 +69,10 @@ public class DnnListRegressionTest extends OpenCVTestCase {
         Mat inputBlob = Dnn.blobFromImage(image, 1.0, new Size(224, 224), new Scalar(0), true, true);
         assertNotNull("Converting image to blob failed!", inputBlob);

-        net.setInput(inputBlob, "input");
+        net.setInput(inputBlob, "");
     }

-    public void testSetInputsNames() {
+    /*public void testSetInputsNames() {
         List<String> inputs = new ArrayList();
         inputs.add("input");
         try {
@@ -80,12 +80,12 @@ public class DnnListRegressionTest extends OpenCVTestCase {
         } catch(Exception e) {
             fail("Net setInputsNames failed: " + e.getMessage());
         }
-    }
+    }*/

     public void testForward() {
         List<Mat> outs = new ArrayList();
         List<String> outNames = new ArrayList();
-        outNames.add("softmax2");
+        //outNames.add("");
         try {
             net.forward(outs,outNames);
         } catch(Exception e) {
@@ -74,20 +74,15 @@ public class DnnTensorFlowTest extends OpenCVTestCase {
     }

     public void testGetLayer() {
-        List<String> layernames = net.getLayerNames();
-
-        assertFalse("Test net returned no layers!", layernames.isEmpty());
-
-        String testLayerName = layernames.get(0);
-
-        DictValue layerId = new DictValue(testLayerName);
-
-        assertEquals("DictValue did not return the string, which was used in constructor!", testLayerName, layerId.getStringValue());
-
-        Layer layer = net.getLayer(layerId);
-
-        assertEquals("Layer name does not match the expected value!", testLayerName, layer.get_name());
+        List<String> layerNames = net.getLayerNames();
+        assertFalse("Test net returned no layers!", layerNames.isEmpty());
+
+        int layerId = 0;
+        for (String layerName: layerNames) {
+            Layer layer = net.getLayer(layerId);
+            assertEquals("Layer name does not match the expected value!", layerName, layer.get_name());
+            layerId++;
+        }
     }

     public void checkInceptionNet(Net net)
@@ -98,12 +93,12 @@ public class DnnTensorFlowTest extends OpenCVTestCase {
         Mat inputBlob = Dnn.blobFromImage(image, 1.0, new Size(224, 224), new Scalar(0), true, true);
         assertNotNull("Converting image to blob failed!", inputBlob);

-        net.setInput(inputBlob, "input");
+        net.setInput(inputBlob, "");

         Mat result = new Mat();
         try {
             net.setPreferableBackend(Dnn.DNN_BACKEND_OPENCV);
-            result = net.forward("softmax2");
+            result = net.forward("");
         }
         catch (Exception e) {
             fail("DNN forward failed: " + e.getMessage());
@@ -7,8 +7,8 @@
     "(Net*)readNetFromDarknet:(ByteVector*)bufferCfg bufferModel:(ByteVector*)bufferModel" : { "readNetFromDarknet" : {"name" : "readNetFromDarknetBuffer"} },
     "(Net*)readNetFromONNX:(NSString*)onnxFile engine:(int)engine" : { "readNetFromONNX" : {"name" : "readNetFromONNXFile"} },
     "(Net*)readNetFromONNX:(ByteVector*)buffer engine:(int)engine" : { "readNetFromONNX" : {"name" : "readNetFromONNXBuffer"} },
-    "(Net*)readNetFromTensorflow:(NSString*)model config:(NSString*)config" : { "readNetFromTensorflow" : {"name" : "readNetFromTensorflowFile"} },
-    "(Net*)readNetFromTensorflow:(ByteVector*)bufferModel bufferConfig:(ByteVector*)bufferConfig" : { "readNetFromTensorflow" : {"name" : "readNetFromTensorflowBuffer"} },
+    "(Net*)readNetFromTensorflow:(NSString*)model config:(NSString*)config engine:(int)engine extraOutputs:(NSArray<NSString*>*)extraOutputs" : { "readNetFromTensorflow" : {"name" : "readNetFromTensorflowFile"} },
+    "(Net*)readNetFromTensorflow:(ByteVector*)bufferModel bufferConfig:(ByteVector*)bufferConfig engine:(int)engine extraOutputs:(NSArray<NSString*>*)extraOutputs" : { "readNetFromTensorflow" : {"name" : "readNetFromTensorflowBuffer"} },
     "(Net*)readNetFromTFLite:(NSString*)model engine:(int)engine" : { "readNetFromTFLite" : {"name" : "readNetFromTFLiteFile"} },
     "(Net*)readNetFromTFLite:(ByteVector*)buffer engine:(int)engine" : { "readNetFromTFLite" : {"name" : "readNetFromTFLiteBuffer"} }
   },
@@ -119,7 +119,7 @@ PERF_TEST_P_(DNNTestNetwork, SqueezeNet_v1_1)
 PERF_TEST_P_(DNNTestNetwork, Inception_5h)
 {
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) throw SkipTestException("");
-    processNet("dnn/tensorflow_inception_graph.pb", "", cv::Size(224, 224), "softmax2");
+    processNet("dnn/tensorflow_inception_graph.pb", "", cv::Size(224, 224));
 }

 PERF_TEST_P_(DNNTestNetwork, SSD)
@@ -27,11 +27,11 @@ Net readNet(const String& _model, const String& _config, const String& _framework,
     {
         if (modelExt == "pbtxt" || configExt == "pb")
             std::swap(model, config);
-        return readNetFromTensorflow(model, config);
+        return readNetFromTensorflow(model, config, engine);
     }
     if (framework == "tflite" || modelExt == "tflite")
     {
-        return readNetFromTFLite(model);
+        return readNetFromTFLite(model, engine);
     }
     if (framework == "darknet" || modelExt == "weights" || configExt == "weights" || modelExt == "cfg" || configExt == "cfg")
     {
@@ -63,13 +63,13 @@ Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
     else if (framework == "caffe")
         return readNetFromCaffe(bufferConfig, bufferModel, engine);
     else if (framework == "tensorflow")
-        return readNetFromTensorflow(bufferModel, bufferConfig);
+        return readNetFromTensorflow(bufferModel, bufferConfig, engine);
     else if (framework == "darknet")
         return readNetFromDarknet(bufferConfig, bufferModel);
     else if (framework == "dldt" || framework == "openvino")
        return readNetFromModelOptimizer(bufferConfig, bufferModel);
     else if (framework == "tflite")
-        return readNetFromTFLite(bufferModel);
+        return readNetFromTFLite(bufferModel, engine);
     CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
 }
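And a sketch of how the generic entry point picks these up (file names are placeholders; `readNet` keeps inferring the framework from the extension and, per the hunks above, forwards the selected engine to the framework-specific readers):

```cpp
#include <opencv2/dnn.hpp>

int main()
{
    // ".pb" dispatches to readNetFromTensorflow, ".tflite" to readNetFromTFLite;
    // the chosen engine is passed through internally as shown above.
    cv::dnn::Net tfNet  = cv::dnn::readNet("graph.pb");      // placeholder path
    cv::dnn::Net tflNet = cv::dnn::readNet("model.tflite");  // placeholder path
    return 0;
}
```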
@@ -322,7 +322,8 @@ public:

         internals.clear();

-        CV_Assert(inputs.size() != 0);
+        CV_Assert(!inputs.empty());
+        CV_Assert(inputs[0].size() > 2);
         std::vector<int> inpShape(inputs[0].begin() + 2, inputs[0].end());

         int outCn = weightShape[0];
@@ -234,7 +234,6 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const CV_OVERRIDE
     {
-
         if (inputs.size() == 1 || inputs.size() == requiredOutputs)
         {
             outputs.clear();
@@ -46,9 +46,19 @@ public:
         net = network;

         outNames = net.getUnconnectedOutLayersNames();
+
+        Ptr<Graph> graph = net.getMainGraph();
         std::vector<MatShape> inLayerShapes;
-        std::vector<MatShape> outLayerShapes;
-        net.getLayerShapes(MatShape(), CV_32F, 0, inLayerShapes, outLayerShapes);
+
+        if (graph) {
+            const std::vector<Arg>& inputs = graph->inputs();
+            for (auto inp: inputs) {
+                inLayerShapes.push_back(net.argData(inp).shape);
+            }
+        } else {
+            std::vector<MatShape> outLayerShapes;
+            net.getLayerShapes(MatShape(), CV_32F, 0, inLayerShapes, outLayerShapes);
+        }
         if (!inLayerShapes.empty() && inLayerShapes[0].size() == 4)
             size = Size(inLayerShapes[0][3], inLayerShapes[0][2]);
         else
@@ -1182,6 +1192,11 @@ struct TextDetectionModel_EAST_Impl : public TextDetectionModel_Impl
         CV_CheckEQ(geometry.dims, 4, "");
         CV_CheckEQ(scoreMap.size[0], 1, "");
         CV_CheckEQ(geometry.size[0], 1, "");
+
+        if (geometry.size[1] == 1 && scoreMap.size[1] == 5) {
+            std::swap(geometry, scoreMap);
+        }
+
         CV_CheckEQ(scoreMap.size[1], 1, "");
         CV_CheckEQ(geometry.size[1], 5, "");
         CV_CheckEQ(scoreMap.size[2], geometry.size[2], "");
@@ -199,6 +199,7 @@ Mat& Net::Impl::argTensor(Arg arg) const
     if (adata.kind == DNN_ARG_TEMP) {
         CV_Assert(__tensors__.at(arg.idx).empty());
         int bufidx = bufidxs.at(arg.idx);
+        CV_Assert(bufidx >= 0);
         return const_cast<Mat&>(buffers.at(bufidx));
     }
     return const_cast<Mat&>(__tensors__.at(arg.idx));
@@ -773,7 +773,7 @@ public:
     }
 };

-void simplifySubgraphs(tensorflow::GraphDef& net)
+void simplifySubgraphs(tensorflow::GraphDef& net, bool newEngine)
 {
     std::vector<Ptr<Subgraph> > subgraphs;
     subgraphs.push_back(Ptr<Subgraph>(new BatchNormSubgraph()));
@@ -791,7 +791,6 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
     subgraphs.push_back(Ptr<Subgraph>(new UpsamplingKerasSubgraph("ResizeBilinear")));
     subgraphs.push_back(Ptr<Subgraph>(new SoftMaxSlimSubgraph()));
     subgraphs.push_back(Ptr<Subgraph>(new SoftMaxSlimV2Subgraph()));
-    subgraphs.push_back(Ptr<Subgraph>(new ReshapeAsShapeSubgraph()));
     subgraphs.push_back(Ptr<Subgraph>(new KerasMVNSubgraph()));
     subgraphs.push_back(Ptr<Subgraph>(new PReLUSubgraph(true)));
     subgraphs.push_back(Ptr<Subgraph>(new PReLUSubgraph(false)));
@@ -799,6 +798,9 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
     subgraphs.push_back(Ptr<Subgraph>(new ResizeBilinearSubgraphDown()));
     subgraphs.push_back(Ptr<Subgraph>(new ClipByValueSubgraph()));

+    if (!newEngine)
+        subgraphs.push_back(Ptr<Subgraph>(new ReshapeAsShapeSubgraph()));
+
     for (int i = 0; i < net.node_size(); ++i)
     {
         tensorflow::NodeDef* layer = net.mutable_node(i);
@@ -17,7 +17,7 @@ CV__DNN_INLINE_NS_BEGIN

 void RemoveIdentityOps(tensorflow::GraphDef& net);

-void simplifySubgraphs(tensorflow::GraphDef& net);
+void simplifySubgraphs(tensorflow::GraphDef& net, bool newEngine);

 Mat getTensorContent(const tensorflow::TensorProto& tensor, bool forceCopy = true);
(File diff suppressed because it is too large.)
@@ -46,7 +46,7 @@ TEST(Test_TensorFlow, read_inception)
     Mat inputBlob = blobFromImage(input);

     net.setInput(inputBlob, "input");
-    Mat out = net.forward("softmax2");
+    Mat out = net.forward();

     std::cout << out.dims << std::endl;
 }
@@ -66,7 +66,7 @@ TEST(Test_TensorFlow, inception_accuracy)
     Mat inputBlob = blobFromImage(sample, 1.0, Size(224, 224), Scalar(), /*swapRB*/true);

     net.setInput(inputBlob, "input");
-    Mat out = net.forward("softmax2");
+    Mat out = net.forward();

     Mat ref = blobFromNPY(_tf("tf_inception_prob.npy"));
@@ -1812,7 +1812,12 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN)
     std::string proto = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt");
     std::string model = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pb", false);

-    Net net = readNetFromTensorflow(model, proto);
+    // Mask-RCNN predicts bounding boxes and segmentation masks.
+    std::vector<std::string> outNames(2);
+    outNames[0] = "detection_out_final";
+    outNames[1] = "detection_masks";
+
+    Net net = readNetFromTensorflow(model, proto, ENGINE_AUTO, outNames);
     Mat refDetections = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_out.npy"));
     Mat refMasks = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_masks.npy"));
     Mat blob = blobFromImage(img, 1.0f, Size(800, 800), Scalar(), true, false);
@@ -1824,14 +1829,8 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN)

     net.setInput(blob);

-    // Mask-RCNN predicts bounding boxes and segmentation masks.
-    std::vector<String> outNames(2);
-    outNames[0] = "detection_out_final";
-    outNames[1] = "detection_masks";
-
     std::vector<Mat> outs;
     net.forward(outs, outNames);

     Mat outDetections = outs[0];
     Mat outMasks = outs[1];
@@ -1940,4 +1939,24 @@ TEST(Test_TensorFlow_Importer, tf_graph_simplifier_buffer_overflow_21947)
     EXPECT_ANY_THROW(readNetFromTensorflow(reinterpret_cast<const char*>(payload), sizeof(payload) / sizeof(payload[0])));
 }

+TEST(Test_TF_Model, Inception_GetLayer)
+{
+    const std::string model = findDataFile("dnn/tensorflow_inception_graph.pb", false);
+    auto net = readNetFromTensorflow(model);
+
+    auto layernames = net.getLayerNames();
+
+    ASSERT_FALSE(layernames.empty());
+
+    // this is empirical initialization:
+    // * in the case of new engine the very first layer always has id == 0 for any model.
+    // * in the case of the old engine at least for this model the very first layer has id == 1
+    int layer_id = net.getLayer(0)->name != layernames[0];
+    for (auto name: layernames) {
+        auto layer = net.getLayer(layer_id);
+        EXPECT_EQ(layer->name, name);
+        layer_id++;
+    }
+}
+
 }