dnn: use inheritance for OpenVINO net impl
parent b26fc6f31b
commit ca7f964104
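The idea in a nutshell: Net keeps a Ptr<Impl>, backend-specific behavior moves into a class derived from Net::Impl, and a small accessor swaps the pointer behind a live Net. Below is a minimal, compilable sketch of that pattern — not the OpenCV sources; Net, Impl, NetImplOpenVINO and DnnNetAccessor mirror names from the diff, while the Ptr alias, the Backend values and everything else are simplified assumptions:

    // Minimal sketch of the commit's pattern (not the OpenCV sources):
    // a Net facade holds Ptr<Impl>; backend-specific behavior lives in a
    // derived Impl; an accessor class swaps the pointer behind the facade.
    #include <iostream>
    #include <memory>

    template <typename T> using Ptr = std::shared_ptr<T>;

    enum Backend { BACKEND_OPENCV = 3, BACKEND_OPENVINO = 2 };  // illustrative values

    class Net
    {
    public:
        Net();
        void setPreferableBackend(int backendId);
        void forward();

        struct Impl;
    private:
        friend class DnnNetAccessor;  // the only way external code reaches `impl`
    protected:
        Ptr<Impl> impl;
    };

    class DnnNetAccessor
    {
    public:
        static Ptr<Net::Impl>& getImplPtrRef(Net& net) { return net.impl; }
    };

    struct Net::Impl
    {
        virtual ~Impl() = default;
        virtual void forward() { std::cout << "base (OpenCV) forward\n"; }
        virtual void setPreferableBackend(Net& net, int backendId);
        Ptr<Net::Impl> basePtr_;  // lets a derived impl restore the original one
    };

    // Derived impl: overrides replace the old per-backend if/else branches.
    struct NetImplOpenVINO : Net::Impl
    {
        explicit NetImplOpenVINO(const Ptr<Net::Impl>& base) { basePtr_ = base; }
        void forward() override { std::cout << "OpenVINO forward\n"; }
        void setPreferableBackend(Net& net, int backendId) override
        {
            if (backendId == BACKEND_OPENVINO)
                return;  // no-op, already on this backend
            Ptr<Net::Impl> base = basePtr_;             // keep alive: next line may drop us
            DnnNetAccessor::getImplPtrRef(net) = base;  // switch back to the base impl
            base->setPreferableBackend(net, backendId);
        }
    };

    void Net::Impl::setPreferableBackend(Net& net, int backendId)
    {
        if (backendId == BACKEND_OPENVINO)  // swap the impl behind the facade
            DnnNetAccessor::getImplPtrRef(net) =
                    std::make_shared<NetImplOpenVINO>(net.impl);
    }

    Net::Net() : impl(std::make_shared<Impl>()) {}
    void Net::setPreferableBackend(int backendId) { impl->setPreferableBackend(*this, backendId); }
    void Net::forward() { impl->forward(); }

    int main()
    {
        Net net;
        net.forward();                              // base (OpenCV) forward
        net.setPreferableBackend(BACKEND_OPENVINO);
        net.forward();                              // OpenVINO forward
        net.setPreferableBackend(BACKEND_OPENCV);
        net.forward();                              // base (OpenCV) forward again
    }

Keeping the accessor in its own namespace confines impl swapping to backend code while the public Net API stays unchanged.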
@@ -52,6 +52,11 @@
 namespace cv {
 namespace dnn {

+namespace accessor {
+class DnnNetAccessor;  // forward declaration
+}
+
 CV__DNN_INLINE_NS_BEGIN
 //! @addtogroup dnn
 //! @{
@@ -840,8 +845,12 @@ CV__DNN_INLINE_NS_BEGIN
      */
     CV_WRAP int64 getPerfProfile(CV_OUT std::vector<double>& timings);

+private:
+
     struct Impl;
     inline Impl* getImpl() const { return impl.get(); }
     inline Impl& getImplRef() const { CV_DbgAssert(impl); return *impl.get(); }
+    friend class accessor::DnnNetAccessor;
 protected:
     Ptr<Impl> impl;
 };
@@ -156,6 +156,18 @@ static inline std::string toString(const Mat& blob, const std::string& name = st


 CV__DNN_INLINE_NS_END

+namespace accessor {
+class DnnNetAccessor
+{
+public:
+    static inline Ptr<Net::Impl>& getImplPtrRef(Net& net)
+    {
+        return net.impl;
+    }
+};
+}
+
 }}  // namespace

 #endif  // __OPENCV_DNN_COMMON_HPP__
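For orientation: this accessor is the hook the backend-switching code below uses to replace the implementation behind a live Net. A hedged usage sketch (`newImpl` is a hypothetical Ptr<Net::Impl>-compatible object):

    Ptr<Net::Impl>& impl_ptr_ref = accessor::DnnNetAccessor::getImplPtrRef(net);
    impl_ptr_ref = newImpl;  // e.g. a NetImplOpenVINO built from the old impl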
@@ -36,11 +36,7 @@ bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES()
 int getParam_DNN_BACKEND_DEFAULT()
 {
     static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
-#ifdef HAVE_INF_ENGINE
-        (size_t)DNN_BACKEND_INFERENCE_ENGINE
-#else
         (size_t)DNN_BACKEND_OPENCV
-#endif
     );
     return PARAM_DNN_BACKEND_DEFAULT;
 }
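The default backend is thus resolved once from the OPENCV_DNN_BACKEND_DEFAULT environment variable and cached. A hedged sketch of overriding it before the first Net is created (setenv is POSIX; the value 3 corresponding to DNN_BACKEND_OPENCV in the public Backend enum is an assumption worth checking against your headers):

    #include <cstdlib>
    // Force the plain OpenCV backend regardless of build options.
    setenv("OPENCV_DNN_BACKEND_DEFAULT", "3", /*overwrite=*/1);  // 3 == DNN_BACKEND_OPENCV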
@@ -988,14 +988,6 @@ InferenceEngine::DataPtr ngraphDataOutputNode(
     return w.dataPtr;
 }

-void forwardNgraph(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                   Ptr<BackendNode>& node, bool isAsync)
-{
-    CV_Assert(!node.empty());
-    Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
-    CV_Assert(!ieNode.empty());
-    ieNode->net->forward(outBlobsWrappers, isAsync);
-}
-
 void InfEngineNgraphNet::reset()
 {
@@ -1192,12 +1184,6 @@ void InfEngineNgraphNet::forward(const std::vector<Ptr<BackendWrapper> >& outBlo
     }
 }

 #else
-void forwardNgraph(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                   Ptr<BackendNode>& node, bool isAsync)
-{
-    CV_Assert(false && "nGraph is not enabled in this OpenCV build");
-}
 #endif

 }}
@@ -158,9 +158,6 @@ private:

 #endif  // HAVE_DNN_NGRAPH

-void forwardNgraph(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                   Ptr<BackendNode>& node, bool isAsync);
-
 }}  // namespace cv::dnn
@@ -112,6 +112,30 @@ struct LayerData

         return layerInstance;
     }

+    void resetAllocation()
+    {
+        if (id == 0)
+            return;  // skip "input" layer (assertion in Net::Impl::allocateLayers)
+
+        layerInstance.release();
+        outputBlobs.clear();
+        inputBlobs.clear();
+        internals.clear();
+
+        outputBlobsWrappers.clear();
+        inputBlobsWrappers.clear();
+        internalBlobsWrappers.clear();
+
+        backendNodes.clear();
+
+        skip = false;
+        flag = 0;
+
+#ifdef HAVE_CUDA
+        cudaD2HBackgroundTransfers.clear();
+#endif
+    }
 };
@@ -75,11 +75,7 @@ Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
     }
     else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
-#ifdef HAVE_DNN_NGRAPH
-        return Ptr<BackendWrapper>(new NgraphBackendWrapper(targetId, m));
-#else
-        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO / Inference Engine + nGraph");
-#endif
+        CV_Assert(0 && "Internal error: DNN_BACKEND_INFERENCE_ENGINE_NGRAPH must be implemented through inheritance");
     }
     else if (backendId == DNN_BACKEND_WEBNN)
     {
@@ -120,7 +120,7 @@ Net Net::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtyp
     CV_TRACE_FUNCTION();
     CV_Assert(impl);
     CV_Assert(!empty());
-    return impl->quantize(calibData, inputsDtype, outputsDtype, perChannel);
+    return impl->quantize(*this, calibData, inputsDtype, outputsDtype, perChannel);
 }

 // FIXIT drop from inference API
@@ -146,7 +146,7 @@ void Net::setPreferableBackend(int backendId)
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG(backendId);
     CV_Assert(impl);
-    return impl->setPreferableBackend(backendId);
+    return impl->setPreferableBackend(*this, backendId);
 }

 void Net::setPreferableTarget(int targetId)
@@ -30,6 +30,12 @@ std::string detail::NetImplBase::getDumpFileNameBase() const
 }


+Net::Impl::~Impl()
+{
+    // nothing
+}
+
+
 Net::Impl::Impl()
 {
     // allocate fake net input layer
@@ -46,9 +52,8 @@ Net::Impl::Impl()
     netWasQuantized = false;
     fusion = true;
     isAsync = false;
-    preferableBackend = DNN_BACKEND_DEFAULT;
+    preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT();
     preferableTarget = DNN_TARGET_CPU;
-    skipInfEngineInit = false;
     hasDynamicShapes = false;
 }
@@ -86,22 +91,10 @@ void Net::Impl::clear()
 }


-void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
+void Net::Impl::validateBackendAndTarget()
 {
     CV_TRACE_FUNCTION();

-    if (dumpLevel && networkDumpCounter == 0)
-    {
-        dumpNetworkToFile();
-    }
-
-    if (preferableBackend == DNN_BACKEND_DEFAULT)
-        preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT();
-#ifdef HAVE_INF_ENGINE
-    if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
-        preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  // = getInferenceEngineBackendTypeParam();
-#endif
-
     CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
               preferableTarget == DNN_TARGET_CPU ||
               preferableTarget == DNN_TARGET_OPENCL ||
@@ -109,19 +102,6 @@ void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
     CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
               preferableTarget == DNN_TARGET_CPU ||
               preferableTarget == DNN_TARGET_OPENCL);
-#ifdef HAVE_INF_ENGINE
-    if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-    {
-        CV_Assert(
-              (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) ||
-              preferableTarget == DNN_TARGET_OPENCL ||
-              preferableTarget == DNN_TARGET_OPENCL_FP16 ||
-              preferableTarget == DNN_TARGET_MYRIAD ||
-              preferableTarget == DNN_TARGET_HDDL ||
-              preferableTarget == DNN_TARGET_FPGA
-        );
-    }
-#endif
 #ifdef HAVE_WEBNN
     if (preferableBackend == DNN_BACKEND_WEBNN)
     {
@@ -136,6 +116,20 @@ void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
     CV_Assert(preferableBackend != DNN_BACKEND_TIMVX ||
               preferableTarget == DNN_TARGET_NPU);
+
+    CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && "Inheritance internal error");
+}
+
+void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
+{
+    CV_TRACE_FUNCTION();
+
+    if (dumpLevel && networkDumpCounter == 0)
+    {
+        dumpNetworkToFile();
+    }
+
+    validateBackendAndTarget();

     if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
     {
         if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
@@ -813,12 +807,10 @@ void Net::Impl::forwardLayer(LayerData& ld)
         {
             forwardHalide(ld.outputBlobsWrappers, node);
         }
-#ifdef HAVE_INF_ENGINE
         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         {
-            forwardNgraph(ld.outputBlobsWrappers, node, isAsync);
+            CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && "Inheritance internal error");
         }
-#endif
         else if (preferableBackend == DNN_BACKEND_WEBNN)
         {
             forwardWebnn(ld.outputBlobsWrappers, node, isAsync);
@@ -844,7 +836,7 @@ void Net::Impl::forwardLayer(LayerData& ld)
 #endif
         else
         {
-            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
+            CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend));
         }
     }
@@ -1369,30 +1361,7 @@ Mat Net::Impl::getBlob(String outputName) const
 AsyncArray Net::Impl::getBlobAsync(const LayerPin& pin)
 {
     CV_TRACE_FUNCTION();
-#ifdef HAVE_INF_ENGINE
-    if (!pin.valid())
-        CV_Error(Error::StsObjectNotFound, "Requested blob not found");
-
-    LayerData& ld = layers[pin.lid];
-    if ((size_t)pin.oid >= ld.outputBlobs.size())
-    {
-        CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, "
-                                              "the #%d was requested",
-                                              ld.name.c_str(), (int)ld.outputBlobs.size(), (int)pin.oid));
-    }
-    if (preferableTarget != DNN_TARGET_CPU)
-    {
-        CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
-        // Transfer data to CPU if it's require.
-        ld.outputBlobsWrappers[pin.oid]->copyToHost();
-    }
-    CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-
-    Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
-    return std::move(wrapper->futureMat);
-#else
     CV_Error(Error::StsNotImplemented, "DNN: OpenVINO/nGraph backend is required");
-#endif  // HAVE_INF_ENGINE
 }
@@ -38,7 +38,12 @@ struct Net::Impl : public detail::NetImplBase
     typedef std::map<int, LayerShapes> LayersShapesMap;
     typedef std::map<int, LayerData> MapIdToLayerData;

+    virtual ~Impl();
     Impl();
+    Impl(const Impl&) = delete;
+
+    // Inheritance support
+    Ptr<Net::Impl> basePtr_;

     Ptr<DataLayer> netInputLayer;
     std::vector<LayerPin> blobsToKeep;
@@ -49,7 +54,7 @@ struct Net::Impl : public detail::NetImplBase
     int preferableBackend;
     int preferableTarget;
     String halideConfigFile;
-    bool skipInfEngineInit;
+    // bool skipInfEngineInit;
     bool hasDynamicShapes;
     // Map host data to backend specific wrapper.
     std::map<void*, Ptr<BackendWrapper>> backendWrappers;
@@ -59,19 +64,22 @@ struct Net::Impl : public detail::NetImplBase
     bool netWasAllocated;
     bool netWasQuantized;
     bool fusion;
-    bool isAsync;
+    bool isAsync;  // FIXIT: drop
     std::vector<int64> layersTimings;


-    bool empty() const;
-    void setPreferableBackend(int backendId);
-    void setPreferableTarget(int targetId);
+    virtual bool empty() const;
+    virtual void setPreferableBackend(Net& net, int backendId);
+    virtual void setPreferableTarget(int targetId);

-    // FIXIT use inheritance
-    Ptr<BackendWrapper> wrap(Mat& host);
+    virtual Ptr<BackendWrapper> wrap(Mat& host);


-    void clear();
+    virtual void clear();


+    virtual void validateBackendAndTarget();
+
     void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>());
@@ -118,7 +126,7 @@ struct Net::Impl : public detail::NetImplBase

     void setInputsNames(const std::vector<String>& inputBlobNames);
     void setInputShape(const String& inputName, const MatShape& shape);
-    void setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean);
+    virtual void setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean);
     Mat getParam(int layer, int numParam) const;
     void setParam(int layer, int numParam, const Mat& blob);
     std::vector<Ptr<Layer>> getLayerInputs(int layerId) const;
@@ -130,8 +138,7 @@ struct Net::Impl : public detail::NetImplBase
     int getLayersCount(const String& layerType) const;


-    // FIXIT use inheritance
-    void initBackend(const std::vector<LayerPin>& blobsToKeep_);
+    virtual void initBackend(const std::vector<LayerPin>& blobsToKeep_);

     void setHalideScheduler(const String& scheduler);
 #ifdef HAVE_HALIDE
@@ -139,11 +146,6 @@ struct Net::Impl : public detail::NetImplBase
     void initHalideBackend();
 #endif

-#ifdef HAVE_DNN_NGRAPH
-    void addNgraphOutputs(LayerData& ld);
-    void initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_);
-#endif
-
 #ifdef HAVE_WEBNN
     void addWebnnOutputs(LayerData& ld);
     void initWebnnBackend(const std::vector<LayerPin>& blobsToKeep_);
@@ -183,11 +185,11 @@ struct Net::Impl : public detail::NetImplBase
     // TODO add getter
     void enableFusion(bool fusion_);

-    void fuseLayers(const std::vector<LayerPin>& blobsToKeep_);
+    virtual void fuseLayers(const std::vector<LayerPin>& blobsToKeep_);

     void allocateLayers(const std::vector<LayerPin>& blobsToKeep_);

-    void forwardLayer(LayerData& ld);
+    virtual void forwardLayer(LayerData& ld);

     void forwardToLayer(LayerData& ld, bool clearFlags = true);
@@ -243,22 +245,17 @@ struct Net::Impl : public detail::NetImplBase
     Mat getBlob(String outputName) const;

 #ifdef CV_CXX11
-    AsyncArray getBlobAsync(const LayerPin& pin);
+    virtual AsyncArray getBlobAsync(const LayerPin& pin);

     AsyncArray getBlobAsync(String outputName);
 #endif  // CV_CXX11

-#ifdef HAVE_INF_ENGINE
-    static
-    Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
-#endif
-
     string dump(bool forceAllocation = false) const;

     void dumpNetworkToFile() const;

     // FIXIT drop from inference API
-    Net quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel) /*const*/;
+    Net quantize(Net& net, InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel) /*const*/;
     void getInputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/;
     void getOutputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/;
@@ -109,11 +109,7 @@ void Net::Impl::initBackend(const std::vector<LayerPin>& blobsToKeep_)
     }
     else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
-#ifdef HAVE_DNN_NGRAPH
-        initNgraphBackend(blobsToKeep_);
-#else
-        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO");
-#endif
+        CV_Assert(0 && "Inheritance must be used with OpenVINO backend");
     }
     else if (preferableBackend == DNN_BACKEND_WEBNN)
     {
@@ -154,26 +150,30 @@ void Net::Impl::initBackend(const std::vector<LayerPin>& blobsToKeep_)
 }


-void Net::Impl::setPreferableBackend(int backendId)
+void Net::Impl::setPreferableBackend(Net& net, int backendId)
 {
     if (backendId == DNN_BACKEND_DEFAULT)
         backendId = (Backend)getParam_DNN_BACKEND_DEFAULT();

-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
-        backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  // = getInferenceEngineBackendTypeParam();
-
     if (netWasQuantized && backendId != DNN_BACKEND_OPENCV && backendId != DNN_BACKEND_TIMVX)
     {
         CV_LOG_WARNING(NULL, "DNN: Only default and TIMVX backends support quantized networks");
         backendId = DNN_BACKEND_OPENCV;
     }

+#ifdef HAVE_INF_ENGINE
+    if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
+        backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+#endif
+
     if (preferableBackend != backendId)
     {
         preferableBackend = backendId;
         clear();
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        {
+            switchToOpenVINOBackend(net);
+        }
+#endif
     }
 }
@@ -17,11 +17,205 @@ CV__DNN_INLINE_NS_BEGIN

 #ifdef HAVE_INF_ENGINE

+// TODO: use "string" target specifier
+class NetImplOpenVINO CV_FINAL : public Net::Impl
+{
+public:
+    typedef Net::Impl Base;
+
+    // this default constructor is used with OpenVINO native loader
+    // TODO: dedicated Impl?
+    NetImplOpenVINO()
+        : Net::Impl()
+    {
+        preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+    }
+
+    // constructor to derive execution implementation from the loaded network
+    explicit NetImplOpenVINO(const Ptr<Net::Impl>& basePtr)
+        : Net::Impl()
+    {
+        basePtr_ = basePtr;
+        init();
+    }
+
+    void init()
+    {
+        CV_TRACE_FUNCTION();
+        CV_Assert(basePtr_);
+        Net::Impl& base = *basePtr_;
+        CV_Assert(!base.netWasAllocated);
+        CV_Assert(!base.netWasQuantized);
+        netInputLayer = base.netInputLayer;
+        blobsToKeep = base.blobsToKeep;
+        layers = base.layers;
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
+        {
+            LayerData& ld = it->second;
+            ld.resetAllocation();
+        }
+        layerNameToId = base.layerNameToId;
+        outputNameToId = base.outputNameToId;
+        //blobManager = base.blobManager;
+        preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  //base.preferableBackend;
+        preferableTarget = base.preferableTarget;
+        hasDynamicShapes = base.hasDynamicShapes;
+        CV_Assert(base.backendWrappers.empty());  //backendWrappers = base.backendWrappers;
+        lastLayerId = base.lastLayerId;
+        netWasAllocated = base.netWasAllocated;
+        netWasQuantized = base.netWasQuantized;
+        fusion = base.fusion;
+    }
+
+
+    //bool isAsync;  // FIXIT: drop
+
+
+    bool empty() const override
+    {
+        return Base::empty();
+    }
+    void setPreferableBackend(Net& net, int backendId) override
+    {
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return;  // no-op
+        if (!basePtr_)
+            CV_Error(Error::StsError, "DNN: Can't switch backend of network created by OpenVINO");
+        Ptr<Net::Impl>& impl_ptr_ref = accessor::DnnNetAccessor::getImplPtrRef(net);
+        impl_ptr_ref = basePtr_;
+        return basePtr_->setPreferableBackend(net, backendId);
+    }
+    void setPreferableTarget(int targetId) override
+    {
+        if (preferableTarget != targetId)
+        {
+            preferableTarget = targetId;
+            clear();
+        }
+    }
+
+    Ptr<BackendWrapper> wrap(Mat& host) override
+    {
+        return Ptr<BackendWrapper>(new NgraphBackendWrapper(preferableTarget, host));
+    }
+
+
+    void clear() override
+    {
+        Base::clear();
+    }
+
+    void validateBackendAndTarget() override
+    {
+        CV_TRACE_FUNCTION();
+
+        CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
+        CV_Check((int)preferableTarget,
+                 preferableTarget == DNN_TARGET_CPU ||
+                 preferableTarget == DNN_TARGET_OPENCL ||
+                 preferableTarget == DNN_TARGET_OPENCL_FP16 ||
+                 preferableTarget == DNN_TARGET_MYRIAD ||
+                 preferableTarget == DNN_TARGET_HDDL ||
+                 preferableTarget == DNN_TARGET_FPGA,
+                 "Unknown OpenVINO target"
+        );
+    }
+
+    //void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>()) override;
+
+
+    //void setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean) override;
+
+    void addNgraphOutputs(LayerData& ld);
+
+    void initBackend(const std::vector<LayerPin>& blobsToKeep_) override;
+
+    void fuseLayers(const std::vector<LayerPin>& blobsToKeep_) override;
+
+    //void allocateLayers(const std::vector<LayerPin>& blobsToKeep_) override;
+
+    void forwardLayer(LayerData& ld) override;
+
+    AsyncArray getBlobAsync(const LayerPin& pin) override;
+
+    //string dump(bool forceAllocation = false) const override;
+
+    static
+    Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
+
+};  // NetImplOpenVINO
+
+
+void NetImplOpenVINO::forwardLayer(LayerData& ld)
+{
+    CV_TRACE_FUNCTION();
+
+    Ptr<Layer> layer = ld.layerInstance;
+
+    if (!ld.skip)
+    {
+        auto it = ld.backendNodes.find(preferableBackend);
+        if (ld.id == 0 ||  // input layer
+            it == ld.backendNodes.end()  // non-supported layer or its mode
+        )
+        {
+            return Base::forwardLayer(ld);
+        }
+
+        CV_Assert(it != ld.backendNodes.end());
+        const Ptr<BackendNode>& node = it->second;
+        CV_Assert(!node.empty());
+        Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
+        CV_Assert(!ieNode.empty());
+        CV_Assert(ieNode->net);
+
+        TickMeter tm;
+        tm.start();
+
+        ieNode->net->forward(ld.outputBlobsWrappers, isAsync);
+
+        tm.stop();
+        int64 t = tm.getTimeTicks();
+        layersTimings[ld.id] = (t > 0) ? t : t + 1;  // zero for skipped layers only
+    }
+    else
+    {
+        layersTimings[ld.id] = 0;
+    }
+
+    ld.flag = 1;
+}
+
+AsyncArray NetImplOpenVINO::getBlobAsync(const LayerPin& pin)
+{
+    CV_TRACE_FUNCTION();
+    if (!pin.valid())
+        CV_Error(Error::StsObjectNotFound, "Requested blob not found");
+
+    LayerData& ld = layers[pin.lid];
+    if ((size_t)pin.oid >= ld.outputBlobs.size())
+    {
+        CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, "
+                                              "the #%d was requested",
+                                              ld.name.c_str(), (int)ld.outputBlobs.size(), (int)pin.oid));
+    }
+    if (preferableTarget != DNN_TARGET_CPU)
+    {
+        CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
+        // Transfer data to CPU if it's require.
+        ld.outputBlobsWrappers[pin.oid]->copyToHost();
+    }
+    CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
+
+    Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
+    return std::move(wrapper->futureMat);
+}
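For context, a hedged sketch of the user-facing async path served by this override (file names and the input blob are hypothetical; requires an OpenVINO-enabled build):

    cv::dnn::Net net = cv::dnn::readNet("model.xml", "model.bin");  // OpenVINO IR
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
    net.setInput(blob);                       // blob prepared via cv::dnn::blobFromImage
    cv::AsyncArray out = net.forwardAsync();  // resolved through getBlobAsync above
    cv::Mat result;
    out.get(result);                          // blocks until the inference completes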
+
+
 /** mark input pins as outputs from other subnetworks
  * FIXIT must be done by DNN engine not ngraph.
  */
-void Net::Impl::addNgraphOutputs(LayerData& ld)
+void NetImplOpenVINO::addNgraphOutputs(LayerData& ld)
 {
     CV_TRACE_FUNCTION();
@@ -59,7 +253,7 @@ void Net::Impl::addNgraphOutputs(LayerData& ld)
     }
 }

-void Net::Impl::initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
+void NetImplOpenVINO::initBackend(const std::vector<LayerPin>& blobsToKeep_)
 {
     CV_TRACE_FUNCTION();
     CV_CheckEQ(preferableBackend, DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, "");
@@ -92,7 +286,7 @@ void Net::Impl::initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
         }
     }

-    if (skipInfEngineInit)
+    if (!basePtr_)  // model is loaded by OpenVINO
     {
         Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
         CV_Assert(!node.empty());
@@ -399,10 +593,104 @@ void Net::Impl::initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
     }
 }

+//}  // Net::Impl
+
+#if 0
+#define printf_(args) printf args
+#else
+#define printf_(args)
+#endif
+
+void NetImplOpenVINO::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
+{
+    CV_TRACE_FUNCTION();
+
+    if (!fusion)
+        return;
+
+    CV_Check((int)preferableBackend, preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, "");
+
+#if 0  // FIXIT mode without fusion is broken due to unsupported layers and handling of "custom" nodes
+    return;
+#endif
+
+    // scan through all the layers. If there is convolution layer followed by the activation layer,
+    // we try to embed this activation into the convolution and disable separate execution of the activation
+
+    // FIXIT replace by layersToKeep to avoid hacks like "LayerPin(lid, 0)"
+    std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
+                                  blobsToKeep_.end());
+    for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
+    {
+        int lid = it->first;
+        LayerData& ld = layers[lid];
+        if (ld.skip)
+        {
+            printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
+            continue;
+        }
+        printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
+
+        // the optimization #1. try to fuse batch norm, scaling and/or activation layers
+        // with the current layer if they follow it. Normally, the are fused with the convolution layer,
+        // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
+        // some other layers.
+        Ptr<Layer>& currLayer = ld.layerInstance;
+        if (ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0)
+        {
+            LayerData* nextData = &layers[ld.consumers[0].lid];
+            LayerPin lpNext(ld.consumers[0].lid, 0);
+            while (nextData)
+            {
+                if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && pinsToKeep.count(lpNext) != 0)
+                {
+                    CV_LOG_DEBUG(NULL, "DNN/IE: skip fusing with 'output' node: " << nextData->name << "@" << nextData->type);
+                    break;
+                }
+
+                /* we use `tryFuse` member of convolution layer to fuse eltwise later
+                 * it's not intended to be fused here; hence, we stop when we encounter eltwise
+                 */
+                Ptr<Layer> nextLayer = nextData->layerInstance;
+                if (currLayer->tryFuse(nextLayer))
+                {
+                    printf_(("\tfused with %s\n", nextLayer->name.c_str()));
+                    nextData->skip = true;
+                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                    ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
+                    if (nextData->consumers.size() == 1)
+                    {
+                        int nextLayerId = nextData->consumers[0].lid;
+                        nextData = &layers[nextLayerId];
+                        lpNext = LayerPin(nextLayerId, 0);
+                    }
+                    else
+                    {
+                        nextData = 0;
+                        break;
+                    }
+                }
+                else
+                    break;
+            }
+        }
+    }
+}
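Fusion can also be toggled from the public API, which helps when checking what this function fused (hedged example; enableFusion is the existing public switch):

    net.enableFusion(false);  // rebuild without fusion to compare per-layer outputs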
+
+
+void switchToOpenVINOBackend(Net& net)
+{
+    CV_TRACE_FUNCTION();
+    CV_LOG_INFO(NULL, "DNN: switching to OpenVINO backend...");
+    Ptr<Net::Impl>& impl_ptr_ref = accessor::DnnNetAccessor::getImplPtrRef(net);
+    Ptr<NetImplOpenVINO> openvino_impl_ptr = makePtr<NetImplOpenVINO>(impl_ptr_ref);
+    impl_ptr_ref = openvino_impl_ptr;
+}
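A hedged sketch of how execution reaches this function (public API on top; the comments restate the flow from the hunks above, and the model file is hypothetical):

    cv::dnn::Net net = cv::dnn::readNet("model.onnx");  // starts on the base Net::Impl
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
    // -> Net::Impl::setPreferableBackend(net, id) maps the id to
    //    DNN_BACKEND_INFERENCE_ENGINE_NGRAPH and calls switchToOpenVINOBackend(net),
    //    which swaps in a NetImplOpenVINO derived from the current impl.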


 /*static*/
-Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
+Net NetImplOpenVINO::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
 {
     CV_TRACE_FUNCTION();
@@ -418,6 +706,10 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
     }

     Net cvNet;
+    Ptr<NetImplOpenVINO> openvino_impl_ptr = makePtr<NetImplOpenVINO>();
+    NetImplOpenVINO& openvino_impl = *openvino_impl_ptr;
+    accessor::DnnNetAccessor::getImplPtrRef(cvNet) = openvino_impl_ptr;
+
     cvNet.setInputsNames(inputsNames);

     // set empty input to determine input shapes
@@ -432,7 +724,7 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
     {
         auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape {});
         Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
-        backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
+        backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(openvino_impl, ieNet));
         backendNode = backendNodeNGraph;
     }
@@ -450,7 +742,7 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
         LayerParams lp;
         int lid = cvNet.addLayer(it.first, "", lp);

-        LayerData& ld = cvNet.impl->layers[lid];
+        LayerData& ld = openvino_impl.layers[lid];

         {
             Ptr<Layer> cvLayer(new NgraphBackendLayer(ieNet));
|
||||
|
||||
cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
|
||||
|
||||
cvNet.impl->skipInfEngineInit = true;
|
||||
return cvNet;
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
@@ -516,7 +807,7 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
     InferenceEngine::Core& ie = getCore("");
     InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin);

-    return Impl::createNetworkFromModelOptimizer(ieNet);
+    return NetImplOpenVINO::createNetworkFromModelOptimizer(ieNet);
 #endif  // HAVE_INF_ENGINE
 }
@@ -560,7 +851,7 @@ Net Net::readFromModelOptimizer(
         CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
     }

-    return Impl::createNetworkFromModelOptimizer(ieNet);
+    return NetImplOpenVINO::createNetworkFromModelOptimizer(ieNet);
 #endif  // HAVE_INF_ENGINE
 }
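Both entry points now construct the derived implementation directly; a hedged usage sketch (file names hypothetical):

    // The returned Net is backed by NetImplOpenVINO from the start.
    cv::dnn::Net net = cv::dnn::Net::readFromModelOptimizer("model.xml", "model.bin");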
@@ -33,7 +33,7 @@ void getQuantizationParams(const Mat& src, std::vector<float>& scales, std::vect
 }

 // FIXIT drop from inference API
-Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
+Net Net::Impl::quantize(Net& net, InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
 {
     // Net can be quantized only once.
     if (netWasQuantized)
@@ -47,7 +47,8 @@ Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outpu
     int prefTarget = preferableTarget;

     // Disable fusions and use CPU backend to quantize net
-    setPreferableBackend(DNN_BACKEND_OPENCV);
+    // FIXIT: we should not modify original network!
+    setPreferableBackend(net, DNN_BACKEND_OPENCV);
     setPreferableTarget(DNN_TARGET_CPU);
     enableFusion(false);
@@ -163,7 +164,7 @@ Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outpu
     Net::Impl& dstNet = *(dstNet_.impl);
     dstNet.netWasQuantized = true;
     dstNet.setInputsNames(netInputLayer->outNames);
-    dstNet.setPreferableBackend(prefBackend);
+    dstNet.setPreferableBackend(dstNet_, prefBackend);
     dstNet.setPreferableTarget(prefTarget);
     dstNet.enableFusion(originalFusion);
@@ -253,7 +254,7 @@ Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outpu
         }
     }
     // Restore FP32 Net's backend, target and fusion
-    setPreferableBackend(prefBackend);
+    setPreferableBackend(net, prefBackend);
     setPreferableTarget(prefTarget);
     enableFusion(originalFusion);
     return dstNet_;
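A hedged usage sketch of the public API this implements (the calibration helper is hypothetical; the dtypes follow the signature above):

    std::vector<cv::Mat> calibData = collectCalibrationBlobs();  // hypothetical helper
    cv::dnn::Net qnet = net.quantize(calibData, CV_8S, CV_8S, /*perChannel=*/true);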
@@ -73,6 +73,8 @@ void infEngineBlobsToMats(const std::vector<InferenceEngine::Blob::Ptr>& blobs,

 CV__DNN_INLINE_NS_BEGIN

+void switchToOpenVINOBackend(Net& net);
+
 namespace openvino {

 // TODO: use std::string as parameter