dnn: use inheritance for OpenVINO net impl

Alexander Alekhin 2022-03-07 22:26:20 +00:00
parent b26fc6f31b
commit ca7f964104
14 changed files with 413 additions and 133 deletions

View File

@@ -52,6 +52,11 @@
 namespace cv {
 namespace dnn {
+
+namespace accessor {
+class DnnNetAccessor;  // forward declaration
+}
+
 CV__DNN_INLINE_NS_BEGIN
 //! @addtogroup dnn
 //! @{
@@ -840,8 +845,12 @@ CV__DNN_INLINE_NS_BEGIN
      */
     CV_WRAP int64 getPerfProfile(CV_OUT std::vector<double>& timings);
 
-private:
     struct Impl;
+    inline Impl* getImpl() const { return impl.get(); }
+    inline Impl& getImplRef() const { CV_DbgAssert(impl); return *impl.get(); }
+    friend class accessor::DnnNetAccessor;
+protected:
     Ptr<Impl> impl;
 };

View File

@@ -156,6 +156,18 @@ static inline std::string toString(const Mat& blob, const std::string& name = st
 CV__DNN_INLINE_NS_END
 
+namespace accessor {
+
+class DnnNetAccessor
+{
+public:
+    static inline Ptr<Net::Impl>& getImplPtrRef(Net& net)
+    {
+        return net.impl;
+    }
+};
+
+}
+
 }}  // namespace
 #endif  // __OPENCV_DNN_COMMON_HPP__
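A minimal sketch of the pattern this accessor enables, for dnn-internal code only (MyImpl is a hypothetical Net::Impl subclass standing in for the NetImplOpenVINO class introduced further below):

    // Privileged module code swaps a Net's implementation object in place;
    // the derived impl keeps the original one alive through basePtr_.
    void swapImpl(cv::dnn::Net& net)
    {
        cv::Ptr<cv::dnn::Net::Impl>& implRef =
            cv::dnn::accessor::DnnNetAccessor::getImplPtrRef(net);
        implRef = cv::makePtr<MyImpl>(implRef);  // hypothetical derived impl
    }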

View File

@@ -36,11 +36,7 @@ bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES()
 int getParam_DNN_BACKEND_DEFAULT()
 {
     static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
-#ifdef HAVE_INF_ENGINE
-        (size_t)DNN_BACKEND_INFERENCE_ENGINE
-#else
         (size_t)DNN_BACKEND_OPENCV
-#endif
     );
     return PARAM_DNN_BACKEND_DEFAULT;
 }

View File

@@ -988,14 +988,6 @@ InferenceEngine::DataPtr ngraphDataOutputNode(
     return w.dataPtr;
 }
 
-void forwardNgraph(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                   Ptr<BackendNode>& node, bool isAsync)
-{
-    CV_Assert(!node.empty());
-    Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
-    CV_Assert(!ieNode.empty());
-    ieNode->net->forward(outBlobsWrappers, isAsync);
-}
 
 void InfEngineNgraphNet::reset()
 {
@@ -1192,12 +1184,6 @@ void InfEngineNgraphNet::forward(const std::vector<Ptr<BackendWrapper> >& outBlo
     }
 }
 
-#else
-void forwardNgraph(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                   Ptr<BackendNode>& node, bool isAsync)
-{
-    CV_Assert(false && "nGraph is not enabled in this OpenCV build");
-}
 #endif
 
 }}

View File

@@ -158,9 +158,6 @@ private:
 #endif  // HAVE_DNN_NGRAPH
 
-void forwardNgraph(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                   Ptr<BackendNode>& node, bool isAsync);
 
 }}  // namespace cv::dnn

View File

@@ -112,6 +112,30 @@ struct LayerData
         return layerInstance;
     }
 
+    void resetAllocation()
+    {
+        if (id == 0)
+            return;  // skip "input" layer (assertion in Net::Impl::allocateLayers)
+
+        layerInstance.release();
+
+        outputBlobs.clear();
+        inputBlobs.clear();
+        internals.clear();
+
+        outputBlobsWrappers.clear();
+        inputBlobsWrappers.clear();
+        internalBlobsWrappers.clear();
+
+        backendNodes.clear();
+
+        skip = false;
+        flag = 0;
+
+#ifdef HAVE_CUDA
+        cudaD2HBackgroundTransfers.clear();
+#endif
+    }
 };

View File

@@ -75,11 +75,7 @@ Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
     }
     else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
-#ifdef HAVE_DNN_NGRAPH
-        return Ptr<BackendWrapper>(new NgraphBackendWrapper(targetId, m));
-#else
-        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO / Inference Engine + nGraph");
-#endif
+        CV_Assert(0 && "Internal error: DNN_BACKEND_INFERENCE_ENGINE_NGRAPH must be implemented through inheritance");
     }
     else if (backendId == DNN_BACKEND_WEBNN)
     {

View File

@@ -120,7 +120,7 @@ Net Net::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtyp
     CV_TRACE_FUNCTION();
     CV_Assert(impl);
     CV_Assert(!empty());
-    return impl->quantize(calibData, inputsDtype, outputsDtype, perChannel);
+    return impl->quantize(*this, calibData, inputsDtype, outputsDtype, perChannel);
 }
 
 // FIXIT drop from inference API
@@ -146,7 +146,7 @@ void Net::setPreferableBackend(int backendId)
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG(backendId);
     CV_Assert(impl);
-    return impl->setPreferableBackend(backendId);
+    return impl->setPreferableBackend(*this, backendId);
 }
 
 void Net::setPreferableTarget(int targetId)

View File

@@ -30,6 +30,12 @@ std::string detail::NetImplBase::getDumpFileNameBase() const
 }
 
+
+Net::Impl::~Impl()
+{
+    // nothing
+}
+
 Net::Impl::Impl()
 {
     // allocate fake net input layer
@@ -46,9 +52,8 @@ Net::Impl::Impl()
     netWasQuantized = false;
     fusion = true;
     isAsync = false;
-    preferableBackend = DNN_BACKEND_DEFAULT;
+    preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT();
     preferableTarget = DNN_TARGET_CPU;
-    skipInfEngineInit = false;
     hasDynamicShapes = false;
 }
@@ -86,22 +91,10 @@ void Net::Impl::clear()
 }
 
-void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
+void Net::Impl::validateBackendAndTarget()
 {
     CV_TRACE_FUNCTION();
 
-    if (dumpLevel && networkDumpCounter == 0)
-    {
-        dumpNetworkToFile();
-    }
-
-    if (preferableBackend == DNN_BACKEND_DEFAULT)
-        preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT();
-#ifdef HAVE_INF_ENGINE
-    if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
-        preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  // = getInferenceEngineBackendTypeParam();
-#endif
-
     CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
               preferableTarget == DNN_TARGET_CPU ||
               preferableTarget == DNN_TARGET_OPENCL ||
@@ -109,19 +102,6 @@ void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
     CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
               preferableTarget == DNN_TARGET_CPU ||
               preferableTarget == DNN_TARGET_OPENCL);
-#ifdef HAVE_INF_ENGINE
-    if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-    {
-        CV_Assert(
-              (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) ||
-              preferableTarget == DNN_TARGET_OPENCL ||
-              preferableTarget == DNN_TARGET_OPENCL_FP16 ||
-              preferableTarget == DNN_TARGET_MYRIAD ||
-              preferableTarget == DNN_TARGET_HDDL ||
-              preferableTarget == DNN_TARGET_FPGA
-        );
-    }
-#endif
 #ifdef HAVE_WEBNN
     if (preferableBackend == DNN_BACKEND_WEBNN)
     {
@@ -136,6 +116,20 @@ void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
     CV_Assert(preferableBackend != DNN_BACKEND_TIMVX ||
               preferableTarget == DNN_TARGET_NPU);
+
+    CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && "Inheritance internal error");
+}
+
+void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
+{
+    CV_TRACE_FUNCTION();
+
+    if (dumpLevel && networkDumpCounter == 0)
+    {
+        dumpNetworkToFile();
+    }
+
+    validateBackendAndTarget();
+
     if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
     {
         if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
@@ -813,12 +807,10 @@ void Net::Impl::forwardLayer(LayerData& ld)
         {
             forwardHalide(ld.outputBlobsWrappers, node);
         }
-#ifdef HAVE_INF_ENGINE
         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         {
-            forwardNgraph(ld.outputBlobsWrappers, node, isAsync);
+            CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && "Inheritance internal error");
         }
-#endif
         else if (preferableBackend == DNN_BACKEND_WEBNN)
         {
             forwardWebnn(ld.outputBlobsWrappers, node, isAsync);
@@ -844,7 +836,7 @@ void Net::Impl::forwardLayer(LayerData& ld)
 #endif
         else
         {
-            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
+            CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend));
         }
     }
@@ -1369,30 +1361,7 @@ Mat Net::Impl::getBlob(String outputName) const
 AsyncArray Net::Impl::getBlobAsync(const LayerPin& pin)
 {
     CV_TRACE_FUNCTION();
-#ifdef HAVE_INF_ENGINE
-    if (!pin.valid())
-        CV_Error(Error::StsObjectNotFound, "Requested blob not found");
-
-    LayerData& ld = layers[pin.lid];
-    if ((size_t)pin.oid >= ld.outputBlobs.size())
-    {
-        CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, "
-                                              "the #%d was requested",
-                                              ld.name.c_str(), (int)ld.outputBlobs.size(), (int)pin.oid));
-    }
-    if (preferableTarget != DNN_TARGET_CPU)
-    {
-        CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
-        // Transfer data to CPU if it's require.
-        ld.outputBlobsWrappers[pin.oid]->copyToHost();
-    }
-
-    CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-
-    Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
-    return std::move(wrapper->futureMat);
-#else
     CV_Error(Error::StsNotImplemented, "DNN: OpenVINO/nGraph backend is required");
-#endif  // HAVE_INF_ENGINE
 }

View File

@@ -38,7 +38,12 @@ struct Net::Impl : public detail::NetImplBase
     typedef std::map<int, LayerShapes> LayersShapesMap;
     typedef std::map<int, LayerData> MapIdToLayerData;
 
+    virtual ~Impl();
     Impl();
+    Impl(const Impl&) = delete;
+
+    // Inheritance support
+    Ptr<Net::Impl> basePtr_;
 
     Ptr<DataLayer> netInputLayer;
     std::vector<LayerPin> blobsToKeep;
@@ -49,7 +54,7 @@ struct Net::Impl : public detail::NetImplBase
     int preferableBackend;
     int preferableTarget;
     String halideConfigFile;
-    bool skipInfEngineInit;
+    //bool skipInfEngineInit;
     bool hasDynamicShapes;
     // Map host data to backend specific wrapper.
     std::map<void*, Ptr<BackendWrapper>> backendWrappers;
@@ -59,19 +64,22 @@ struct Net::Impl : public detail::NetImplBase
     bool netWasAllocated;
     bool netWasQuantized;
     bool fusion;
-    bool isAsync;
+    bool isAsync;  // FIXIT: drop
     std::vector<int64> layersTimings;
 
-    bool empty() const;
-    void setPreferableBackend(int backendId);
-    void setPreferableTarget(int targetId);
+    virtual bool empty() const;
+    virtual void setPreferableBackend(Net& net, int backendId);
+    virtual void setPreferableTarget(int targetId);
 
     // FIXIT use inheritance
-    Ptr<BackendWrapper> wrap(Mat& host);
+    virtual Ptr<BackendWrapper> wrap(Mat& host);
 
-    void clear();
+    virtual void clear();
+
+    virtual void validateBackendAndTarget();
 
     void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>());
@@ -118,7 +126,7 @@ struct Net::Impl : public detail::NetImplBase
     void setInputsNames(const std::vector<String>& inputBlobNames);
     void setInputShape(const String& inputName, const MatShape& shape);
-    void setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean);
+    virtual void setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean);
     Mat getParam(int layer, int numParam) const;
     void setParam(int layer, int numParam, const Mat& blob);
     std::vector<Ptr<Layer>> getLayerInputs(int layerId) const;
@@ -130,8 +138,7 @@ struct Net::Impl : public detail::NetImplBase
     int getLayersCount(const String& layerType) const;
 
-    // FIXIT use inheritance
-    void initBackend(const std::vector<LayerPin>& blobsToKeep_);
+    virtual void initBackend(const std::vector<LayerPin>& blobsToKeep_);
 
     void setHalideScheduler(const String& scheduler);
 #ifdef HAVE_HALIDE
@@ -139,11 +146,6 @@ struct Net::Impl : public detail::NetImplBase
     void initHalideBackend();
 #endif
 
-#ifdef HAVE_DNN_NGRAPH
-    void addNgraphOutputs(LayerData& ld);
-    void initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_);
-#endif
-
 #ifdef HAVE_WEBNN
     void addWebnnOutputs(LayerData& ld);
     void initWebnnBackend(const std::vector<LayerPin>& blobsToKeep_);
@@ -183,11 +185,11 @@ struct Net::Impl : public detail::NetImplBase
     // TODO add getter
     void enableFusion(bool fusion_);
 
-    void fuseLayers(const std::vector<LayerPin>& blobsToKeep_);
+    virtual void fuseLayers(const std::vector<LayerPin>& blobsToKeep_);
 
     void allocateLayers(const std::vector<LayerPin>& blobsToKeep_);
 
-    void forwardLayer(LayerData& ld);
+    virtual void forwardLayer(LayerData& ld);
 
     void forwardToLayer(LayerData& ld, bool clearFlags = true);
@@ -243,22 +245,17 @@ struct Net::Impl : public detail::NetImplBase
     Mat getBlob(String outputName) const;
 
 #ifdef CV_CXX11
-    AsyncArray getBlobAsync(const LayerPin& pin);
+    virtual AsyncArray getBlobAsync(const LayerPin& pin);
 
     AsyncArray getBlobAsync(String outputName);
 #endif  // CV_CXX11
 
-#ifdef HAVE_INF_ENGINE
-    static
-    Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
-#endif
-
     string dump(bool forceAllocation = false) const;
     void dumpNetworkToFile() const;
 
     // FIXIT drop from inference API
-    Net quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel) /*const*/;
+    Net quantize(Net& net, InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel) /*const*/;
     void getInputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/;
     void getOutputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/;

View File

@@ -109,11 +109,7 @@ void Net::Impl::initBackend(const std::vector<LayerPin>& blobsToKeep_)
     }
     else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
-#ifdef HAVE_DNN_NGRAPH
-        initNgraphBackend(blobsToKeep_);
-#else
-        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO");
-#endif
+        CV_Assert(0 && "Inheritance must be used with OpenVINO backend");
     }
     else if (preferableBackend == DNN_BACKEND_WEBNN)
     {
@@ -154,26 +150,30 @@ void Net::Impl::initBackend(const std::vector<LayerPin>& blobsToKeep_)
 }
 
-void Net::Impl::setPreferableBackend(int backendId)
+void Net::Impl::setPreferableBackend(Net& net, int backendId)
 {
     if (backendId == DNN_BACKEND_DEFAULT)
         backendId = (Backend)getParam_DNN_BACKEND_DEFAULT();
 
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
-        backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  // = getInferenceEngineBackendTypeParam();
-
     if (netWasQuantized && backendId != DNN_BACKEND_OPENCV && backendId != DNN_BACKEND_TIMVX)
     {
         CV_LOG_WARNING(NULL, "DNN: Only default and TIMVX backends support quantized networks");
         backendId = DNN_BACKEND_OPENCV;
     }
 
+#ifdef HAVE_INF_ENGINE
+    if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
+        backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+#endif
+
     if (preferableBackend != backendId)
     {
         preferableBackend = backendId;
         clear();
+
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        {
+            switchToOpenVINOBackend(net);
+        }
+#endif
     }
 }
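The resulting user-visible flow, as a minimal sketch (the model file name is hypothetical):

    cv::dnn::Net net = cv::dnn::readNetFromONNX("model.onnx");  // hypothetical model path
    // DNN_BACKEND_INFERENCE_ENGINE is remapped to DNN_BACKEND_INFERENCE_ENGINE_NGRAPH,
    // and the Net's Impl is replaced by the OpenVINO-derived impl via switchToOpenVINOBackend().
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);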

View File

@@ -17,11 +17,205 @@ CV__DNN_INLINE_NS_BEGIN
 #ifdef HAVE_INF_ENGINE
 
+// TODO: use "string" target specifier
+class NetImplOpenVINO CV_FINAL : public Net::Impl
+{
+public:
+    typedef Net::Impl Base;
+
+    // this default constructor is used with OpenVINO native loader
+    // TODO: dedicated Impl?
+    NetImplOpenVINO()
+        : Net::Impl()
+    {
+        preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+    }
+
+    // constructor to derive execution implementation from the loaded network
+    explicit NetImplOpenVINO(const Ptr<Net::Impl>& basePtr)
+        : Net::Impl()
+    {
+        basePtr_ = basePtr;
+        init();
+    }
+
+    void init()
+    {
+        CV_TRACE_FUNCTION();
+        CV_Assert(basePtr_);
+        Net::Impl& base = *basePtr_;
+        CV_Assert(!base.netWasAllocated);
+        CV_Assert(!base.netWasQuantized);
+        netInputLayer = base.netInputLayer;
+        blobsToKeep = base.blobsToKeep;
+        layers = base.layers;
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
+        {
+            LayerData& ld = it->second;
+            ld.resetAllocation();
+        }
+        layerNameToId = base.layerNameToId;
+        outputNameToId = base.outputNameToId;
+        //blobManager = base.blobManager;
+        preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  //base.preferableBackend;
+        preferableTarget = base.preferableTarget;
+        hasDynamicShapes = base.hasDynamicShapes;
+        CV_Assert(base.backendWrappers.empty());  //backendWrappers = base.backendWrappers;
+        lastLayerId = base.lastLayerId;
+        netWasAllocated = base.netWasAllocated;
+        netWasQuantized = base.netWasQuantized;
+        fusion = base.fusion;
+    }
+
+    //bool isAsync;  // FIXIT: drop
+
+    bool empty() const override
+    {
+        return Base::empty();
+    }
+
+    void setPreferableBackend(Net& net, int backendId) override
+    {
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return;  // no-op
+        if (!basePtr_)
+            CV_Error(Error::StsError, "DNN: Can't switch backend of network created by OpenVINO");
+        Ptr<Net::Impl>& impl_ptr_ref = accessor::DnnNetAccessor::getImplPtrRef(net);
+        impl_ptr_ref = basePtr_;
+        return basePtr_->setPreferableBackend(net, backendId);
+    }
+
+    void setPreferableTarget(int targetId) override
+    {
+        if (preferableTarget != targetId)
+        {
+            preferableTarget = targetId;
+            clear();
+        }
+    }
+
+    Ptr<BackendWrapper> wrap(Mat& host) override
+    {
+        return Ptr<BackendWrapper>(new NgraphBackendWrapper(preferableTarget, host));
+    }
+
+    void clear() override
+    {
+        Base::clear();
+    }
+
+    void validateBackendAndTarget() override
+    {
+        CV_TRACE_FUNCTION();
+
+        CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
+        CV_Check((int)preferableTarget,
+              preferableTarget == DNN_TARGET_CPU ||
+              preferableTarget == DNN_TARGET_OPENCL ||
+              preferableTarget == DNN_TARGET_OPENCL_FP16 ||
+              preferableTarget == DNN_TARGET_MYRIAD ||
+              preferableTarget == DNN_TARGET_HDDL ||
+              preferableTarget == DNN_TARGET_FPGA,
+              "Unknown OpenVINO target"
+        );
+    }
+
+    //void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>()) override;
+
+    //void setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean) override;
+
+    void addNgraphOutputs(LayerData& ld);
+
+    void initBackend(const std::vector<LayerPin>& blobsToKeep_) override;
+
+    void fuseLayers(const std::vector<LayerPin>& blobsToKeep_) override;
+
+    //void allocateLayers(const std::vector<LayerPin>& blobsToKeep_) override;
+
+    void forwardLayer(LayerData& ld) override;
+
+    AsyncArray getBlobAsync(const LayerPin& pin) override;
+
+    //string dump(bool forceAllocation = false) const override;
+
+    static
+    Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
+
+};  // NetImplOpenVINO
+
+void NetImplOpenVINO::forwardLayer(LayerData& ld)
+{
+    CV_TRACE_FUNCTION();
+
+    Ptr<Layer> layer = ld.layerInstance;
+
+    if (!ld.skip)
+    {
+        auto it = ld.backendNodes.find(preferableBackend);
+        if (ld.id == 0 ||  // input layer
+            it == ld.backendNodes.end()  // non-supported layer or its mode
+        )
+        {
+            return Base::forwardLayer(ld);
+        }
+
+        CV_Assert(it != ld.backendNodes.end());
+        const Ptr<BackendNode>& node = it->second;
+        CV_Assert(!node.empty());
+        Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
+        CV_Assert(!ieNode.empty());
+        CV_Assert(ieNode->net);
+
+        TickMeter tm;
+        tm.start();
+
+        ieNode->net->forward(ld.outputBlobsWrappers, isAsync);
+
+        tm.stop();
+        int64 t = tm.getTimeTicks();
+        layersTimings[ld.id] = (t > 0) ? t : t + 1;  // zero for skipped layers only
+    }
+    else
+    {
+        layersTimings[ld.id] = 0;
+    }
+
+    ld.flag = 1;
+}
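Since the override keeps layersTimings up to date, per-layer times stay readable through the existing public API; a short sketch, given a Net 'net' that has completed a forward() pass:

    std::vector<double> timings;
    int64 total = net.getPerfProfile(timings);        // per-layer ticks, filled from layersTimings
    double seconds = total / cv::getTickFrequency();  // convert ticks to seconds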
+
+AsyncArray NetImplOpenVINO::getBlobAsync(const LayerPin& pin)
+{
+    CV_TRACE_FUNCTION();
+    if (!pin.valid())
+        CV_Error(Error::StsObjectNotFound, "Requested blob not found");
+
+    LayerData& ld = layers[pin.lid];
+    if ((size_t)pin.oid >= ld.outputBlobs.size())
+    {
+        CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, "
+                                              "the #%d was requested",
+                                              ld.name.c_str(), (int)ld.outputBlobs.size(), (int)pin.oid));
+    }
+    if (preferableTarget != DNN_TARGET_CPU)
+    {
+        CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
+        // Transfer data to CPU if it's require.
+        ld.outputBlobsWrappers[pin.oid]->copyToHost();
+    }
+
+    CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
+
+    Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
+    return std::move(wrapper->futureMat);
+}
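This override backs the public asynchronous API; a minimal sketch of the call path it serves (model paths and the input blob are hypothetical):

    cv::dnn::Net net = cv::dnn::readNetFromModelOptimizer("model.xml", "model.bin");  // hypothetical paths
    net.setInput(blob);                       // 'blob' prepared earlier, e.g. via cv::dnn::blobFromImage()
    cv::AsyncArray out = net.forwardAsync();  // resolved through getBlobAsync() above
    cv::Mat result;
    out.get(result);                          // blocks until the inference request completes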
 /** mark input pins as outputs from other subnetworks
  * FIXIT must be done by DNN engine not ngraph.
  */
-void Net::Impl::addNgraphOutputs(LayerData& ld)
+void NetImplOpenVINO::addNgraphOutputs(LayerData& ld)
 {
     CV_TRACE_FUNCTION();
 
@@ -59,7 +253,7 @@ void Net::Impl::addNgraphOutputs(LayerData& ld)
     }
 }
 
-void Net::Impl::initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
+void NetImplOpenVINO::initBackend(const std::vector<LayerPin>& blobsToKeep_)
 {
     CV_TRACE_FUNCTION();
     CV_CheckEQ(preferableBackend, DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, "");
 
@@ -92,7 +286,7 @@ void Net::Impl::initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
         }
     }
 
-    if (skipInfEngineInit)
+    if (!basePtr_)  // model is loaded by OpenVINO
     {
         Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
         CV_Assert(!node.empty());
@@ -399,10 +593,104 @@ void Net::Impl::initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
     }
 }
 
+//}  // Net::Impl
+
+#if 0
+#define printf_(args) printf args
+#else
+#define printf_(args)
+#endif
+
+void NetImplOpenVINO::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
+{
+    CV_TRACE_FUNCTION();
+
+    if (!fusion)
+        return;
+
+    CV_Check((int)preferableBackend, preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, "");
+
+#if 0  // FIXIT mode without fusion is broken due to unsupported layers and handling of "custom" nodes
+    return;
+#endif
+
+    // scan through all the layers. If there is convolution layer followed by the activation layer,
+    // we try to embed this activation into the convolution and disable separate execution of the activation
+
+    // FIXIT replace by layersToKeep to avoid hacks like "LayerPin(lid, 0)"
+    std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
+                                  blobsToKeep_.end());
+
+    for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
+    {
+        int lid = it->first;
+        LayerData& ld = layers[lid];
+        if (ld.skip)
+        {
+            printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
+            continue;
+        }
+        printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
+
+        // the optimization #1. try to fuse batch norm, scaling and/or activation layers
+        // with the current layer if they follow it. Normally, the are fused with the convolution layer,
+        // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
+        // some other layers.
+        Ptr<Layer>& currLayer = ld.layerInstance;
+        if (ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0)
+        {
+            LayerData* nextData = &layers[ld.consumers[0].lid];
+            LayerPin lpNext(ld.consumers[0].lid, 0);
+            while (nextData)
+            {
+                if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && pinsToKeep.count(lpNext) != 0)
+                {
+                    CV_LOG_DEBUG(NULL, "DNN/IE: skip fusing with 'output' node: " << nextData->name << "@" << nextData->type);
+                    break;
+                }
+
+                /* we use `tryFuse` member of convolution layer to fuse eltwise later
+                 * it's not intended to be fused here; hence, we stop when we encounter eltwise
+                 */
+                Ptr<Layer> nextLayer = nextData->layerInstance;
+                if (currLayer->tryFuse(nextLayer))
+                {
+                    printf_(("\tfused with %s\n", nextLayer->name.c_str()));
+                    nextData->skip = true;
+                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                    ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
+                    if (nextData->consumers.size() == 1)
+                    {
+                        int nextLayerId = nextData->consumers[0].lid;
+                        nextData = &layers[nextLayerId];
+                        lpNext = LayerPin(nextLayerId, 0);
+                    }
+                    else
+                    {
+                        nextData = 0;
+                        break;
+                    }
+                }
+                else
+                    break;
+            }
+        }
+    }
+}
+
+void switchToOpenVINOBackend(Net& net)
+{
+    CV_TRACE_FUNCTION();
+    CV_LOG_INFO(NULL, "DNN: switching to OpenVINO backend...");
+    Ptr<Net::Impl>& impl_ptr_ref = accessor::DnnNetAccessor::getImplPtrRef(net);
+    Ptr<NetImplOpenVINO> openvino_impl_ptr = makePtr<NetImplOpenVINO>(impl_ptr_ref);
+    impl_ptr_ref = openvino_impl_ptr;
+}
 /*static*/
-Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
+Net NetImplOpenVINO::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
 {
     CV_TRACE_FUNCTION();
 
@@ -418,6 +706,10 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
     }
 
     Net cvNet;
+    Ptr<NetImplOpenVINO> openvino_impl_ptr = makePtr<NetImplOpenVINO>();
+    NetImplOpenVINO& openvino_impl = *openvino_impl_ptr;
+    accessor::DnnNetAccessor::getImplPtrRef(cvNet) = openvino_impl_ptr;
+
     cvNet.setInputsNames(inputsNames);
 
     // set empty input to determine input shapes
@@ -432,7 +724,7 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
     {
         auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape {});
         Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
-        backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
+        backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(openvino_impl, ieNet));
         backendNode = backendNodeNGraph;
     }
 
@@ -450,7 +742,7 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
         LayerParams lp;
         int lid = cvNet.addLayer(it.first, "", lp);
-        LayerData& ld = cvNet.impl->layers[lid];
+        LayerData& ld = openvino_impl.layers[lid];
         {
             Ptr<Layer> cvLayer(new NgraphBackendLayer(ieNet));
 
@@ -498,7 +790,6 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
     cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-    cvNet.impl->skipInfEngineInit = true;
 
     return cvNet;
 }
 #endif  // HAVE_INF_ENGINE
 
@@ -516,7 +807,7 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
     InferenceEngine::Core& ie = getCore("");
     InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin);
 
-    return Impl::createNetworkFromModelOptimizer(ieNet);
+    return NetImplOpenVINO::createNetworkFromModelOptimizer(ieNet);
 #endif  // HAVE_INF_ENGINE
 }
 
@@ -560,7 +851,7 @@ Net Net::readFromModelOptimizer(
         CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
     }
 
-    return Impl::createNetworkFromModelOptimizer(ieNet);
+    return NetImplOpenVINO::createNetworkFromModelOptimizer(ieNet);
 #endif  // HAVE_INF_ENGINE
 }

View File

@@ -33,7 +33,7 @@ void getQuantizationParams(const Mat& src, std::vector<float>& scales, std::vect
 }
 
 // FIXIT drop from inference API
-Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
+Net Net::Impl::quantize(Net& net, InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
 {
     // Net can be quantized only once.
     if (netWasQuantized)
@@ -47,7 +47,8 @@ Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outpu
     int prefTarget = preferableTarget;
 
     // Disable fusions and use CPU backend to quantize net
-    setPreferableBackend(DNN_BACKEND_OPENCV);
+    // FIXIT: we should not modify original network!
+    setPreferableBackend(net, DNN_BACKEND_OPENCV);
     setPreferableTarget(DNN_TARGET_CPU);
     enableFusion(false);
 
@@ -163,7 +164,7 @@ Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outpu
     Net::Impl& dstNet = *(dstNet_.impl);
     dstNet.netWasQuantized = true;
     dstNet.setInputsNames(netInputLayer->outNames);
-    dstNet.setPreferableBackend(prefBackend);
+    dstNet.setPreferableBackend(dstNet_, prefBackend);
     dstNet.setPreferableTarget(prefTarget);
     dstNet.enableFusion(originalFusion);
 
@@ -253,7 +254,7 @@ Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outpu
         }
     }
 
     // Restore FP32 Net's backend, target and fusion
-    setPreferableBackend(prefBackend);
+    setPreferableBackend(net, prefBackend);
     setPreferableTarget(prefTarget);
     enableFusion(originalFusion);
     return dstNet_;
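The public entry point is unchanged apart from the internal Net& plumbing; a minimal usage sketch (the calibration blobs are hypothetical):

    // 'net' is a loaded FP32 network; calibData holds representative input blobs.
    std::vector<cv::Mat> calibData = { calibBlob1, calibBlob2 };  // hypothetical blobs
    cv::dnn::Net qnet = net.quantize(calibData, CV_8S, CV_8S, /*perChannel=*/true);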

View File

@@ -73,6 +73,8 @@ void infEngineBlobsToMats(const std::vector<InferenceEngine::Blob::Ptr>& blobs,
 CV__DNN_INLINE_NS_BEGIN
 
+void switchToOpenVINOBackend(Net& net);
+
 namespace openvino {
 // TODO: use std::string as parameter