From a718f2e6eaf88a543d4bc2441b8be582f3ba6af8 Mon Sep 17 00:00:00 2001
From: berak <px1704@web.de>
Date: Mon, 13 Jan 2020 12:26:28 +0100
Subject: [PATCH 1/6] ml/python: fix digits samples(3.4)

---
 samples/python/digits.py       | 20 +++++++++++++-------
 samples/python/digits_video.py | 14 +++++++++-----
 2 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/samples/python/digits.py b/samples/python/digits.py
index f58e9dd987..e5d8ceb59a 100755
--- a/samples/python/digits.py
+++ b/samples/python/digits.py
@@ -70,13 +70,8 @@ def deskew(img):
     img = cv.warpAffine(img, M, (SZ, SZ), flags=cv.WARP_INVERSE_MAP | cv.INTER_LINEAR)
     return img
 
-class StatModel(object):
-    def load(self, fn):
-        self.model.load(fn)  # Known bug: https://github.com/opencv/opencv/issues/4969
-    def save(self, fn):
-        self.model.save(fn)
 
-class KNearest(StatModel):
+class KNearest(object):
     def __init__(self, k = 3):
         self.k = k
         self.model = cv.ml.KNearest_create()
@@ -88,7 +83,13 @@ class KNearest(StatModel):
         _retval, results, _neigh_resp, _dists = self.model.findNearest(samples, self.k)
         return results.ravel()
 
-class SVM(StatModel):
+    def load(self, fn):
+        self.model = cv.ml.KNearest_load(fn)
+
+    def save(self, fn):
+        self.model.save(fn)
+
+class SVM(object):
     def __init__(self, C = 1, gamma = 0.5):
         self.model = cv.ml.SVM_create()
         self.model.setGamma(gamma)
@@ -102,6 +103,11 @@ class SVM(StatModel):
     def predict(self, samples):
         return self.model.predict(samples)[1].ravel()
 
+    def load(self, fn):
+        self.model = cv.ml.SVM_load(fn)
+
+    def save(self, fn):
+        self.model.save(fn)
 
 def evaluate_model(model, digits, samples, labels):
     resp = model.predict(samples)
diff --git a/samples/python/digits_video.py b/samples/python/digits_video.py
index dc035e42fb..7b07831643 100755
--- a/samples/python/digits_video.py
+++ b/samples/python/digits_video.py
@@ -1,4 +1,12 @@
 #!/usr/bin/env python
+'''
+Digit recognition from video.
+
+Run digits.py before, to train and save the SVM.
+
+Usage:
+  digits_video.py [{camera_id|video_file}]
+'''
 
 # Python 2/3 compatibility
 from __future__ import print_function
@@ -28,11 +36,7 @@ def main():
         print('"%s" not found, run digits.py first' % classifier_fn)
         return
 
-    if True:
-        model = cv.ml.SVM_load(classifier_fn)
-    else:
-        model = cv.ml.SVM_create()
-        model.load_(classifier_fn) #Known bug: https://github.com/opencv/opencv/issues/4969
+    model = cv.ml.SVM_load(classifier_fn)
 
     while True:
         _ret, frame = cap.read()

From 8f1e36f7c1a861366408d708daacc485ada1b1de Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Date: Tue, 24 Dec 2019 13:34:33 +0300
Subject: [PATCH 2/6] Disable some tests for Myriad target of nGraph

Add lightweight IE hardware targets checks

nGraph: Concat with paddings

Enable more nGraph tests

Restore FP32->FP16 for GPU plugin of IE

try to fix buildbot

Use lightweight IE targets check only starting from R4
---
 modules/dnn/src/dnn.cpp                    | 17 +++++++++
 modules/dnn/src/ie_ngraph.cpp              | 25 +++++++-----
 modules/dnn/src/layers/concat_layer.cpp    | 44 +++++++++++++++++++---
 modules/dnn/src/layers/pooling_layer.cpp   |  2 +-
 modules/dnn/src/op_inf_engine.cpp          | 16 ++++++++
 modules/dnn/test/test_backends.cpp         |  8 ++--
 modules/dnn/test/test_caffe_importer.cpp   | 12 ++++--
 modules/dnn/test/test_darknet_importer.cpp | 11 +++---
 modules/dnn/test/test_halide_layers.cpp    |  8 +---
 modules/dnn/test/test_ie_models.cpp        |  9 +++--
 modules/dnn/test/test_layers.cpp           |  6 ++-
 modules/dnn/test/test_onnx_importer.cpp    | 38 +++++++++++++------
 modules/dnn/test/test_tf_importer.cpp      | 25 +++++++++---
 modules/dnn/test/test_torch_importer.cpp   | 12 ++++++
 14 files changed, 174 insertions(+), 59 deletions(-)

diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 462a4b9816..ecd4c150d6 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -103,6 +103,22 @@ public:
 #ifdef HAVE_INF_ENGINE
     static inline bool checkIETarget(Target target)
     {
+#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2019R3)
+        // Lightweight detection
+        const std::vector<std::string> devices = getCore().GetAvailableDevices();
+        for (std::vector<std::string>::const_iterator i = devices.begin(); i != devices.end(); ++i)
+        {
+            if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD)
+                return true;
+            else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA)
+                return true;
+            else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU)
+                return true;
+            else if (std::string::npos != i->find("GPU") && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
+                return true;
+        }
+        return false;
+#else
         cv::dnn::Net net;
         cv::dnn::LayerParams lp;
         lp.set("kernel_size", 1);
@@ -126,6 +142,7 @@ public:
             return false;
         }
         return true;
+#endif
     }
 #endif
 
diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp
index 6b5c611c9a..be9022d87e 100644
--- a/modules/dnn/src/ie_ngraph.cpp
+++ b/modules/dnn/src/ie_ngraph.cpp
@@ -168,21 +168,26 @@ void InfEngineNgraphNet::init(Target targetId)
 {
     if (!hasNetOwner)
     {
-        if (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) {
+        if (targetId == DNN_TARGET_OPENCL_FP16)
+        {
             auto nodes = ngraph_function->get_ordered_ops();
-            for (auto& node : nodes) {
+            for (auto& node : nodes)
+            {
                 auto parameter = std::dynamic_pointer_cast<ngraph::op::Parameter>(node);
-                if (parameter && parameter->get_element_type() == ngraph::element::f32) {
+                if (parameter && parameter->get_element_type() == ngraph::element::f32)
+                {
                     parameter->set_element_type(ngraph::element::f16);
                 }
                 auto constant = std::dynamic_pointer_cast<ngraph::op::Constant>(node);
-                if (constant && constant->get_element_type() == ngraph::element::f32) {
-                    auto data = constant->get_vector<float>();
-                    std::vector<ngraph::float16> new_data(data.size());
-                    for (size_t i = 0; i < data.size(); ++i) {
-                        new_data[i] = ngraph::float16(data[i]);
-                    }
-                    auto new_const = std::make_shared<ngraph::op::Constant>(ngraph::element::f16, constant->get_shape(), new_data);
+                if (constant && constant->get_element_type() == ngraph::element::f32)
+                {
+                    const float* floatsData = constant->get_data_ptr<float>();
+                    size_t total = ngraph::shape_size(constant->get_shape());
+                    Mat floats(1, total, CV_32F, (void*)floatsData);
+                    Mat halfs;
+                    cv::convertFp16(floats, halfs);
+
+                    auto new_const = std::make_shared<ngraph::op::Constant>(ngraph::element::f16, constant->get_shape(), halfs.data);
                     new_const->set_friendly_name(constant->get_friendly_name());
                     ngraph::replace_node(constant, new_const);
                 }
diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp
index 98864f95f8..bb19bbdf97 100644
--- a/modules/dnn/src/layers/concat_layer.cpp
+++ b/modules/dnn/src/layers/concat_layer.cpp
@@ -106,7 +106,8 @@ public:
     {
         return backendId == DNN_BACKEND_OPENCV ||
                (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding) ||  // By channels
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && !padding);
+               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() && !padding) ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     class ChannelConcatInvoker : public ParallelLoopBody
@@ -316,14 +317,45 @@ public:
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
+        InferenceEngine::DataPtr data = ngraphDataNode(inputs[0]);
+        const int numDims = data->getDims().size();
+        const int cAxis = clamp(axis, numDims);
+        std::vector<size_t> maxDims(numDims, 0);
+
         CV_Assert(inputs.size() == nodes.size());
         ngraph::NodeVector inp_nodes;
-        for (auto& node : nodes) {
-            inp_nodes.push_back(node.dynamicCast<InfEngineNgraphNode>()->node);
-        }
+        for (size_t i = 0; i < nodes.size(); ++i)  // size_t index matches nodes.size()
+        {
+            inp_nodes.push_back(nodes[i].dynamicCast<InfEngineNgraphNode>()->node);
 
-        InferenceEngine::DataPtr data = ngraphDataNode(inputs[0]);
-        auto concat = std::make_shared<ngraph::op::Concat>(inp_nodes, clamp(axis, data->getDims().size()));
+            std::vector<size_t> inpShape = ngraphDataNode(inputs[i])->getDims();
+            for (int d = 0; d < numDims; ++d)  // 'd' keeps the outer input index 'i' unshadowed
+                maxDims[d] = std::max(maxDims[d], inpShape[d]);
+        }
+        for (size_t i = 0; i < inp_nodes.size(); ++i)
+        {
+            bool needPadding = false;
+            std::vector<size_t> inpShape = ngraphDataNode(inputs[i])->getDims();
+            std::vector<int64_t> begins(inpShape.size(), 0), ends(inpShape.size(), 0);
+            for (int j = 0; j < static_cast<int>(inpShape.size()); ++j)
+            {
+                if (j != cAxis && inpShape[j] != maxDims[j])
+                {  // split the size difference across both sides; an odd remainder goes to 'ends'
+                    needPadding = true;
+                    begins[j] = static_cast<int64_t>((maxDims[j] - inpShape[j]) / 2);
+                    ends[j] = static_cast<int64_t>(maxDims[j] - inpShape[j] - begins[j]);
+                }
+            }
+            if (needPadding)  // wrap inputs that are smaller than maxDims off the concat axis
+            {
+                inp_nodes[i] = std::make_shared<ngraph::op::v1::Pad>(
+                    inp_nodes[i],
+                    std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{begins.size()}, begins.data()),
+                    std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{ends.size()}, ends.data()),
+                    ngraph::op::PadMode::CONSTANT);
+            }
+        }
+        auto concat = std::make_shared<ngraph::op::Concat>(inp_nodes, cAxis);
         return Ptr<BackendNode>(new InfEngineNgraphNode(concat));
     }
 #endif  // HAVE_DNN_NGRAPH
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index 5727e2b3f9..320a9b0f8d 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -189,7 +189,7 @@ public:
 #endif
         }
         else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
-            return type != STOCHASTIC;
+            return !computeMaxIdx && type != STOCHASTIC;
         }
         else
             return (kernel_size.size() == 3 && backendId == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU) ||
diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp
index c6f741f3c9..013989312b 100644
--- a/modules/dnn/src/op_inf_engine.cpp
+++ b/modules/dnn/src/op_inf_engine.cpp
@@ -573,6 +573,21 @@ InferenceEngine::Core& getCore()
 #if !defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT)
 static bool detectMyriadX_()
 {
+#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2019R3)
+    // Lightweight detection
+    InferenceEngine::Core& ie = getCore();
+    const std::vector<std::string> devices = ie.GetAvailableDevices();
+    for (std::vector<std::string>::const_iterator i = devices.begin(); i != devices.end(); ++i)
+    {
+        if (i->find("MYRIAD") != std::string::npos)
+        {
+            const std::string name = ie.GetMetric(*i, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
+            CV_LOG_INFO(NULL, "Myriad device: " << name);
+            return name.find("MyriadX") != std::string::npos  || name.find("Myriad X") != std::string::npos;
+        }
+    }
+    return false;
+#else
     InferenceEngine::Builder::Network builder("");
     InferenceEngine::idx_t inpId = builder.addLayer(
                                    InferenceEngine::Builder::InputLayer().setPort(InferenceEngine::Port({1})));
@@ -633,6 +648,7 @@ static bool detectMyriadX_()
         return false;
     }
     return true;
+#endif
 }
 #endif  // !defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT)
 
diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp
index 8959612b43..2bee9e06cf 100644
--- a/modules/dnn/test/test_backends.cpp
+++ b/modules/dnn/test/test_backends.cpp
@@ -189,8 +189,8 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe_Different_Width_Height)
     if (backend == DNN_BACKEND_HALIDE)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE);
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) &&
+        target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
 #endif
     Mat sample = imread(findDataFile("dnn/street.png"));
@@ -223,8 +223,8 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow_Different_Width_Height)
     if (backend == DNN_BACKEND_HALIDE)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE);
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) &&
+        target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
 #endif
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2019020000)
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
index 22e31db5ec..09d8745c14 100644
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -660,9 +660,11 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf)
         (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB),
         CV_TEST_TAG_DEBUG_LONG
     );
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
     static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
                                            0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
@@ -677,9 +679,11 @@ TEST_P(Test_Caffe_nets, RFCN)
         CV_TEST_TAG_LONG,
         CV_TEST_TAG_DEBUG_VERYLONG
     );
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
     double scoreDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 4e-3 : default_l1;
     double iouDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 8e-2 : default_lInf;
diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp
index eced69555e..6c43622adb 100644
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@@ -307,8 +307,8 @@ TEST_P(Test_Darknet_nets, YoloVoc)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
 #endif
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) &&
+        target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);  // need to update check function
 #endif
 
@@ -343,8 +343,8 @@ TEST_P(Test_Darknet_nets, TinyYoloVoc)
     applyTestTag(CV_TEST_TAG_MEMORY_512MB);
 
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) &&
+        target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);  // need to update check function
 #endif
     // batchId, classId, confidence, left, top, right, bottom
@@ -460,7 +460,8 @@ TEST_P(Test_Darknet_nets, YOLOv3)
     std::string weights_file = "yolov3.weights";
 
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD &&
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD &&
         getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
     {
         scoreDiff = 0.04;
diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp
index 11668b4b79..c926f5b7e3 100644
--- a/modules/dnn/test/test_halide_layers.cpp
+++ b/modules/dnn/test/test_halide_layers.cpp
@@ -350,11 +350,6 @@ TEST_P(MaxPooling, Accuracy)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
 
-#if defined(INF_ENGINE_RELEASE)
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && stride != Size(1, 1) && pad != Size(0, 0))
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
-#endif
-
     LayerParams lp;
     lp.set("pool", "max");
     lp.set("kernel_w", kernel.width);
@@ -392,7 +387,8 @@ TEST_P(FullyConnected, Accuracy)
     bool hasBias = get<3>(GetParam());
     Backend backendId = get<0>(get<4>(GetParam()));
     Target targetId = get<1>(get<4>(GetParam()));
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (targetId == DNN_TARGET_OPENCL_FP16 ||
+    if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && (targetId == DNN_TARGET_OPENCL_FP16 ||
        (targetId == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X))) {
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp
index 8d94543067..23869df36e 100644
--- a/modules/dnn/test/test_ie_models.cpp
+++ b/modules/dnn/test/test_ie_models.cpp
@@ -134,12 +134,13 @@ static const std::vector<std::string> getOpenVINOTestModelsList()
     return result;
 }
 
-static inline void genData(const std::vector<size_t>& dims, Mat& m, Blob::Ptr& dataPtr)
+static inline void genData(const InferenceEngine::TensorDesc& desc, Mat& m, Blob::Ptr& dataPtr)
 {
+    const std::vector<size_t>& dims = desc.getDims();
     m.create(std::vector<int>(dims.begin(), dims.end()), CV_32F);
     randu(m, -1, 1);
 
-    dataPtr = make_shared_blob<float>({Precision::FP32, dims, Layout::ANY}, (float*)m.data);
+    dataPtr = make_shared_blob<float>(desc, (float*)m.data);
 }
 
 void runIE(Target target, const std::string& xmlPath, const std::string& binPath,
@@ -238,7 +239,7 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath
     BlobMap inputBlobs;
     for (auto& it : net.getInputsInfo())
     {
-        genData(it.second->getTensorDesc().getDims(), inputsMap[it.first], inputBlobs[it.first]);
+        genData(it.second->getTensorDesc(), inputsMap[it.first], inputBlobs[it.first]);
     }
     infRequest.SetInput(inputBlobs);
 
@@ -247,7 +248,7 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath
     BlobMap outputBlobs;
     for (auto& it : net.getOutputsInfo())
     {
-        genData(it.second->getTensorDesc().getDims(), outputsMap[it.first], outputBlobs[it.first]);
+        genData(it.second->getTensorDesc(), outputsMap[it.first], outputBlobs[it.first]);
     }
     infRequest.SetOutput(outputBlobs);
 
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index f9ff4ed883..0d61f7af1b 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -846,6 +846,8 @@ TEST_P(Test_Caffe_layers, PriorBox_squares)
 {
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
     LayerParams lp;
     lp.name = "testPriorBox";
     lp.type = "PriorBox";
@@ -1276,7 +1278,7 @@ static void test_dldt_fused_output(Backend backend, Target target)
     }
     net.setPreferableBackend(backend);
     net.setPreferableTarget(target);
-    net.setInput(Mat({1, 1, 1, 1}, CV_32FC1, Scalar(1)));
+    net.setInput(Mat({1, 1, 2, 3}, CV_32FC1, Scalar(1)));
     net.forward();
 }
 
@@ -1315,7 +1317,7 @@ TEST_P(Test_DLDT_layers, multiple_networks)
         nets[i].addLayerToPrev(lp.name, lp.type, lp);
         nets[i].setPreferableBackend(backend);
         nets[i].setPreferableTarget(target);
-        nets[i].setInput(Mat({1, 1, 1, 1}, CV_32FC1, Scalar(1)));
+        nets[i].setInput(Mat({1, 1, 2, 3}, CV_32FC1, Scalar(1)));
     }
     Mat out_1 = nets[0].forward();
     Mat out_2 = nets[1].forward();
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 2122813195..d0f939f626 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -345,9 +345,12 @@ TEST_P(Test_ONNX_layers, Div)
     net.setPreferableBackend(backend);
     net.setPreferableTarget(target);
 
-    Mat inp1 = blobFromNPY(_tf("data/input_div_0.npy"));
-    Mat inp2 = blobFromNPY(_tf("data/input_div_1.npy"));
+    // Reference output values range is -68.80928, 2.991873. So to avoid computational
+    // difference for FP16 we'll perform reversed division (just swap inputs).
+    Mat inp1 = blobFromNPY(_tf("data/input_div_1.npy"));
+    Mat inp2 = blobFromNPY(_tf("data/input_div_0.npy"));
     Mat ref  = blobFromNPY(_tf("data/output_div.npy"));
+    cv::divide(1.0, ref, ref);
     checkBackend(&inp1, &ref);
 
     net.setInput(inp1, "0");
@@ -448,6 +451,9 @@ TEST_P(Test_ONNX_nets, Googlenet)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
 
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+
     const String model = _tf("models/googlenet.onnx", false);
 
     Net net = readNetFromONNX(model);
@@ -491,7 +497,7 @@ TEST_P(Test_ONNX_nets, RCNN_ILSVRC13)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
     // Reference output values are in range [-4.992, -1.161]
-    testONNXModels("rcnn_ilsvrc13", pb, 0.0045);
+    testONNXModels("rcnn_ilsvrc13", pb, 0.0046);
 }
 
 TEST_P(Test_ONNX_nets, VGG16_bn)
@@ -558,10 +564,12 @@ TEST_P(Test_ONNX_nets, TinyYolov2)
     )
         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
 
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X
+    if (target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X
     )
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X,
+                     backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ?
+                     CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER :
+                     CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif
 
     // output range: [-11; 8]
@@ -594,6 +602,12 @@ TEST_P(Test_ONNX_nets, LResNet100E_IR)
         if (target == DNN_TARGET_OPENCL)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
         if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
     }
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+        if (target == DNN_TARGET_OPENCL)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+        if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
 
     double l1 = default_l1;
     double lInf = default_lInf;
@@ -612,10 +626,11 @@ TEST_P(Test_ONNX_nets, LResNet100E_IR)
 TEST_P(Test_ONNX_nets, Emotion_ferplus)
 {
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X
-    )
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X,
+                     backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ?
+                     CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER :
+                     CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif
 
     double l1 = default_l1;
@@ -652,7 +667,8 @@ TEST_P(Test_ONNX_nets, DenseNet121)
 TEST_P(Test_ONNX_nets, Inception_v1)
 {
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
 #endif
     testONNXModels("inception_v1", pb);
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 8826fa09ff..54da723973 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -247,10 +247,13 @@ TEST_P(Test_TensorFlow_layers, ave_pool_same)
 {
     // Reference output values are in range [-0.519531, 0.112976]
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X
-    )
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+    if (target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+    {
+        if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+        else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+    }
 #endif
     runTensorFlowNet("ave_pool_same");
 }
@@ -373,6 +376,8 @@ TEST_P(Test_TensorFlow_layers, l2_normalize_3d)
 #if defined(INF_ENGINE_RELEASE)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif
 
     runTensorFlowNet("l2_normalize_3d");
@@ -383,11 +388,15 @@ class Test_TensorFlow_nets : public DNNTestLayer {};
 TEST_P(Test_TensorFlow_nets, MobileNet_SSD)
 {
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
+    if (target == DNN_TARGET_MYRIAD)
     {
 #if INF_ENGINE_VER_MAJOR_GE(2019020000)
         if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
-            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X,
+                         backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ?
+                             CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER :
+                             CV_TEST_TAG_DNN_SKIP_IE_NGRAPH,
+                         CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
     }
 #endif
@@ -503,6 +512,10 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 &&
         (INF_ENGINE_VER_MAJOR_LT(2019020000) || target != DNN_TARGET_CPU))
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+
+    if (INF_ENGINE_VER_MAJOR_GT(2019030000) &&
+        backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif
     // segfault: inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp:111:
     // Assertion `prior_height > 0' failed.
diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp
index 889156a62d..b9da2a425e 100644
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@@ -211,6 +211,8 @@ TEST_P(Test_Torch_layers, net_conv_gemm_lrn)
 {
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
     runTorchNet("net_conv_gemm_lrn", "", false, true, true,
                 target == DNN_TARGET_OPENCL_FP16 ? 0.046 : 0.0,
                 target == DNN_TARGET_OPENCL_FP16 ? 0.023 : 0.0);
@@ -348,6 +350,13 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
         if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
         throw SkipTestException("");
     }
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU)
+    {
+        if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+        if (target == DNN_TARGET_OPENCL)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+        if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+        throw SkipTestException("");
+    }
 
     Net net;
     {
@@ -400,6 +409,9 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
             && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD
+            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif
 
     checkBackend();

From 31289d2f32ab1941c0f2de0ecfb741e95637db89 Mon Sep 17 00:00:00 2001
From: Vadim Levin <vadim.levin@xperience.ai>
Date: Mon, 13 Jan 2020 18:11:34 +0300
Subject: [PATCH 3/6] Merge pull request #15915 from VadimLevin:dev/norm_fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix implicit conversion from array to scalar in python bindings

* Fix wrong conversion behavior for primitive types

  - Introduce ArgTypeInfo namedtuple instead of plain tuple.
    If the strict conversion parameter for a type is set to true, the
    argument is parsed as an object by PyArg_ParseTupleAndKeywords and
    converted to the concrete type with the appropriate pyopencv_to
    function call.
  - Remove dead code and unused variables.
  - Fix implicit conversion from numpy array with 1 element to scalar
  - Fix narrowing conversion to size_t type.

* Fix wrong conversion behavior for primitive types

  - Introduce ArgTypeInfo namedtuple instead of plain tuple.
    If the strict conversion parameter for a type is set to true, the
    argument is parsed as an object by PyArg_ParseTupleAndKeywords and
    converted to the concrete type with the appropriate pyopencv_to
    function call.
  - Remove dead code and unused variables.
  - Fix implicit conversion from numpy array with 1 element to scalar
  - Fix narrowing conversion to size_t type.
  - Enable tests with wrong conversion behavior
  - Restrict passing None as value
  - Restrict bool to integer/floating types conversion

* Add PyIntType support for Python 2

* Remove possible narrowing conversion of size_t

* Bindings conversion update

  - Remove unused macro
  - Add better conversion for types to numpy types descriptors
  - Add argument name to fail messages
  - NoneType is treated as a valid argument. Better handling will be
    added in a standalone patch.

* Add descriptor specialization for size_t

* Add check for signed to unsigned integer conversion safety

  - If signed integer is positive it can be safely converted
    to unsigned
  - Add check for plain python 2 objects
  - Add check for numpy scalars
  - Add simple type_traits implementation for better code style

* Resolve type "overflow" false negative in safe casting check

 - Move type_traits to separate header

* Add copyright message to type_traits.hpp

* Limit conversion scope for integral numpy types

  - Made canBeSafelyCasted specialized only for size_t, so
    type_traits header became unused and was removed.
  - Added clarification about descriptor pointer
---
 modules/python/src2/cv2.cpp      | 330 +++++++++++++++++++++++++++----
 modules/python/src2/gen2.py      |  77 +++++---
 modules/python/test/test_misc.py |  25 +--
 modules/python/test/test_norm.py | 173 ++++++++++++++++
 4 files changed, 532 insertions(+), 73 deletions(-)
 create mode 100644 modules/python/test/test_norm.py

diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp
index d493f1f68d..0b516a4e9d 100644
--- a/modules/python/src2/cv2.cpp
+++ b/modules/python/src2/cv2.cpp
@@ -13,11 +13,14 @@
 #   define Py_LIMITED_API 0x03030000
 #endif
 
-#include <math.h>
+#include <cmath>
 #include <Python.h>
+#include <limits>
 
 #if PY_MAJOR_VERSION < 3
 #undef CVPY_DYNAMIC_INIT
+#else
+#define CV_PYTHON_3 1
 #endif
 
 #if defined(_MSC_VER) && (_MSC_VER > 1800)
@@ -37,16 +40,17 @@
 #include "pycompat.hpp"
 #include <map>
 
+#define CV_HAS_CONVERSION_ERROR(x) (((x) == -1) && PyErr_Occurred())
+
+
 class ArgInfo
 {
 public:
-    const char * name;
+    const char* name;
     bool outputarg;
     // more fields may be added if necessary
 
-    ArgInfo(const char * name_, bool outputarg_)
-        : name(name_)
-        , outputarg(outputarg_) {}
+    ArgInfo(const char* name_, bool outputarg_) : name(name_), outputarg(outputarg_) {}
 
 private:
     ArgInfo(const ArgInfo&); // = delete
@@ -159,6 +163,135 @@ catch (const cv::Exception &e) \
 
 using namespace cv;
 
+
+namespace {
+template<class T>
+NPY_TYPES asNumpyType()
+{
+    return NPY_OBJECT;
+}
+
+template<>
+NPY_TYPES asNumpyType<bool>()
+{
+    return NPY_BOOL;
+}
+
+#define CV_GENERATE_INTEGRAL_TYPE_NPY_CONVERSION(src, dst) \
+    template<>                                             \
+    NPY_TYPES asNumpyType<src>()                           \
+    {                                                      \
+        return NPY_##dst;                                  \
+    }                                                      \
+    template<>                                             \
+    NPY_TYPES asNumpyType<u##src>()                        \
+    {                                                      \
+        return NPY_U##dst;                                 \
+    }
+
+CV_GENERATE_INTEGRAL_TYPE_NPY_CONVERSION(int8_t, INT8);
+
+CV_GENERATE_INTEGRAL_TYPE_NPY_CONVERSION(int16_t, INT16);
+
+CV_GENERATE_INTEGRAL_TYPE_NPY_CONVERSION(int32_t, INT32);
+
+CV_GENERATE_INTEGRAL_TYPE_NPY_CONVERSION(int64_t, INT64);
+
+#undef CV_GENERATE_INTEGRAL_TYPE_NPY_CONVERSION
+
+template<>
+NPY_TYPES asNumpyType<float>()
+{
+    return NPY_FLOAT;
+}
+
+template<>
+NPY_TYPES asNumpyType<double>()
+{
+    return NPY_DOUBLE;
+}
+
+template <class T>
+PyArray_Descr* getNumpyTypeDescriptor()
+{
+    return PyArray_DescrFromType(asNumpyType<T>());
+}
+
+template <>
+PyArray_Descr* getNumpyTypeDescriptor<size_t>()
+{
+#if SIZE_MAX == ULONG_MAX
+    return PyArray_DescrFromType(NPY_ULONG);
+#elif SIZE_MAX == ULLONG_MAX
+    return PyArray_DescrFromType(NPY_ULONGLONG);
+#else
+    return PyArray_DescrFromType(NPY_UINT);
+#endif
+}
+
+template <class T, class U>
+bool isRepresentable(U value) {
+    return (std::numeric_limits<T>::min() <= value) && (value <= std::numeric_limits<T>::max());
+}
+
+template<class T>
+bool canBeSafelyCasted(PyObject* obj, PyArray_Descr* to)
+{
+    return PyArray_CanCastTo(PyArray_DescrFromScalar(obj), to) != 0;
+}
+
+
+template<>
+bool canBeSafelyCasted<size_t>(PyObject* obj, PyArray_Descr* to)
+{
+    PyArray_Descr* from = PyArray_DescrFromScalar(obj);
+    if (PyArray_CanCastTo(from, to))
+    {
+        return true;
+    }
+    else
+    {
+        // False negative scenarios:
+        // - Signed input is positive so it can be safely cast to unsigned output
+        // - Input has wider limits but value is representable within output limits
+        // - All the above
+        if (PyDataType_ISSIGNED(from))
+        {
+            int64_t input = 0;
+            PyArray_CastScalarToCtype(obj, &input, getNumpyTypeDescriptor<int64_t>());
+            return (input >= 0) && isRepresentable<size_t>(static_cast<uint64_t>(input));
+        }
+        else
+        {
+            uint64_t input = 0;
+            PyArray_CastScalarToCtype(obj, &input, getNumpyTypeDescriptor<uint64_t>());
+            return isRepresentable<size_t>(input);
+        }
+        return false;
+    }
+}
+
+
+template<class T>
+bool parseNumpyScalar(PyObject* obj, T& value)
+{
+    if (PyArray_CheckScalar(obj))
+    {
+        // According to the numpy documentation:
+        // There are 21 statically-defined PyArray_Descr objects for the built-in data-types
+        // So descriptor pointer is not owning.
+        PyArray_Descr* to = getNumpyTypeDescriptor<T>();
+        if (canBeSafelyCasted<T>(obj, to))
+        {
+            PyArray_CastScalarToCtype(obj, &value, to);
+            return true;
+        }
+    }
+    return false;
+}
+
+} // namespace
+
 typedef std::vector<uchar> vector_uchar;
 typedef std::vector<char> vector_char;
 typedef std::vector<int> vector_int;
@@ -268,6 +401,11 @@ NumpyAllocator g_numpyAllocator;
 
 enum { ARG_NONE = 0, ARG_MAT = 1, ARG_SCALAR = 2 };
 
+static bool isBool(PyObject* obj) CV_NOEXCEPT
+{
+    return PyArray_IsScalar(obj, Bool) || PyBool_Check(obj);
+}
+
 // special case, when the converter needs full ArgInfo structure
 static bool pyopencv_to(PyObject* o, Mat& m, const ArgInfo& info)
 {
@@ -578,14 +716,22 @@ PyObject* pyopencv_from(const bool& value)
 template<>
 bool pyopencv_to(PyObject* obj, bool& value, const ArgInfo& info)
 {
-    CV_UNUSED(info);
-    if(!obj || obj == Py_None)
+    if (!obj || obj == Py_None)
+    {
         return true;
-    int _val = PyObject_IsTrue(obj);
-    if(_val < 0)
-        return false;
-    value = _val > 0;
-    return true;
+    }
+    if (isBool(obj) || PyArray_IsIntegerScalar(obj))
+    {
+        npy_bool npy_value = NPY_FALSE;
+        const int ret_code = PyArray_BoolConverter(obj, &npy_value);
+        if (ret_code >= 0)
+        {
+            value = (npy_value == NPY_TRUE);
+            return true;
+        }
+    }
+    failmsg("Argument '%s' is not convertable to bool", info.name);
+    return false;
 }
 
 template<>
@@ -597,11 +743,62 @@ PyObject* pyopencv_from(const size_t& value)
 template<>
 bool pyopencv_to(PyObject* obj, size_t& value, const ArgInfo& info)
 {
-    CV_UNUSED(info);
-    if(!obj || obj == Py_None)
+    if (!obj || obj == Py_None)
+    {
         return true;
-    value = (int)PyLong_AsUnsignedLong(obj);
-    return value != (size_t)-1 || !PyErr_Occurred();
+    }
+    if (isBool(obj))
+    {
+        failmsg("Argument '%s' must be integer type, not bool", info.name);
+        return false;
+    }
+    if (PyArray_IsIntegerScalar(obj))
+    {
+        if (PyLong_Check(obj))
+        {
+#if defined(CV_PYTHON_3)
+            value = PyLong_AsSize_t(obj);
+#else
+    #if ULONG_MAX == SIZE_MAX
+            value = PyLong_AsUnsignedLong(obj);
+    #else
+            value = PyLong_AsUnsignedLongLong(obj);
+    #endif
+#endif
+        }
+#if !defined(CV_PYTHON_3)
+        // Python 2.x has PyIntObject which is not a subtype of PyLongObject
+        // Overflow check here is unnecessary because object will be converted to long on the
+        // interpreter side
+        else if (PyInt_Check(obj))
+        {
+            const long res = PyInt_AsLong(obj);
+            if (res < 0) {
+                failmsg("Argument '%s' can not be safely parsed to 'size_t'", info.name);
+                return false;
+            }
+    #if ULONG_MAX == SIZE_MAX
+            value = PyInt_AsUnsignedLongMask(obj);
+    #else
+            value = PyInt_AsUnsignedLongLongMask(obj);
+    #endif
+        }
+#endif
+        else
+        {
+            const bool isParsed = parseNumpyScalar<size_t>(obj, value);
+            if (!isParsed) {
+                failmsg("Argument '%s' can not be safely parsed to 'size_t'", info.name);
+                return false;
+            }
+        }
+    }
+    else
+    {
+        failmsg("Argument '%s' is required to be an integer", info.name);
+        return false;
+    }
+    return !PyErr_Occurred();
 }
 
 template<>
@@ -613,16 +810,25 @@ PyObject* pyopencv_from(const int& value)
 template<>
 bool pyopencv_to(PyObject* obj, int& value, const ArgInfo& info)
 {
-    CV_UNUSED(info);
-    if(!obj || obj == Py_None)
+    if (!obj || obj == Py_None)
+    {
         return true;
-    if(PyInt_Check(obj))
-        value = (int)PyInt_AsLong(obj);
-    else if(PyLong_Check(obj))
-        value = (int)PyLong_AsLong(obj);
-    else
+    }
+    if (isBool(obj))
+    {
+        failmsg("Argument '%s' must be integer, not bool", info.name);
         return false;
-    return value != -1 || !PyErr_Occurred();
+    }
+    if (PyArray_IsIntegerScalar(obj))
+    {
+        value = PyArray_PyIntAsInt(obj);
+    }
+    else
+    {
+        failmsg("Argument '%s' is required to be an integer", info.name);
+        return false;
+    }
+    return !CV_HAS_CONVERSION_ERROR(value);
 }
 
 template<>
@@ -651,13 +857,39 @@ PyObject* pyopencv_from(const double& value)
 template<>
 bool pyopencv_to(PyObject* obj, double& value, const ArgInfo& info)
 {
-    CV_UNUSED(info);
-    if(!obj || obj == Py_None)
+    if (!obj || obj == Py_None)
+    {
         return true;
-    if(!!PyInt_CheckExact(obj))
-        value = (double)PyInt_AS_LONG(obj);
+    }
+    if (isBool(obj))
+    {
+        failmsg("Argument '%s' must be double, not bool", info.name);
+        return false;
+    }
+    if (PyArray_IsPythonNumber(obj))
+    {
+        if (PyLong_Check(obj))
+        {
+            value = PyLong_AsDouble(obj);
+        }
+        else
+        {
+            value = PyFloat_AsDouble(obj);
+        }
+    }
+    else if (PyArray_CheckScalar(obj))
+    {
+        const bool isParsed = parseNumpyScalar<double>(obj, value);
+        if (!isParsed) {
+            failmsg("Argument '%s' can not be safely parsed to 'double'", info.name);
+            return false;
+        }
+    }
     else
-        value = PyFloat_AsDouble(obj);
+    {
+        failmsg("Argument '%s' can not be treated as a double", info.name);
+        return false;
+    }
     return !PyErr_Occurred();
 }
 
@@ -670,13 +902,41 @@ PyObject* pyopencv_from(const float& value)
 template<>
 bool pyopencv_to(PyObject* obj, float& value, const ArgInfo& info)
 {
-    CV_UNUSED(info);
-    if(!obj || obj == Py_None)
+    if (!obj || obj == Py_None)
+    {
         return true;
-    if(!!PyInt_CheckExact(obj))
-        value = (float)PyInt_AS_LONG(obj);
+    }
+    if (isBool(obj))
+    {
+        failmsg("Argument '%s' must be float, not bool", info.name);
+        return false;
+    }
+    if (PyArray_IsPythonNumber(obj))
+    {
+        if (PyLong_Check(obj))
+        {
+            double res = PyLong_AsDouble(obj);
+            value = static_cast<float>(res);
+        }
+        else
+        {
+            double res = PyFloat_AsDouble(obj);
+            value = static_cast<float>(res);
+        }
+    }
+    else if (PyArray_CheckScalar(obj))
+    {
+       const bool isParsed = parseNumpyScalar<float>(obj, value);
+        if (!isParsed) {
+            failmsg("Argument '%s' can not be safely parsed to 'float'", info.name);
+            return false;
+        }
+    }
     else
-        value = (float)PyFloat_AsDouble(obj);
+    {
+        failmsg("Argument '%s' can't be treated as a float", info.name);
+        return false;
+    }
     return !PyErr_Occurred();
 }
 
@@ -1742,7 +2002,7 @@ static bool init_body(PyObject * m)
 #pragma GCC visibility push(default)
 #endif
 
-#if PY_MAJOR_VERSION >= 3
+#if defined(CV_PYTHON_3)
 // === Python 3
 
 static struct PyModuleDef cv2_moduledef =
diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py
index cd1b8f677c..d3c8ec39cf 100755
--- a/modules/python/src2/gen2.py
+++ b/modules/python/src2/gen2.py
@@ -4,12 +4,14 @@ from __future__ import print_function
 import hdr_parser, sys, re, os
 from string import Template
 from pprint import pprint
+from collections import namedtuple
 
 if sys.version_info[0] >= 3:
     from io import StringIO
 else:
     from cStringIO import StringIO
 
+
 forbidden_arg_types = ["void*"]
 
 ignored_arg_types = ["RNG*"]
@@ -172,18 +174,48 @@ gen_template_prop_init = Template("""
 gen_template_rw_prop_init = Template("""
     {(char*)"${member}", (getter)pyopencv_${name}_get_${member}, (setter)pyopencv_${name}_set_${member}, (char*)"${member}", NULL},""")
 
+class FormatStrings:
+    string = 's'
+    unsigned_char = 'b'
+    short_int = 'h'
+    int = 'i'
+    unsigned_int = 'I'
+    long = 'l'
+    unsigned_long = 'k'
+    long_long = 'L'
+    unsigned_long_long = 'K'
+    size_t = 'n'
+    float = 'f'
+    double = 'd'
+    object = 'O'
+
+ArgTypeInfo = namedtuple('ArgTypeInfo',
+                        ['atype', 'format_str', 'default_value',
+                         'strict_conversion'])
+# strict_conversion is False by default
+ArgTypeInfo.__new__.__defaults__ = (False,)
+
 simple_argtype_mapping = {
-    "bool": ("bool", "b", "0"),
-    "size_t": ("size_t", "I", "0"),
-    "int": ("int", "i", "0"),
-    "float": ("float", "f", "0.f"),
-    "double": ("double", "d", "0"),
-    "c_string": ("char*", "s", '(char*)""')
+    "bool": ArgTypeInfo("bool", FormatStrings.unsigned_char, "0", True),
+    "size_t": ArgTypeInfo("size_t", FormatStrings.unsigned_long_long, "0", True),
+    "int": ArgTypeInfo("int", FormatStrings.int, "0", True),
+    "float": ArgTypeInfo("float", FormatStrings.float, "0.f", True),
+    "double": ArgTypeInfo("double", FormatStrings.double, "0", True),
+    "c_string": ArgTypeInfo("char*", FormatStrings.string, '(char*)""')
 }
 
+
 def normalize_class_name(name):
     return re.sub(r"^cv\.", "", name).replace(".", "_")
 
+
+def get_type_format_string(arg_type_info):
+    if arg_type_info.strict_conversion:
+        return FormatStrings.object
+    else:
+        return arg_type_info.format_str
+
+
 class ClassProp(object):
     def __init__(self, decl):
         self.tp = decl[0].replace("*", "_ptr")
@@ -576,7 +608,7 @@ class FuncInfo(object):
                 fullname = selfinfo.wname + "." + fullname
 
         all_code_variants = []
-        declno = -1
+
         for v in self.variants:
             code_decl = ""
             code_ret = ""
@@ -584,7 +616,6 @@ class FuncInfo(object):
 
             code_args = "("
             all_cargs = []
-            parse_arglist = []
 
             if v.isphantom and ismethod and not self.is_static:
                 code_args += "_self_"
@@ -617,22 +648,22 @@ class FuncInfo(object):
                 if any(tp in codegen.enums.keys() for tp in tp_candidates):
                     defval0 = "static_cast<%s>(%d)" % (a.tp, 0)
 
-                amapping = simple_argtype_mapping.get(tp, (tp, "O", defval0))
+                arg_type_info = simple_argtype_mapping.get(tp, ArgTypeInfo(tp, FormatStrings.object, defval0, True))
                 parse_name = a.name
                 if a.py_inputarg:
-                    if amapping[1] == "O":
+                    if arg_type_info.strict_conversion:
                         code_decl += "    PyObject* pyobj_%s = NULL;\n" % (a.name,)
                         parse_name = "pyobj_" + a.name
                         if a.tp == 'char':
-                            code_cvt_list.append("convert_to_char(pyobj_%s, &%s, %s)"% (a.name, a.name, a.crepr()))
+                            code_cvt_list.append("convert_to_char(pyobj_%s, &%s, %s)" % (a.name, a.name, a.crepr()))
                         else:
                             code_cvt_list.append("pyopencv_to(pyobj_%s, %s, %s)" % (a.name, a.name, a.crepr()))
 
-                all_cargs.append([amapping, parse_name])
+                all_cargs.append([arg_type_info, parse_name])
 
                 defval = a.defval
                 if not defval:
-                    defval = amapping[2]
+                    defval = arg_type_info.default_value
                 else:
                     if "UMat" in tp:
                         if "Mat" in defval and "UMat" not in defval:
@@ -641,14 +672,14 @@ class FuncInfo(object):
                         if "Mat" in defval and "GpuMat" not in defval:
                             defval = defval.replace("Mat", "cuda::GpuMat")
                 # "tp arg = tp();" is equivalent to "tp arg;" in the case of complex types
-                if defval == tp + "()" and amapping[1] == "O":
+                if defval == tp + "()" and arg_type_info.format_str == FormatStrings.object:
                     defval = ""
                 if a.outputarg and not a.inputarg:
                     defval = ""
                 if defval:
-                    code_decl += "    %s %s=%s;\n" % (amapping[0], a.name, defval)
+                    code_decl += "    %s %s=%s;\n" % (arg_type_info.atype, a.name, defval)
                 else:
-                    code_decl += "    %s %s;\n" % (amapping[0], a.name)
+                    code_decl += "    %s %s;\n" % (arg_type_info.atype, a.name)
 
                 if not code_args.endswith("("):
                     code_args += ", "
@@ -690,12 +721,16 @@ class FuncInfo(object):
             if v.rettype:
                 tp = v.rettype
                 tp1 = tp.replace("*", "_ptr")
-                amapping = simple_argtype_mapping.get(tp, (tp, "O", "0"))
-                all_cargs.append(amapping)
+                default_info = ArgTypeInfo(tp, FormatStrings.object, "0")
+                arg_type_info = simple_argtype_mapping.get(tp, default_info)
+                all_cargs.append(arg_type_info)
 
             if v.args and v.py_arglist:
                 # form the format spec for PyArg_ParseTupleAndKeywords
-                fmtspec = "".join([all_cargs[argno][0][1] for aname, argno in v.py_arglist])
+                fmtspec = "".join([
+                    get_type_format_string(all_cargs[argno][0])
+                    for aname, argno in v.py_arglist
+                ])
                 if v.py_noptargs > 0:
                     fmtspec = fmtspec[:-v.py_noptargs] + "|" + fmtspec[-v.py_noptargs:]
                 fmtspec += ":" + fullname
@@ -723,10 +758,6 @@ class FuncInfo(object):
             else:
                 # there is more than 1 return parameter; form the tuple out of them
                 fmtspec = "N"*len(v.py_outlist)
-                backcvt_arg_list = []
-                for aname, argno in v.py_outlist:
-                    amapping = all_cargs[argno][0]
-                    backcvt_arg_list.append("%s(%s)" % (amapping[2], aname))
                 code_ret = "return Py_BuildValue(\"(%s)\", %s)" % \
                     (fmtspec, ", ".join(["pyopencv_from(" + aname + ")" for aname, argno in v.py_outlist]))
 
diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py
index 0918986881..b25ef7efbb 100644
--- a/modules/python/test/test_misc.py
+++ b/modules/python/test/test_misc.py
@@ -136,13 +136,12 @@ class Arguments(NewOpenCVTests):
                              msg=get_conversion_error_msg(convertible_false, 'bool: false', actual))
 
     def test_parse_to_bool_not_convertible(self):
-        for not_convertible in (1.2, np.float(2.3), 's', 'str', (1, 2), [1, 2], complex(1, 1), None,
+        for not_convertible in (1.2, np.float(2.3), 's', 'str', (1, 2), [1, 2], complex(1, 1),
                                 complex(imag=2), complex(1.1), np.array([1, 0], dtype=np.bool)):
             with self.assertRaises((TypeError, OverflowError),
                                    msg=get_no_exception_msg(not_convertible)):
                 _ = cv.utils.dumpBool(not_convertible)
 
-    @unittest.skip('Wrong conversion behavior')
     def test_parse_to_bool_convertible_extra(self):
         try_to_convert = partial(self._try_to_convert, cv.utils.dumpBool)
         _, max_size_t = get_limits(ctypes.c_size_t)
@@ -151,7 +150,6 @@ class Arguments(NewOpenCVTests):
             self.assertEqual('bool: true', actual,
                              msg=get_conversion_error_msg(convertible_true, 'bool: true', actual))
 
-    @unittest.skip('Wrong conversion behavior')
     def test_parse_to_bool_not_convertible_extra(self):
         for not_convertible in (np.array([False]), np.array([True], dtype=np.bool)):
             with self.assertRaises((TypeError, OverflowError),
@@ -172,12 +170,11 @@ class Arguments(NewOpenCVTests):
         min_int, max_int = get_limits(ctypes.c_int)
         for not_convertible in (1.2, np.float(4), float(3), np.double(45), 's', 'str',
                                 np.array([1, 2]), (1,), [1, 2], min_int - 1, max_int + 1,
-                                complex(1, 1), complex(imag=2), complex(1.1), None):
+                                complex(1, 1), complex(imag=2), complex(1.1)):
             with self.assertRaises((TypeError, OverflowError, ValueError),
                                    msg=get_no_exception_msg(not_convertible)):
                 _ = cv.utils.dumpInt(not_convertible)
 
-    @unittest.skip('Wrong conversion behavior')
     def test_parse_to_int_not_convertible_extra(self):
         for not_convertible in (np.bool_(True), True, False, np.float32(2.3),
                                 np.array([3, ], dtype=int), np.array([-2, ], dtype=np.int32),
@@ -189,7 +186,7 @@ class Arguments(NewOpenCVTests):
     def test_parse_to_size_t_convertible(self):
         try_to_convert = partial(self._try_to_convert, cv.utils.dumpSizeT)
         _, max_uint = get_limits(ctypes.c_uint)
-        for convertible in (2, True, False, max_uint, (12), np.uint8(34), np.int8(12), np.int16(23),
+        for convertible in (2, max_uint, (12), np.uint8(34), np.int8(12), np.int16(23),
                             np.int32(123), np.int64(344), np.uint64(3), np.uint16(2), np.uint32(5),
                             np.uint(44)):
             expected = 'size_t: {0:d}'.format(convertible).lower()
@@ -198,14 +195,15 @@ class Arguments(NewOpenCVTests):
                              msg=get_conversion_error_msg(convertible, expected, actual))
 
     def test_parse_to_size_t_not_convertible(self):
-        for not_convertible in (1.2, np.float(4), float(3), np.double(45), 's', 'str',
-                                np.array([1, 2]), (1,), [1, 2], np.float64(6), complex(1, 1),
-                                complex(imag=2), complex(1.1), None):
+        min_long, _ = get_limits(ctypes.c_long)
+        for not_convertible in (1.2, True, False, np.bool_(True), np.float(4), float(3),
+                                np.double(45), 's', 'str', np.array([1, 2]), (1,), [1, 2],
+                                np.float64(6), complex(1, 1), complex(imag=2), complex(1.1),
+                                -1, min_long, np.int8(-35)):
             with self.assertRaises((TypeError, OverflowError),
                                    msg=get_no_exception_msg(not_convertible)):
                 _ = cv.utils.dumpSizeT(not_convertible)
 
-    @unittest.skip('Wrong conversion behavior')
     def test_parse_to_size_t_convertible_extra(self):
         try_to_convert = partial(self._try_to_convert, cv.utils.dumpSizeT)
         _, max_size_t = get_limits(ctypes.c_size_t)
@@ -215,7 +213,6 @@ class Arguments(NewOpenCVTests):
             self.assertEqual(expected, actual,
                              msg=get_conversion_error_msg(convertible, expected, actual))
 
-    @unittest.skip('Wrong conversion behavior')
     def test_parse_to_size_t_not_convertible_extra(self):
         for not_convertible in (np.bool_(True), True, False, np.array([123, ], dtype=np.uint8),):
             with self.assertRaises((TypeError, OverflowError),
@@ -251,13 +248,12 @@ class Arguments(NewOpenCVTests):
                              msg=get_conversion_error_msg(inf, expected, actual))
 
     def test_parse_to_float_not_convertible(self):
-        for not_convertible in ('s', 'str', (12,), [1, 2], None, np.array([1, 2], dtype=np.float),
+        for not_convertible in ('s', 'str', (12,), [1, 2], np.array([1, 2], dtype=np.float),
                                 np.array([1, 2], dtype=np.double), complex(1, 1), complex(imag=2),
                                 complex(1.1)):
             with self.assertRaises((TypeError), msg=get_no_exception_msg(not_convertible)):
                 _ = cv.utils.dumpFloat(not_convertible)
 
-    @unittest.skip('Wrong conversion behavior')
     def test_parse_to_float_not_convertible_extra(self):
         for not_convertible in (np.bool_(False), True, False, np.array([123, ], dtype=int),
                                 np.array([1., ]), np.array([False]),
@@ -289,13 +285,12 @@ class Arguments(NewOpenCVTests):
                           "Actual: {}".format(type(nan).__name__, actual))
 
     def test_parse_to_double_not_convertible(self):
-        for not_convertible in ('s', 'str', (12,), [1, 2], None, np.array([1, 2], dtype=np.float),
+        for not_convertible in ('s', 'str', (12,), [1, 2], np.array([1, 2], dtype=np.float),
                                 np.array([1, 2], dtype=np.double), complex(1, 1), complex(imag=2),
                                 complex(1.1)):
             with self.assertRaises((TypeError), msg=get_no_exception_msg(not_convertible)):
                 _ = cv.utils.dumpDouble(not_convertible)
 
-    @unittest.skip('Wrong conversion behavior')
     def test_parse_to_double_not_convertible_extra(self):
         for not_convertible in (np.bool_(False), True, False, np.array([123, ], dtype=int),
                                 np.array([1., ]), np.array([False]),
diff --git a/modules/python/test/test_norm.py b/modules/python/test/test_norm.py
new file mode 100644
index 0000000000..404f19fbb7
--- /dev/null
+++ b/modules/python/test/test_norm.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python
+
+from itertools import product
+from functools import reduce
+
+import numpy as np
+import cv2 as cv
+
+from tests_common import NewOpenCVTests
+
+
+def norm_inf(x, y=None):
+    def norm(vec):
+        return np.linalg.norm(vec.flatten(), np.inf)
+
+    x = x.astype(np.float64)
+    return norm(x) if y is None else norm(x - y.astype(np.float64))
+
+
+def norm_l1(x, y=None):
+    def norm(vec):
+        return np.linalg.norm(vec.flatten(), 1)
+
+    x = x.astype(np.float64)
+    return norm(x) if y is None else norm(x - y.astype(np.float64))
+
+
+def norm_l2(x, y=None):
+    def norm(vec):
+        return np.linalg.norm(vec.flatten())
+
+    x = x.astype(np.float64)
+    return norm(x) if y is None else norm(x - y.astype(np.float64))
+
+
+def norm_l2sqr(x, y=None):
+    def norm(vec):
+        return np.square(vec).sum()
+
+    x = x.astype(np.float64)
+    return norm(x) if y is None else norm(x - y.astype(np.float64))
+
+
+def norm_hamming(x, y=None):
+    def norm(vec):
+        return sum(bin(i).count('1') for i in vec.flatten())
+
+    return norm(x) if y is None else norm(np.bitwise_xor(x, y))
+
+
+def norm_hamming2(x, y=None):
+    def norm(vec):
+        def element_norm(element):
+            binary_str = bin(element).split('b')[-1]
+            if len(binary_str) % 2 == 1:
+                binary_str = '0' + binary_str
+            gen = filter(lambda p: p != '00',
+                         (binary_str[i:i+2]
+                          for i in range(0, len(binary_str), 2)))
+            return sum(1 for _ in gen)
+
+        return sum(element_norm(element) for element in vec.flatten())
+
+    return norm(x) if y is None else norm(np.bitwise_xor(x, y))
+
+
+norm_type_under_test = {
+    cv.NORM_INF: norm_inf,
+    cv.NORM_L1: norm_l1,
+    cv.NORM_L2: norm_l2,
+    cv.NORM_L2SQR: norm_l2sqr,
+    cv.NORM_HAMMING: norm_hamming,
+    cv.NORM_HAMMING2: norm_hamming2
+}
+
+norm_name = {
+    cv.NORM_INF: 'inf',
+    cv.NORM_L1: 'L1',
+    cv.NORM_L2: 'L2',
+    cv.NORM_L2SQR: 'L2SQR',
+    cv.NORM_HAMMING: 'Hamming',
+    cv.NORM_HAMMING2: 'Hamming2'
+}
+
+
+def get_element_types(norm_type):
+    if norm_type in (cv.NORM_HAMMING, cv.NORM_HAMMING2):
+        return (np.uint8,)
+    else:
+        return (np.uint8, np.int8, np.uint16, np.int16, np.int32, np.float32,
+                np.float64)
+
+
+def generate_vector(shape, dtype):
+    if np.issubdtype(dtype, np.integer):
+        return np.random.randint(0, 100, shape).astype(dtype)
+    else:
+        return np.random.normal(10., 12.5, shape).astype(dtype)
+
+
+shapes = (1, 2, 3, 5, 7, 16, (1, 1), (2, 2), (3, 5), (1, 7))
+
+
+class norm_test(NewOpenCVTests):
+
+    def test_norm_for_one_array(self):
+        np.random.seed(123)
+        for norm_type, norm in norm_type_under_test.items():
+            element_types = get_element_types(norm_type)
+            for shape, element_type in product(shapes, element_types):
+                array = generate_vector(shape, element_type)
+                expected = norm(array)
+                actual = cv.norm(array, norm_type)
+                self.assertAlmostEqual(
+                    expected, actual, places=2,
+                    msg='Array {0} of {1} and norm {2}'.format(
+                        array, element_type.__name__, norm_name[norm_type]
+                    )
+                )
+
+    def test_norm_for_two_arrays(self):
+        np.random.seed(456)
+        for norm_type, norm in norm_type_under_test.items():
+            element_types = get_element_types(norm_type)
+            for shape, element_type in product(shapes, element_types):
+                first = generate_vector(shape, element_type)
+                second = generate_vector(shape, element_type)
+                expected = norm(first, second)
+                actual = cv.norm(first, second, norm_type)
+                self.assertAlmostEqual(
+                    expected, actual, places=2,
+                    msg='Arrays {0} {1} of type {2} and norm {3}'.format(
+                        first, second, element_type.__name__,
+                        norm_name[norm_type]
+                    )
+                )
+
+    def test_norm_fails_for_wrong_type(self):
+        for norm_type in (cv.NORM_HAMMING, cv.NORM_HAMMING2):
+            with self.assertRaises(Exception,
+                                   msg='Type is not checked {0}'.format(
+                                       norm_name[norm_type]
+                                   )):
+                cv.norm(np.array([1, 2], dtype=np.int32), norm_type)
+
+    def test_norm_fails_for_array_and_scalar(self):
+        for norm_type in norm_type_under_test:
+            with self.assertRaises(Exception,
+                                   msg='Exception is not thrown for {0}'.format(
+                                       norm_name[norm_type]
+                                   )):
+                cv.norm(np.array([1, 2], dtype=np.uint8), 123, norm_type)
+
+    def test_norm_fails_for_scalar_and_array(self):
+        for norm_type in norm_type_under_test:
+            with self.assertRaises(Exception,
+                                   msg='Exception is not thrown for {0}'.format(
+                                       norm_name[norm_type]
+                                   )):
+                cv.norm(4, np.array([1, 2], dtype=np.uint8), norm_type)
+
+    def test_norm_fails_for_array_and_norm_type_as_scalar(self):
+        for norm_type in norm_type_under_test:
+            with self.assertRaises(Exception,
+                                   msg='Exception is not thrown for {0}'.format(
+                                       norm_name[norm_type]
+                                   )):
+                cv.norm(np.array([3, 4, 5], dtype=np.uint8),
+                        norm_type, normType=norm_type)
+
+
+if __name__ == '__main__':
+    NewOpenCVTests.bootstrap()

From c1c84d2fd1b4be0724546cd4a65e351bc29c1652 Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Date: Mon, 6 Jan 2020 14:03:05 +0300
Subject: [PATCH 4/6] ONNX graphs simplifier

---
 modules/dnn/src/graph_simplifier.cpp          | 207 +++++++++++++
 modules/dnn/src/graph_simplifier.hpp          | 100 +++++++
 .../dnn/src/onnx/onnx_graph_simplifier.cpp    | 157 ++++++++++
 .../dnn/src/onnx/onnx_graph_simplifier.hpp    |  30 ++
 modules/dnn/src/onnx/onnx_importer.cpp        |   5 +
 .../src/tensorflow/tf_graph_simplifier.cpp    | 277 +++++-------------
 modules/dnn/test/test_onnx_importer.cpp       |   1 +
 7 files changed, 575 insertions(+), 202 deletions(-)
 create mode 100644 modules/dnn/src/graph_simplifier.cpp
 create mode 100644 modules/dnn/src/graph_simplifier.hpp
 create mode 100644 modules/dnn/src/onnx/onnx_graph_simplifier.cpp
 create mode 100644 modules/dnn/src/onnx/onnx_graph_simplifier.hpp

diff --git a/modules/dnn/src/graph_simplifier.cpp b/modules/dnn/src/graph_simplifier.cpp
new file mode 100644
index 0000000000..62651053fb
--- /dev/null
+++ b/modules/dnn/src/graph_simplifier.cpp
@@ -0,0 +1,207 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2020, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#include "precomp.hpp"
+
+#include "graph_simplifier.hpp"
+
+#include <queue>
+
+namespace cv { namespace dnn {
+
+Subgraph::~Subgraph() {}
+
+int Subgraph::addNodeToMatch(const std::string& op, int input_0, int input_1,
+                             int input_2, int input_3)
+{
+    int nodeInputs[] = {input_0, input_1, input_2, input_3};
+    int numInputs = 0;
+    for (int i = 0; i < 4; ++i)
+    {
+        numInputs += (int)(nodeInputs[i] != -1);
+    }
+    return addNodeToMatch(op, std::vector<int>(&nodeInputs[0], &nodeInputs[0] + numInputs));
+}
+
+int Subgraph::addNodeToMatch(const std::string& op, const std::vector<int>& inputs_)
+{
+    // Every input must refer to a node registered earlier; this also rejects negative ids.
+    for (size_t i = 0; i < inputs_.size(); ++i)
+        CV_Assert(inputs_[i] >= 0 && inputs_[i] < (int)nodes.size());
+    // An empty <op> is accepted: match() treats such a node as an input of unknown type.
+    nodes.push_back(op);
+    inputs.push_back(inputs_);
+    return (int)nodes.size() - 1;  // Id of the node that has just been added.
+}
+
+void Subgraph::setFusedNode(const std::string& op, int input_0, int input_1,
+                            int input_2, int input_3, int input_4, int input_5)
+{
+    int nodeInputs[] = {input_0, input_1, input_2, input_3, input_4, input_5};
+    int numInputs = 0;
+    for (int i = 0; i < 6; ++i)
+    {
+        CV_Assert(nodeInputs[i] < (int)nodes.size());
+        numInputs += (int)(nodeInputs[i] != -1);
+    }
+    setFusedNode(op, std::vector<int>(&nodeInputs[0], &nodeInputs[0] + numInputs));
+}
+
+void Subgraph::setFusedNode(const std::string& op, const std::vector<int>& inputs_)
+{
+    fusedNodeInputs = inputs_;
+    fusedNodeOp = op;
+}
+
+int Subgraph::getInputNodeId(const Ptr<ImportGraphWrapper>& net,
+                             const Ptr<ImportNodeWrapper>& node,
+                             int inpId)  // Returns index in <net> of the node named by input <inpId> of <node>.
+{
+    CV_Assert(inpId < node->getNumInputs());
+    std::string name = node->getInputName(inpId);
+    // If operation produces several tensors, they are specified by index
+    // after ':' character. For example, "input:0". NOTE(review): a ':' that is part of the name itself is cut too - confirm.
+    name = name.substr(0, name.rfind(':'));  // Without ':' the name is kept unchanged.
+    const int numNodes = net->getNumNodes();
+    for (int i = 0; i < numNodes; ++i)  // Linear search for the first node with that name.
+    {
+        if (net->getNodeName(i) == name)
+            return i;
+    }
+    CV_Error(Error::StsParseError, "Input node with name " + name + " not found");
+}
+
+bool Subgraph::match(const Ptr<ImportGraphWrapper>& net, int nodeId,
+                     std::vector<int>& matchedNodesIds,
+                     std::vector<int>& targetNodesIds)
+{
+    matchedNodesIds.clear();
+    targetNodesIds.clear();
+
+    std::queue<int> nodesToMatch;
+    std::queue<int> targetNodes;
+    nodesToMatch.push(nodeId);
+    targetNodes.push(nodes.size() - 1);
+    while (!nodesToMatch.empty())
+    {
+        int nodeToMatch = nodesToMatch.front();
+        int targetNodeId = targetNodes.front();
+        nodesToMatch.pop();
+        targetNodes.pop();
+
+        if (std::find(matchedNodesIds.begin(), matchedNodesIds.end(), nodeToMatch) !=
+            matchedNodesIds.end())
+            continue;
+
+        const Ptr<ImportNodeWrapper> node = net->getNode(nodeToMatch);
+        if (node->getType() != nodes[targetNodeId])
+            return false;
+
+        std::vector<int>& inputNodes = inputs[targetNodeId];
+        if (inputNodes.size() != node->getNumInputs())
+            return false;
+
+        for (int j = 0; j < inputNodes.size(); ++j)
+        {
+            if (nodes[inputNodes[j]].empty())  // Unknown input node type.
+                continue;
+            nodeId = getInputNodeId(net, node, j);
+            const Ptr<ImportNodeWrapper> inpNode = net->getNode(nodeId);
+            if (inpNode->getType() != "Const")
+            {
+                nodesToMatch.push(nodeId);
+                targetNodes.push(inputNodes[j]);
+            }
+            else if (nodes[inputNodes[j]] != "Const")
+                return false;
+        }
+        matchedNodesIds.push_back(nodeToMatch);
+        targetNodesIds.push_back(targetNodeId);
+    }
+
+    const int n = matchedNodesIds.size();
+    std::vector<std::pair<int, int> > elements(n);
+    for (int i = 0; i < n; ++i)
+        elements[i] = std::make_pair(matchedNodesIds[i], targetNodesIds[i]);
+    std::sort(elements.begin(), elements.end());
+    for (int i = 0; i < n; ++i)
+    {
+        matchedNodesIds[i] = elements[i].first;
+        targetNodesIds[i] = elements[i].second;
+    }
+    return true;
+}
+
+void Subgraph::replace(const Ptr<ImportGraphWrapper>& net, const std::vector<int>& matchedNodesIds,
+                       const std::vector<int>& targetNodesIds)
+{
+    // Extract names of input nodes.
+    std::vector<std::string> inputsNames(fusedNodeInputs.size());
+    for (int i = 0; i < fusedNodeInputs.size(); ++i)
+    {
+        std::string inpName;
+        // Find input node name looking at inputs of fused nodes.
+        for (int j = 0; j < matchedNodesIds.size() && inpName.empty(); ++j)
+        {
+            Ptr<ImportNodeWrapper> node = net->getNode(matchedNodesIds[j]);
+            std::vector<int>& inpIndices = inputs[targetNodesIds[j]];
+
+            CV_Assert(node->getNumInputs() == inpIndices.size());
+            for (int k = 0; k < inpIndices.size(); ++k)
+            {
+                if (inpIndices[k] == fusedNodeInputs[i])
+                {
+                    inpName = node->getInputName(k);
+                    break;
+                }
+            }
+        }
+        CV_Assert(!inpName.empty());
+        inputsNames[i] = inpName;
+    }
+
+    // Remove matched nodes except the last one. Indices in ascending order are expected.
+    Ptr<ImportNodeWrapper> node = net->getNode(matchedNodesIds.back());
+    for (int i = matchedNodesIds.size() - 2; i >= 0; --i)
+        net->removeNode(matchedNodesIds[i]);
+
+    // Modify the last node to be a fused one.
+    node->setType(fusedNodeOp);
+    node->setInputNames(inputsNames);
+
+    std::vector<Ptr<ImportNodeWrapper> > inputNodes(inputsNames.size());
+    for (int i = 0; i < inputsNames.size(); ++i)
+    {
+        inputNodes[i] = net->getNode(getInputNodeId(net, node, i));
+    }
+    finalize(net, node, inputNodes);
+}
+
+void Subgraph::finalize(const Ptr<ImportGraphWrapper>& net,
+                        const Ptr<ImportNodeWrapper>& fusedNode,
+                        std::vector<Ptr<ImportNodeWrapper> >& inputs) {}
+
+void simplifySubgraphs(const Ptr<ImportGraphWrapper>& net,
+                       const std::vector<Ptr<Subgraph> >& patterns)
+{
+    int numNodes = net->getNumNodes();
+    std::vector<int> matchedNodesIds, targetNodesIds;
+    for (int i = 0; i < numNodes; ++i)
+    {
+        for (int j = 0; j < patterns.size(); ++j)
+        {
+            if (patterns[j]->match(net, i, matchedNodesIds, targetNodesIds))
+            {
+                patterns[j]->replace(net, matchedNodesIds, targetNodesIds);
+                numNodes -= matchedNodesIds.size() - 1;  // #matchedNodes removed and one added.
+                break;
+            }
+        }
+    }
+}
+
+}}  // namespace cv::dnn
diff --git a/modules/dnn/src/graph_simplifier.hpp b/modules/dnn/src/graph_simplifier.hpp
new file mode 100644
index 0000000000..8f3958ba52
--- /dev/null
+++ b/modules/dnn/src/graph_simplifier.hpp
@@ -0,0 +1,100 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2020, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#ifndef __OPENCV_DNN_GRAPH_SIMPLIFIER_HPP__
+#define __OPENCV_DNN_GRAPH_SIMPLIFIER_HPP__
+
+#include <string>
+
+#include <opencv2/core.hpp>
+
+namespace cv { namespace dnn {
+
+class ImportNodeWrapper
+{
+public:
+    virtual ~ImportNodeWrapper() {};
+
+    virtual int getNumInputs() const = 0;
+
+    virtual std::string getInputName(int idx) const = 0;
+
+    virtual std::string getType() const = 0;
+
+    virtual void setType(const std::string& type) = 0;
+
+    virtual void setInputNames(const std::vector<std::string>& inputs) = 0;
+};
+
+class ImportGraphWrapper
+{
+public:
+    virtual ~ImportGraphWrapper() {};
+
+    virtual Ptr<ImportNodeWrapper> getNode(int idx) const = 0;
+
+    virtual int getNumNodes() const = 0;
+
+    virtual std::string getNodeName(int idx) const = 0;
+
+    virtual void removeNode(int idx) = 0;
+};
+
+class Subgraph  // Interface to match and replace subgraphs of an imported graph.
+{
+public:
+    virtual ~Subgraph();
+
+    // Add a node to be matched in the origin graph. Specify ids of nodes that
+    // are expected to be inputs (-1 marks an unused slot). Returns id of a newly added node.
+    // TODO: Replace inputs to std::vector<int> in C++11
+    int addNodeToMatch(const std::string& op, int input_0 = -1, int input_1 = -1,
+                       int input_2 = -1, int input_3 = -1);
+    // The same, with ids of the expected input nodes passed as a vector.
+    int addNodeToMatch(const std::string& op, const std::vector<int>& inputs_);
+
+    // Specify resulting node. All the matched nodes in subgraph excluding
+    // input nodes will be fused into this single node.
+    // TODO: Replace inputs to std::vector<int> in C++11
+    void setFusedNode(const std::string& op, int input_0 = -1, int input_1 = -1,
+                      int input_2 = -1, int input_3 = -1, int input_4 = -1,
+                      int input_5 = -1);
+    // The same, with ids of the fused node inputs passed as a vector.
+    void setFusedNode(const std::string& op, const std::vector<int>& inputs_);
+    // Returns index in <net> of the node named by input <inpId> of <node>; raises an error if there is none.
+    static int getInputNodeId(const Ptr<ImportGraphWrapper>& net,
+                              const Ptr<ImportNodeWrapper>& node,
+                              int inpId);
+
+    // Match a subgraph of <net> starting from <nodeId> with a set of nodes to be fused.
+    // Const nodes are skipped during matching. Returns true if nodes are matched and can be fused.
+    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
+                       std::vector<int>& matchedNodesIds,
+                       std::vector<int>& targetNodesIds);
+
+    // Fuse matched subgraph.
+    void replace(const Ptr<ImportGraphWrapper>& net, const std::vector<int>& matchedNodesIds,
+                 const std::vector<int>& targetNodesIds);
+    // Hook called at the end of replace() with the fused node and its input nodes. Does nothing by default.
+    virtual void finalize(const Ptr<ImportGraphWrapper>& net,
+                          const Ptr<ImportNodeWrapper>& fusedNode,
+                          std::vector<Ptr<ImportNodeWrapper> >& inputs);
+
+private:
+    std::vector<std::string> nodes;         // Nodes to be matched in the origin graph.
+    std::vector<std::vector<int> > inputs;  // Connections of every node to its inputs.
+
+    std::string fusedNodeOp;           // Operation name of resulting fused node.
+    std::vector<int> fusedNodeInputs;  // Inputs of fused node.
+};
+
+void simplifySubgraphs(const Ptr<ImportGraphWrapper>& net,
+                       const std::vector<Ptr<Subgraph> >& patterns);
+
+}}  // namespace dnn, namespace cv
+
+#endif  // __OPENCV_DNN_GRAPH_SIMPLIFIER_HPP__
diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
new file mode 100644
index 0000000000..f9f9194a22
--- /dev/null
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -0,0 +1,157 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2020, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#include "../precomp.hpp"
+
+#include "../graph_simplifier.hpp"
+#include "onnx_graph_simplifier.hpp"
+
+#include <queue>
+
+namespace cv { namespace dnn {
+CV__DNN_EXPERIMENTAL_NS_BEGIN
+
+// This wrapper can behave differently for fake input nodes and real graph nodes.
+class ONNXNodeWrapper : public ImportNodeWrapper
+{
+public:
+    ONNXNodeWrapper(opencv_onnx::NodeProto* _node = 0) : node(_node) {}
+
+    virtual int getNumInputs() const CV_OVERRIDE
+    {
+        return node ? node->input_size() : 0;
+    }
+
+    virtual std::string getInputName(int idx) const CV_OVERRIDE
+    {
+        CV_Assert_N(node, idx < node->input_size());
+        return node->input(idx);
+    }
+
+    virtual std::string getType() const CV_OVERRIDE
+    {
+        return node ? node->op_type() : "";
+    }
+
+    virtual void setType(const std::string& type) CV_OVERRIDE
+    {
+        CV_Assert(node);
+        node->set_op_type(type);
+    }
+
+    virtual void setInputNames(const std::vector<std::string>& inputs) CV_OVERRIDE
+    {
+        CV_Assert(node);
+        node->clear_input();
+        for (int i = 0; i < inputs.size(); ++i)
+            node->add_input(inputs[i]);
+    }
+
+    opencv_onnx::NodeProto* node;
+};
+
+// ONNX graph's inputs are separate from nodes so we index them before the rest of nodes.
+class ONNXGraphWrapper : public ImportGraphWrapper
+{
+public:
+    ONNXGraphWrapper(opencv_onnx::GraphProto& _net) : net(_net)
+    {
+        numInputs = net.input_size();  // Cached once; ids [0, numInputs) denote graph inputs.
+    }
+
+    virtual Ptr<ImportNodeWrapper> getNode(int idx) const CV_OVERRIDE
+    {
+        opencv_onnx::NodeProto* node = 0;  // Graph inputs have no NodeProto and are wrapped as a null node.
+        if (idx >= numInputs)
+            node = net.mutable_node(idx - numInputs);
+        return makePtr<ONNXNodeWrapper>(node);
+    }
+
+    virtual int getNumNodes() const CV_OVERRIDE
+    {
+        return numInputs + net.node_size();  // Graph inputs are counted as nodes too.
+    }
+
+    virtual std::string getNodeName(int idx) const CV_OVERRIDE
+    {
+        if (idx < numInputs)
+            return net.input(idx).name();
+        else
+            return net.node(idx - numInputs).output(0);  // A node is identified by the name of its first output.
+    }
+
+    virtual void removeNode(int idx) CV_OVERRIDE
+    {
+        CV_Assert(idx >= numInputs);  // Only real nodes can be removed, not graph inputs.
+        net.mutable_node()->DeleteSubrange(idx - numInputs, 1);
+    }
+
+private:
+    int numInputs;  // Number of graph inputs; they occupy the first ids.
+    opencv_onnx::GraphProto& net;  // Wrapped graph, modified in place.
+};
+
+class SoftMaxSubgraph : public Subgraph  // Fuses Div(Exp(x), ReduceSum(Exp(x))) into Softmax(x).
+{
+public:
+    SoftMaxSubgraph() : axis(1)  // Deterministic default; overwritten by every successful match().
+    {
+        int input = addNodeToMatch("");  // Input of unknown type, not matched itself.
+        int inpExp = addNodeToMatch("Exp", input);
+        int sum = addNodeToMatch("ReduceSum", inpExp);
+        addNodeToMatch("Div", inpExp, sum);
+        setFusedNode("Softmax", input);
+    }
+
+    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
+                       std::vector<int>& matchedNodesIds,
+                       std::vector<int>& targetNodesIds) CV_OVERRIDE
+    {
+        if (Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds))
+        {
+            Ptr<ImportNodeWrapper> sum = net->getNode(matchedNodesIds[1]);
+            opencv_onnx::NodeProto* node = sum.dynamicCast<ONNXNodeWrapper>()->node;
+            // Ids are sorted ascending, so [1] is ReduceSum provided nodes are stored in topological order.
+            for (int i = 0; i < node->attribute_size(); i++)
+            {
+                const opencv_onnx::AttributeProto& attr = node->attribute(i);  // No need to copy the message.
+                if (attr.name() != "axes")
+                    continue;
+                if (attr.ints_size() != 1)
+                    CV_Error(Error::StsNotImplemented, format("Unexpected number of axes: %d", attr.ints_size()));
+                axis = static_cast<int>(attr.ints(0));  // Stored as int64 in the model.
+                return true;
+            }
+            CV_Error(Error::StsNotImplemented, "Missed axes attribute");
+        }
+        return false;
+    }
+
+    virtual void finalize(const Ptr<ImportGraphWrapper>&,
+                          const Ptr<ImportNodeWrapper>& fusedNode,
+                          std::vector<Ptr<ImportNodeWrapper> >&) CV_OVERRIDE
+    {
+        opencv_onnx::NodeProto* node = fusedNode.dynamicCast<ONNXNodeWrapper>()->node;
+        opencv_onnx::AttributeProto* attr = node->add_attribute();
+        attr->set_name("axis");
+        attr->set_i(axis);  // Axis captured from ReduceSum in match().
+    }
+
+private:
+    int axis;  // Reduction axis of the matched ReduceSum; becomes the Softmax "axis" attribute.
+};
+
+void simplifySubgraphs(opencv_onnx::GraphProto& net)
+{
+    std::vector<Ptr<Subgraph> > subgraphs;
+    subgraphs.push_back(makePtr<SoftMaxSubgraph>());
+
+    simplifySubgraphs(Ptr<ImportGraphWrapper>(new ONNXGraphWrapper(net)), subgraphs);
+}
+
+CV__DNN_EXPERIMENTAL_NS_END
+}}  // namespace cv::dnn
diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.hpp b/modules/dnn/src/onnx/onnx_graph_simplifier.hpp
new file mode 100644
index 0000000000..52b4e5ecc0
--- /dev/null
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.hpp
@@ -0,0 +1,30 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2020, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#ifndef __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__
+#define __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__
+
+#include "../precomp.hpp"
+
+#if defined(__GNUC__) && __GNUC__ >= 5
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wsuggest-override"
+#endif
+#include "opencv-onnx.pb.h"
+#if defined(__GNUC__) && __GNUC__ >= 5
+#pragma GCC diagnostic pop
+#endif
+
+namespace cv { namespace dnn {
+CV__DNN_EXPERIMENTAL_NS_BEGIN
+
+void simplifySubgraphs(opencv_onnx::GraphProto& net);
+
+CV__DNN_EXPERIMENTAL_NS_END
+}}  // namespace dnn, namespace cv
+
+#endif  // __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index f08b6bd740..36945f5317 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -26,6 +26,8 @@
 #pragma GCC diagnostic pop
 #endif
 
+#include "onnx_graph_simplifier.hpp"
+
 namespace cv {
 namespace dnn {
 CV__DNN_EXPERIMENTAL_NS_BEGIN
@@ -326,6 +328,9 @@ void ONNXImporter::populateNet(Net dstNet)
 {
     CV_Assert(model_proto.has_graph());
     opencv_onnx::GraphProto graph_proto = model_proto.graph();
+
+    simplifySubgraphs(graph_proto);
+
     std::map<std::string, Mat> constBlobs = getGraphTensors(graph_proto);
     // List of internal blobs shapes.
     std::map<std::string, MatShape> outShapes;
diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
index 0f9670e8a4..0d53be5a58 100644
--- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
+++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
@@ -9,6 +9,7 @@
 
 #ifdef HAVE_PROTOBUF
 
+#include "../graph_simplifier.hpp"
 #include "tf_graph_simplifier.hpp"
 #include <queue>
 
@@ -18,203 +19,87 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
 using ::google::protobuf::RepeatedField;
 using ::google::protobuf::MapPair;
 
-class Subgraph  // Interface to match and replace TensorFlow subgraphs.
+class TFNodeWrapper : public ImportNodeWrapper
 {
 public:
-    virtual ~Subgraph() {}
+    TFNodeWrapper(tensorflow::NodeDef* _node) : node(_node) {}
 
-    // Add a node to be matched in the origin graph. Specify ids of nodes that
-    // are expected to be inputs. Returns id of a newly added node.
-    // TODO: Replace inputs to std::vector<int> in C++11
-    int addNodeToMatch(const std::string& op, int input_0 = -1, int input_1 = -1,
-                       int input_2 = -1, int input_3 = -1)
+    virtual int getNumInputs() const CV_OVERRIDE
     {
-        int nodeInputs[] = {input_0, input_1, input_2, input_3};
-        int numInputs = 0;
-        for (int i = 0; i < 4; ++i)
-        {
-            numInputs += (int)(nodeInputs[i] != -1);
-        }
-        return addNodeToMatch(op, std::vector<int>(&nodeInputs[0], &nodeInputs[0] + numInputs));
+        return node->input_size();
     }
 
-    int addNodeToMatch(const std::string& op, const std::vector<int>& inputs_)
+    virtual std::string getInputName(int idx) const CV_OVERRIDE
     {
-        for (int i = 0; i < inputs_.size(); ++i)
-        {
-            CV_Assert(inputs_[i] < (int)nodes.size());
-        }
-        nodes.push_back(op);
-        inputs.push_back(inputs_);
-        return nodes.size() - 1;
+        return node->input(idx);
     }
 
-    // Specify resulting node. All the matched nodes in subgraph excluding
-    // input nodes will be fused into this single node.
-    // TODO: Replace inputs to std::vector<int> in C++11
-    void setFusedNode(const std::string& op, int input_0 = -1, int input_1 = -1,
-                      int input_2 = -1, int input_3 = -1, int input_4 = -1,
-                      int input_5 = -1)
+    virtual std::string getType() const CV_OVERRIDE
     {
-        int nodeInputs[] = {input_0, input_1, input_2, input_3, input_4, input_5};
-        int numInputs = 0;
-        for (int i = 0; i < 6; ++i)
-        {
-            CV_Assert(nodeInputs[i] < (int)nodes.size());
-            numInputs += (int)(nodeInputs[i] != -1);
-        }
-        setFusedNode(op, std::vector<int>(&nodeInputs[0], &nodeInputs[0] + numInputs));
+        return node->op();
     }
 
-    void setFusedNode(const std::string& op, const std::vector<int>& inputs_)
+    virtual void setType(const std::string& type) CV_OVERRIDE
     {
-        fusedNodeInputs = inputs_;
-        fusedNodeOp = op;
+        node->set_op(type);
     }
 
-    static int getInputNodeId(const tensorflow::GraphDef& net,
-                              const tensorflow::NodeDef& node,
-                              int inpId)
+    virtual void setInputNames(const std::vector<std::string>& inputs) CV_OVERRIDE
     {
-        CV_Assert(inpId < node.input_size());
-        std::string name = node.input(inpId);
-        // If operation produces several tensors, they are specified by index
-        // after ':' character. In example, "input:0".
-        name = name.substr(0, name.rfind(':'));
-        const int numNodes = net.node_size();
-        for (int i = 0; i < numNodes; ++i)
-        {
-            if (net.node(i).name() == name)
-                return i;
-        }
-        CV_Error(Error::StsParseError, "Input node with name " + name + " not found");
-    }
-
-    // Match TensorFlow subgraph starting from <nodeId> with a set of nodes to be fused.
-    // Const nodes are skipped during matching. Returns true if nodes are matched and can be fused.
-    virtual bool match(const tensorflow::GraphDef& net, int nodeId,
-                       std::vector<int>& matchedNodesIds,
-                       std::vector<int>& targetNodesIds)
-    {
-        matchedNodesIds.clear();
-        targetNodesIds.clear();
-
-        std::queue<int> nodesToMatch;
-        std::queue<int> targetNodes;
-        nodesToMatch.push(nodeId);
-        targetNodes.push(nodes.size() - 1);
-        while (!nodesToMatch.empty())
-        {
-            int nodeToMatch = nodesToMatch.front();
-            int targetNodeId = targetNodes.front();
-            nodesToMatch.pop();
-            targetNodes.pop();
-
-            if (std::find(matchedNodesIds.begin(), matchedNodesIds.end(), nodeToMatch) !=
-                matchedNodesIds.end())
-                continue;
-
-            const tensorflow::NodeDef& node = net.node(nodeToMatch);
-            if (node.op() != nodes[targetNodeId])
-                return false;
-
-            std::vector<int>& inputNodes = inputs[targetNodeId];
-            if (inputNodes.size() != node.input_size())
-                return false;
-
-            for (int j = 0; j < inputNodes.size(); ++j)
-            {
-                if (nodes[inputNodes[j]].empty())  // Unknown input node type.
-                    continue;
-                nodeId = getInputNodeId(net, node, j);
-                const tensorflow::NodeDef& inpNode = net.node(nodeId);
-                if (inpNode.op() != "Const")
-                {
-                    nodesToMatch.push(nodeId);
-                    targetNodes.push(inputNodes[j]);
-                }
-                else if (nodes[inputNodes[j]] != "Const")
-                    return false;
-            }
-            matchedNodesIds.push_back(nodeToMatch);
-            targetNodesIds.push_back(targetNodeId);
-        }
-
-        const int n = matchedNodesIds.size();
-        std::vector<std::pair<int, int> > elements(n);
-        for (int i = 0; i < n; ++i)
-            elements[i] = std::make_pair(matchedNodesIds[i], targetNodesIds[i]);
-        std::sort(elements.begin(), elements.end());
-        for (int i = 0; i < n; ++i)
-        {
-            matchedNodesIds[i] = elements[i].first;
-            targetNodesIds[i] = elements[i].second;
-        }
-        return true;
-    }
-
-    // Fuse matched subgraph.
-    void replace(tensorflow::GraphDef& net, const std::vector<int>& matchedNodesIds,
-                 const std::vector<int>& targetNodesIds)
-    {
-        // Extract names of input nodes.
-        std::vector<std::string> inputsNames(fusedNodeInputs.size());
-        for (int i = 0; i < fusedNodeInputs.size(); ++i)
-        {
-            std::string inpName;
-            // Find input node name looking at inputs of fused nodes.
-            for (int j = 0; j < matchedNodesIds.size() && inpName.empty(); ++j)
-            {
-                const tensorflow::NodeDef &node = net.node(matchedNodesIds[j]);
-                std::vector<int>& inpIndices = inputs[targetNodesIds[j]];
-
-                CV_Assert(node.input_size() == inpIndices.size());
-                for (int k = 0; k < inpIndices.size(); ++k)
-                {
-                    if (inpIndices[k] == fusedNodeInputs[i])
-                    {
-                        inpName = node.input(k);
-                        break;
-                    }
-                }
-            }
-            CV_Assert(!inpName.empty());
-            inputsNames[i] = inpName;
-        }
-
-        // Remove matched nodes except the last one. Indices in ascending order are expected.
-        tensorflow::NodeDef* node = net.mutable_node(matchedNodesIds.back());
-        for (int i = matchedNodesIds.size() - 2; i >= 0; --i)
-            net.mutable_node()->DeleteSubrange(matchedNodesIds[i], 1);
-
-        // Modify the last node to be a fused one.
-        node->set_op(fusedNodeOp);
         node->clear_input();
-        for (int i = 0; i < inputsNames.size(); ++i)
-        {
-            node->add_input(inputsNames[i]);
-        }
-
-        std::vector<tensorflow::NodeDef*> inputNodes(inputsNames.size());
-        for (int i = 0; i < inputsNames.size(); ++i)
-        {
-            inputNodes[i] = net.mutable_node(getInputNodeId(net, *node, i));
-        }
-        finalize(net, node, inputNodes);
+        for (int i = 0; i < inputs.size(); ++i)
+            node->add_input(inputs[i]);
     }
 
-    virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef*,
-                          std::vector<tensorflow::NodeDef*>&) {}
-
-private:
-    std::vector<std::string> nodes;         // Nodes to be matched in the origin graph.
-    std::vector<std::vector<int> > inputs;  // Connections of an every node to it's inputs.
-
-    std::string fusedNodeOp;           // Operation name of resulting fused node.
-    std::vector<int> fusedNodeInputs;  // Inputs of fused node.
+    tensorflow::NodeDef* node;
 };
 
-class BatchNormSubgraph : public Subgraph
+class TFGraphWrapper : public ImportGraphWrapper
+{
+public:
+    TFGraphWrapper(tensorflow::GraphDef& _net) : net(_net) {}
+
+    virtual Ptr<ImportNodeWrapper> getNode(int idx) const CV_OVERRIDE
+    {
+        return makePtr<TFNodeWrapper>(net.mutable_node(idx));
+    }
+
+    virtual int getNumNodes() const CV_OVERRIDE
+    {
+        return net.node_size();
+    }
+
+    virtual std::string getNodeName(int idx) const CV_OVERRIDE
+    {
+        return net.node(idx).name();
+    }
+
+    virtual void removeNode(int idx) CV_OVERRIDE
+    {
+        net.mutable_node()->DeleteSubrange(idx, 1);
+    }
+
+    tensorflow::GraphDef& net;
+};
+
+class TFSubgraph : public Subgraph
+{
+    virtual void finalize(const Ptr<ImportGraphWrapper>& netWrapper,
+                          const Ptr<ImportNodeWrapper>& fusedNodeWrapper,
+                          std::vector<Ptr<ImportNodeWrapper> >& inputs) CV_OVERRIDE
+    {
+        std::vector<tensorflow::NodeDef*> inputNodes(inputs.size());
+        for (int i = 0; i < inputs.size(); ++i)
+            inputNodes[i] = inputs[i].dynamicCast<TFNodeWrapper>()->node;
+        finalize(netWrapper.dynamicCast<TFGraphWrapper>()->net,
+                 fusedNodeWrapper.dynamicCast<TFNodeWrapper>()->node, inputNodes);
+    }
+
+    virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef* fusedNode,
+                          std::vector<tensorflow::NodeDef*>& inputNodes) {}
+};
+
+class BatchNormSubgraph : public TFSubgraph
 {
 public:
     BatchNormSubgraph()
@@ -250,7 +135,7 @@ public:
     }
 };
 
-class BatchNormNoGammaSubgraph : public Subgraph
+class BatchNormNoGammaSubgraph : public TFSubgraph
 {
 public:
     BatchNormNoGammaSubgraph()
@@ -366,20 +251,21 @@ public:
         setFusedNode("Relu6", input);
     }
 
-    virtual bool match(const tensorflow::GraphDef& net, int nodeId,
+    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,  // Base match plus a check of a constant value.
                        std::vector<int>& matchedNodesIds,
                        std::vector<int>& targetNodesIds) CV_OVERRIDE
     {
         if (!Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds))
             return false;
-        Mat maxValue = getTensorContent(net.node(matchedNodesIds.front() + 1).attr().at("value").tensor());
+        tensorflow::NodeDef* node = net->getNode(matchedNodesIds.front() + 1).dynamicCast<TFNodeWrapper>()->node;  // Node at the index right after the first matched one.
+        Mat maxValue = getTensorContent(node->attr().at("value").tensor());  // Content of that node's "value" attribute tensor.
         return maxValue.type() == CV_32FC1 && maxValue.total() == 1 && maxValue.at<float>(0) == 6;
     }
 };
 
 // Keras' reshape stores output shape in separate Const nodes by one value.
 // Need to merge them into a single Const node.
-class ReshapeKerasSubgraph : public Subgraph
+class ReshapeKerasSubgraph : public TFSubgraph
 {
 public:
     ReshapeKerasSubgraph(int _numOutDims) : numOutDims(_numOutDims)
@@ -402,15 +288,15 @@ public:
         setFusedNode("Reshape", ids);
     }
 
-    virtual bool match(const tensorflow::GraphDef& net, int nodeId,
+    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,  // Records the first input name, then runs the base match.
                        std::vector<int>& matchedNodesIds,
                        std::vector<int>& targetNodesIds) CV_OVERRIDE
     {
-        const tensorflow::NodeDef& node = net.node(nodeId);
-        if (node.input_size() == 0)
+        Ptr<ImportNodeWrapper> node = net->getNode(nodeId);
+        if (node->getNumInputs() == 0)  // A node without inputs is rejected before the base match runs.
             return false;
 
-        inpName = node.input(0);
+        inpName = node->getInputName(0);  // Stored before delegating, so it is set even if the base match below fails.
         return Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds);
     }
 
@@ -457,7 +343,7 @@ public:
     }
 };
 
-class DeconvolutionValidKerasSubgraph : public Subgraph
+class DeconvolutionValidKerasSubgraph : public TFSubgraph
 {
 public:
     DeconvolutionValidKerasSubgraph()
@@ -518,7 +404,7 @@ public:
     }
 };
 
-class DeconvolutionSameKerasSubgraph : public Subgraph
+class DeconvolutionSameKerasSubgraph : public TFSubgraph
 {
 public:
     DeconvolutionSameKerasSubgraph()
@@ -608,7 +494,7 @@ public:
 };
 
 // In case of resizing by factor.
-class UpsamplingKerasSubgraph : public Subgraph
+class UpsamplingKerasSubgraph : public TFSubgraph
 {
 public:
     UpsamplingKerasSubgraph(const std::string& type)
@@ -703,7 +589,7 @@ public:
     }
 };
 
-class KerasMVNSubgraph : public Subgraph
+class KerasMVNSubgraph : public TFSubgraph
 {
 public:
     KerasMVNSubgraph()
@@ -758,20 +644,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
     subgraphs.push_back(Ptr<Subgraph>(new ReshapeAsShapeSubgraph()));
     subgraphs.push_back(Ptr<Subgraph>(new KerasMVNSubgraph()));
 
-    int numNodes = net.node_size();
-    std::vector<int> matchedNodesIds, targetNodesIds;
-    for (int i = 0; i < numNodes; ++i)
-    {
-        for (int j = 0; j < subgraphs.size(); ++j)
-        {
-            if (subgraphs[j]->match(net, i, matchedNodesIds, targetNodesIds))
-            {
-                subgraphs[j]->replace(net, matchedNodesIds, targetNodesIds);
-                numNodes -= matchedNodesIds.size() - 1;  // #matchedNodes removed and one added.
-                break;
-            }
-        }
-    }
+    simplifySubgraphs(Ptr<ImportGraphWrapper>(new TFGraphWrapper(net)), subgraphs);
 }
 
 void RemoveIdentityOps(tensorflow::GraphDef& net)
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 2122813195..3f821ddf34 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -396,6 +396,7 @@ TEST_P(Test_ONNX_layers, Softmax)
 {
     testONNXModels("softmax");
     testONNXModels("log_softmax", npy, 0, 0, false, false);
+    testONNXModels("softmax_unfused");
 }
 
 TEST_P(Test_ONNX_layers, Split_EltwiseMax)

From be86338a79d7433860a3dfa1e40b0806d662a54e Mon Sep 17 00:00:00 2001
From: Liubov Batanina <piccione-mail@yandex.ru>
Date: Tue, 14 Jan 2020 12:51:19 +0300
Subject: [PATCH 5/6] Enable acrossSpatial normalizeL2 on Myriad

---
 modules/dnn/src/layers/normalize_bbox_layer.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp
index 63f93e2eb5..6c1d381feb 100644
--- a/modules/dnn/src/layers/normalize_bbox_layer.cpp
+++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp
@@ -69,7 +69,10 @@ public:
             if (pnorm != 2)
                 return false;
 
-            return preferableTarget == DNN_TARGET_MYRIAD ? !acrossSpatial : startAxis == 1;
+            if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && preferableTarget == DNN_TARGET_MYRIAD)
+                return !acrossSpatial;
+
+            return startAxis == 1;
         }
         return backendId == DNN_BACKEND_OPENCV;
     }
@@ -339,7 +342,6 @@ public:
         }
         else
         {
-            // weight->get_shape().size() > 1 ~> channel_shared = false
             weight = std::make_shared<ngraph::op::Constant>(
                                       ngraph::element::f32, ngraph::Shape(shape), blobs[0].data);
         }

From 4b938208800abc14e4515d71c85cfcc091a48534 Mon Sep 17 00:00:00 2001
From: sajarindider <sajarindider@gmail.com>
Date: Thu, 19 Dec 2019 00:46:46 -0500
Subject: [PATCH 6/6] Fixed small inefficiency in seamless clone init

---
 modules/photo/src/seamless_cloning_impl.cpp | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/modules/photo/src/seamless_cloning_impl.cpp b/modules/photo/src/seamless_cloning_impl.cpp
index 1b87e86b20..8fd4bc7865 100644
--- a/modules/photo/src/seamless_cloning_impl.cpp
+++ b/modules/photo/src/seamless_cloning_impl.cpp
@@ -57,12 +57,8 @@ void Cloning::computeGradientX( const Mat &img, Mat &gx)
     }
     else if (img.channels() == 1)
     {
-        Mat tmp[3];
-        for(int chan = 0 ; chan < 3 ; ++chan)
-        {
-            filter2D(img, tmp[chan], CV_32F, kernel);
-        }
-        merge(tmp, 3, gx);
+        filter2D(img, gx, CV_32F, kernel);
+        cvtColor(gx, gx, COLOR_GRAY2BGR);
     }
 }
 
@@ -78,12 +74,8 @@ void Cloning::computeGradientY( const Mat &img, Mat &gy)
     }
     else if (img.channels() == 1)
     {
-        Mat tmp[3];
-        for(int chan = 0 ; chan < 3 ; ++chan)
-        {
-            filter2D(img, tmp[chan], CV_32F, kernel);
-        }
-        merge(tmp, 3, gy);
+        filter2D(img, gy, CV_32F, kernel);
+        cvtColor(gy, gy, COLOR_GRAY2BGR);
     }
 }