Merge pull request #16840 from l-bat:matmul_inputs

* Supported FullyConnected layer with two inputs

* Skipped test

* Fix conditions

* Added OpenCL support

* Supported ReduceMean3D

* Supported Expand layer

* Fix warning

* Added Normalize subgraph

* refactoring

* Used addLayer

* Fix check

* Used addLayer

* Skip failed test

* Added normalize1 subgraph

* Fix comments
Author: Liubov Batanina, 2020-04-07 17:12:18 +03:00 (committed by GitHub)
parent 51a8885566
commit 734771418e
4 changed files with 382 additions and 77 deletions
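
Taken together, the changes let cv::dnn load ONNX graphs in which MatMul takes two runtime tensors instead of a constant weight, fuse x / ReduceL2(x) subgraphs into a Normalize layer, lower single-axis ReduceMean on 3-D inputs, and decompose Expand. A minimal usage sketch of the two-input MatMul case follows; the model file name and the input names "A" and "B" are placeholders for illustration, not part of this PR.

#include <opencv2/dnn.hpp>
#include <iostream>

int main()
{
    // Hypothetical model whose MatMul node has two graph inputs named "A" and "B".
    cv::dnn::Net net = cv::dnn::readNetFromONNX("matmul_two_inputs.onnx");

    // Two 3-D inputs: A is 2x3x4, B is 2x4x5; the product should be 2x3x5.
    int shapeA[] = {2, 3, 4}, shapeB[] = {2, 4, 5};
    cv::Mat A(3, shapeA, CV_32F), B(3, shapeB, CV_32F);
    A.setTo(cv::Scalar::all(1.0));
    B.setTo(cv::Scalar::all(0.5));

    net.setInput(A, "A");
    net.setInput(B, "B");
    cv::Mat out = net.forward();
    std::cout << "output dims: " << out.dims << ", elements: " << out.total() << std::endl;
    return 0;
}
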

View File

@@ -75,32 +75,34 @@ public:
FullyConnectedLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
int numOutput = params.get<int>("num_output");
int innerSize = (int)blobs[0].total() / numOutput;
bias = params.get<bool>("bias_term", true);
axis = params.get<int>("axis", 1);
CV_Assert(blobs[0].dims >= 2 && (size_t)(innerSize * numOutput) == blobs[0].total());
CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutput == blobs[1].total()));
weightsMat = blobs[0] = blobs[0].reshape(1, numOutput);
int vecsize = weightsMat.cols;
if( vecsize % VEC_ALIGN != 0 )
if (!blobs.empty())
{
int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN);
Mat weightsBuf(weightsMat.rows, vecsize_aligned, weightsMat.type());
Mat wpadding = weightsBuf.colRange(vecsize, vecsize_aligned);
wpadding.setTo(Scalar::all(0.));
weightsMat = weightsBuf.colRange(0, vecsize);
blobs[0].copyTo(weightsMat);
}
CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
int numOutput = params.get<int>("num_output");
int innerSize = (int)blobs[0].total() / numOutput;
if (bias)
biasMat = blobs[1] = blobs[1].reshape(1, 1);
else
biasMat = Mat::zeros(1, numOutput, weightsMat.type());
CV_Assert(blobs[0].dims >= 2 && (size_t)(innerSize * numOutput) == blobs[0].total());
CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutput == blobs[1].total()));
weightsMat = blobs[0] = blobs[0].reshape(1, numOutput);
int vecsize = weightsMat.cols;
if (vecsize % VEC_ALIGN != 0)
{
int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN);
Mat weightsBuf(weightsMat.rows, vecsize_aligned, weightsMat.type());
Mat wpadding = weightsBuf.colRange(vecsize, vecsize_aligned);
wpadding.setTo(Scalar::all(0.));
weightsMat = weightsBuf.colRange(0, vecsize);
blobs[0].copyTo(weightsMat);
}
if (bias)
biasMat = blobs[1] = blobs[1].reshape(1, 1);
else
biasMat = Mat::zeros(1, numOutput, weightsMat.type());
}
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -108,20 +110,35 @@ public:
std::vector<MatShape> &outputs,
std::vector<MatShape> &) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1);
CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
CV_Assert(blobs[0].dims == 2);
int numOutput, cAxis;
if (blobs.empty())
{
CV_CheckEQ(inputs.size(), (size_t)2, "");
numOutput = inputs[1].back();
cAxis = inputs[0].size() - 1;
CV_CheckEQ(numOutput, inputs[0][cAxis - 1], "");
int dims = inputs[0].size();
CV_CheckEQ(inputs[1].size(), (size_t)dims, "");
CV_CheckGE(dims, 2, "");
for (int i = 0; i < dims - 2; i++)
CV_CheckEQ(inputs[0][i], inputs[1][i], "");
CV_CheckEQ(inputs[0].back(), inputs[1][dims - 2], "");
}
else
{
CV_CheckEQ(inputs.size(), (size_t)1, "");
CV_CheckEQ(blobs[0].dims, 2, "");
numOutput = blobs[0].size[0];
CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
cAxis = clamp(axis, inputs[0]);
}
int cAxis = clamp(axis, inputs[0]);
int numOutput = blobs[0].size[0];
MatShape outShape(cAxis + 1);
for (int i = 0; i < cAxis; ++i)
outShape[i] = inputs[0][i];
outShape.back() = numOutput;
outputs.resize(inputs.size(), outShape);
CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
outputs.resize(1, outShape);
return false;
}
@@ -129,7 +146,8 @@ public:
{
return backendId == DNN_BACKEND_OPENCV ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1) ||
((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && axis == 1);
(((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && !blobs.empty()) ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && axis == 1);
}
virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -288,6 +306,51 @@ public:
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
if (inputs.size() == 2)
{
int dims = outputs[0].dims;
int m = inputs[0].size[dims - 2];
int n = inputs[0].size[dims - 1];
int k = inputs[1].size[dims - 1];
int rows = inputs[0].total() / (m * n);
MatShape sh_A = shape(rows, m * n);
MatShape sh_B = shape(rows, n * k);
MatShape sh_C = shape(rows, m * k);
UMat inp = inputs[0].reshape(1, sh_A.size(), &sh_A[0]);
UMat weight = inputs[1].reshape(1, sh_B.size(), &sh_B[0]);
UMat out = outputs[0].reshape(1, sh_C.size(), &sh_C[0]);
UMat A, B, C, A_fp32, B_fp32, C_fp32;
for (int i = 0; i < rows; ++i)
{
A = inp.row(i).reshape(1, m);
B = weight.row(i).reshape(1, n);
C = out.row(i).reshape(1, m);
if (use_half)
{
convertFp16(A, A_fp32);
convertFp16(B, B_fp32);
convertFp16(C, C_fp32);
}
else
{
A_fp32 = A;
B_fp32 = B;
C_fp32 = C;
}
cv::gemm(A_fp32, B_fp32, 1, noArray(), 0, C_fp32);
if (use_half)
{
convertFp16(A_fp32, A);
convertFp16(B_fp32, B);
convertFp16(C_fp32, C);
}
}
return true;
}
int axisCan = clamp(axis, inputs[0].dims);
int numOutput = blobs[0].size[0];
int innerSize = blobs[0].size[1];
@@ -407,16 +470,42 @@ public:
inputs_arr.getMatVector(input);
outputs_arr.getMatVector(output);
int axisCan = clamp(axis, input[0].dims);
int outerSize = input[0].total(0, axisCan);
for (size_t i = 0; i < input.size(); i++)
if (!blobs.empty())
{
Mat srcMat = input[i].reshape(1, outerSize);
Mat dstMat = output[i].reshape(1, outerSize);
int axisCan = clamp(axis, input[0].dims);
int outerSize = input[0].total(0, axisCan);
const int nstripes = getNumThreads();
FullyConnected::run(srcMat, weightsMat, biasMat, dstMat, activ.get(), nstripes);
for (size_t i = 0; i < input.size(); i++)
{
Mat srcMat = input[i].reshape(1, outerSize);
Mat dstMat = output[i].reshape(1, outerSize);
const int nstripes = getNumThreads();
FullyConnected::run(srcMat, weightsMat, biasMat, dstMat, activ.get(), nstripes);
}
}
else
{
float* inpData = input[0].ptr<float>();
float* weightData = input[1].ptr<float>();
float* outData = output[0].ptr<float>();
int dims = output[0].dims;
int numSlice = output[0].total() / output[0].total(dims - 2);
int m = input[0].size[dims - 2];
int n = input[0].size[dims - 1];
int k = input[1].size[dims - 1];
for (int i = 0; i < numSlice; i++)
{
Mat inpSlice(m, n, CV_32F, inpData);
Mat weightSlice(n, k, CV_32F, weightData);
Mat outSlice(m, k, CV_32F, outData);
outSlice = inpSlice * weightSlice;
inpData += inpSlice.total();
weightData += weightSlice.total();
outData += outSlice.total();
}
}
}
@@ -467,20 +556,28 @@ public:
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
int batch = ieInpNode->get_shape()[0];
std::shared_ptr<ngraph::Node> matmul;
std::vector<size_t> data = {(size_t)batch, (size_t)blobs[0].size[1]};
auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data());
auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true);
if (nodes.size() == 2)
{
auto& inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, false, false);
}
else
{
std::vector<size_t> data = {(size_t)ieInpNode->get_shape()[0], (size_t)blobs[0].size[1]};
auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data());
auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true);
std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data);
matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true);
}
std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data);
auto matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true);
if (bias) {
auto bias_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
ngraph::Shape{(size_t)blobs[1].size[1]}, blobs[1].data);
auto fc = std::make_shared<ngraph::op::v1::Add>(matmul, bias_node, ngraph::op::AutoBroadcastType::NUMPY);
return Ptr<BackendNode>(new InfEngineNgraphNode(fc));
matmul = std::make_shared<ngraph::op::v1::Add>(matmul, bias_node, ngraph::op::AutoBroadcastType::NUMPY);
}
return Ptr<BackendNode>(new InfEngineNgraphNode(matmul));
}
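
For reference, the two-input forward paths above (CPU and OpenCL) both reduce to a batch of independent GEMMs: each m x n slice of the first input is multiplied by the matching n x k slice of the second. A standalone sketch of that computation with cv::gemm; shapes and the helper name are illustrative, not taken from the PR.

#include <opencv2/core.hpp>

// C[i] = A[i] * B[i] for A of shape (rows, m, n) and B of shape (rows, n, k).
static void batchedMatMul(const cv::Mat& A, const cv::Mat& B, cv::Mat& C)
{
    CV_Assert(A.dims == 3 && B.dims == 3 && A.type() == CV_32F && B.type() == CV_32F);
    CV_Assert(A.isContinuous() && B.isContinuous());
    int rows = A.size[0], m = A.size[1], n = A.size[2], k = B.size[2];
    CV_Assert(B.size[0] == rows && B.size[1] == n);

    int outSize[] = {rows, m, k};
    C.create(3, outSize, CV_32F);
    for (int i = 0; i < rows; ++i)
    {
        // Wrap each slice as a 2-D matrix header; no data is copied.
        cv::Mat a(m, n, CV_32F, (void*)A.ptr<float>(i));
        cv::Mat b(n, k, CV_32F, (void*)B.ptr<float>(i));
        cv::Mat c(m, k, CV_32F, C.ptr<float>(i));
        cv::gemm(a, b, 1.0, cv::noArray(), 0.0, c);  // c = 1.0 * a * b
    }
}

With A of shape 2x3x4 and B of shape 2x4x5 this yields a 2x3x5 result, matching the shape checks added to getMemoryShapes.
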

View File

@@ -154,6 +154,73 @@ private:
int axis;
};
class NormalizeSubgraph1 : public Subgraph
{
public:
NormalizeSubgraph1() : axis(1)
{
input = addNodeToMatch("");
norm = addNodeToMatch("ReduceL2", input);
addNodeToMatch("Div", input, norm);
setFusedNode("Normalize", input);
}
virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
std::vector<int>& matchedNodesIds,
std::vector<int>& targetNodesIds) CV_OVERRIDE
{
if (Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds))
{
Ptr<ImportNodeWrapper> norm = net->getNode(matchedNodesIds[0]);
opencv_onnx::NodeProto* node = norm.dynamicCast<ONNXNodeWrapper>()->node;
for (int i = 0; i < node->attribute_size(); i++)
{
opencv_onnx::AttributeProto attr = node->attribute(i);
if (attr.name() != "axes")
continue;
if (attr.ints_size() != 1)
CV_Error(Error::StsNotImplemented, format("Unexpected number of axes: %d", attr.ints_size()));
axis = attr.ints(0);
return true;
}
CV_Error(Error::StsNotImplemented, "Missed axes attribute");
}
return false;
}
virtual void finalize(const Ptr<ImportGraphWrapper>&,
const Ptr<ImportNodeWrapper>& fusedNode,
std::vector<Ptr<ImportNodeWrapper> >&) CV_OVERRIDE
{
opencv_onnx::NodeProto* node = fusedNode.dynamicCast<ONNXNodeWrapper>()->node;
opencv_onnx::AttributeProto* axis_attr = node->add_attribute();
axis_attr->set_name("axis");
axis_attr->set_i(axis);
opencv_onnx::AttributeProto* end_axis_attr = node->add_attribute();
end_axis_attr->set_name("end_axis");
end_axis_attr->set_i(axis);
}
protected:
int input, norm;
int axis;
};
class NormalizeSubgraph2 : public NormalizeSubgraph1
{
public:
NormalizeSubgraph2() : NormalizeSubgraph1()
{
int clip = addNodeToMatch("Clip", norm);
int shape = addNodeToMatch("Shape", input);
int expand = addNodeToMatch("Expand", clip, shape);
addNodeToMatch("Div", input, expand);
}
};
class GatherCastSubgraph : public Subgraph
{
public:
@@ -299,6 +366,8 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
subgraphs.push_back(makePtr<ResizeSubgraph1>());
subgraphs.push_back(makePtr<ResizeSubgraph2>());
subgraphs.push_back(makePtr<SoftMaxSubgraph>());
subgraphs.push_back(makePtr<NormalizeSubgraph1>());
subgraphs.push_back(makePtr<NormalizeSubgraph2>());
simplifySubgraphs(Ptr<ImportGraphWrapper>(new ONNXGraphWrapper(net)), subgraphs);
}
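
The two subgraphs registered above fuse the ONNX pattern y = x / ReduceL2(x, axes=[a]) (with an optional Clip/Shape/Expand chain in the second variant) into a single Normalize layer whose axis and end_axis come from the matched axes attribute. A small reference computation of that L2 normalization for a 2-D tensor normalized along axis 1; the helper name is illustrative.

#include <opencv2/core.hpp>
#include <algorithm>

// y(r, c) = x(r, c) / ||x(r, :)||_2, i.e. each row is scaled to unit L2 norm.
static cv::Mat l2NormalizeRows(const cv::Mat& x)
{
    CV_Assert(x.dims == 2 && x.type() == CV_32F);
    cv::Mat y(x.rows, x.cols, CV_32F);
    for (int r = 0; r < x.rows; ++r)
    {
        // The Clip in NormalizeSubgraph2 plays the same role as this epsilon.
        double nrm = std::max(cv::norm(x.row(r), cv::NORM_L2), 1e-12);
        const float* src = x.ptr<float>(r);
        float* dst = y.ptr<float>(r);
        for (int c = 0; c < x.cols; ++c)
            dst[c] = (float)(src[c] / nrm);
    }
    return y;
}
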

View File

@@ -391,19 +391,71 @@ void ONNXImporter::populateNet(Net dstNet)
CV_Error(Error::StsNotImplemented, "Unsupported mode of ReduceMean operation.");
MatShape inpShape = outShapes[node_proto.input(0)];
if (inpShape.size() != 4 && inpShape.size() != 5)
DictValue axes = layerParams.get("axes");
if (inpShape.size() == 3 && axes.size() <= 2)
{
int axis = axes.get<int>(0);
CV_CheckNE(axis, 0, "");
outShapes[layerParams.name] = inpShape;
outShapes[layerParams.name][axis] = 1;
LayerParams reshapeLp;
reshapeLp.name = layerParams.name + "/reshape";
reshapeLp.type = "Reshape";
CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
reshapeLp.set("axis", 0);
reshapeLp.set("num_axes", 1);
int newShape[] = {1, -1};
reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2));
opencv_onnx::NodeProto proto;
proto.add_input(node_proto.input(0));
proto.add_output(reshapeLp.name);
addLayer(dstNet, reshapeLp, proto, layer_id, outShapes);
LayerParams avgLp;
avgLp.name = layerParams.name + "/avg";
avgLp.type = "Pooling";
CV_Assert(layer_id.find(avgLp.name) == layer_id.end());
avgLp.set("pool", "ave");
if (axes.size() == 2)
{
CV_CheckEQ(axes.get<int>(0), 1, "Unsupported ReduceMean mode");
CV_CheckEQ(axes.get<int>(1), 2, "Unsupported ReduceMean mode");
avgLp.set("global_pooling", true);
outShapes[layerParams.name][axes.get<int>(1)] = 1;
}
else
{
avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true);
avgLp.set(axis == 2 ? "kernel_h" : "kernel_w", 1);
}
node_proto.set_input(0, reshapeLp.name);
node_proto.set_output(0, avgLp.name);
addLayer(dstNet, avgLp, node_proto, layer_id, outShapes);
layerParams.type = "Flatten";
layerParams.set("axis", 0);
layerParams.set("end_axis", 1);
node_proto.set_input(0, avgLp.name);
node_proto.set_output(0, layerParams.name);
}
else
{
if (inpShape.size() != 4 && inpShape.size() != 5)
CV_Error(Error::StsNotImplemented, "Unsupported input shape of reduce_mean operation.");
DictValue axes = layerParams.get("axes");
CV_Assert(axes.size() <= inpShape.size() - 2);
std::vector<int> kernel_size(inpShape.size() - 2, 1);
for (int i = 0; i < axes.size(); i++) {
int axis = axes.get<int>(i);
CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
kernel_size[axis - 2] = inpShape[axis];
CV_Assert(axes.size() <= inpShape.size() - 2);
std::vector<int> kernel_size(inpShape.size() - 2, 1);
for (int i = 0; i < axes.size(); i++) {
int axis = axes.get<int>(i);
CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
kernel_size[axis - 2] = inpShape[axis];
}
layerParams.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size()));
}
layerParams.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size()));
}
}
else if (layer_type == "Slice")
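
The hunk above lowers a single-axis ReduceMean on a 3-D input into Reshape, average Pooling and Flatten layers; numerically the result is simply the mean over the chosen axis. A reference sketch for axes=[1] on an N x C x W tensor (shapes and the helper name are assumptions for illustration).

#include <opencv2/core.hpp>
#include <cstring>

// Mean over axis 1 of an N x C x W tensor with keepdims, giving N x 1 x W.
static cv::Mat reduceMeanAxis1(const cv::Mat& x)
{
    CV_Assert(x.dims == 3 && x.type() == CV_32F && x.isContinuous());
    int N = x.size[0], C = x.size[1], W = x.size[2];
    int outSize[] = {N, 1, W};
    cv::Mat y(3, outSize, CV_32F);
    for (int n = 0; n < N; ++n)
    {
        // View the n-th slice as a C x W matrix and average over its rows.
        cv::Mat slice(C, W, CV_32F, (void*)x.ptr<float>(n));
        cv::Mat colMeans;  // 1 x W row of per-column means
        cv::reduce(slice, colMeans, 0, cv::REDUCE_AVG, CV_32F);
        std::memcpy(y.ptr<float>(n), colMeans.ptr<float>(0), W * sizeof(float));
    }
    return y;
}
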
@@ -825,10 +877,14 @@ void ONNXImporter::populateNet(Net dstNet)
{
CV_Assert(node_proto.input_size() == 2);
layerParams.type = "InnerProduct";
Mat blob = getBlob(node_proto, constBlobs, 1);
layerParams.blobs.push_back(blob.t());
layerParams.set("bias_term", false);
layerParams.set("num_output", layerParams.blobs[0].size[0]);
if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
{
Mat blob = getBlob(node_proto, constBlobs, 1);
layerParams.blobs.push_back(blob.t());
layerParams.set("num_output", layerParams.blobs[0].size[0]);
}
}
else if (layer_type == "Mul" || layer_type == "Div")
{
@@ -977,22 +1033,6 @@ void ONNXImporter::populateNet(Net dstNet)
continue;
}
}
else if (layer_type == "ReduceL2")
{
CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes"));
CV_Assert(graph_proto.node_size() > li + 1 && graph_proto.node(li + 1).op_type() == "Div");
++li;
node_proto = graph_proto.node(li);
layerParams.name = node_proto.output(0);
layerParams.type = "Normalize";
DictValue axes_dict = layerParams.get("axes");
if (axes_dict.size() != 1)
CV_Error(Error::StsNotImplemented, "Multidimensional reduceL2");
int axis = axes_dict.getIntValue(0);
layerParams.set("axis",axis);
layerParams.set("end_axis", axis);
}
else if (layer_type == "Squeeze")
{
CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes"));
@@ -1080,6 +1120,78 @@ void ONNXImporter::populateNet(Net dstNet)
layerParams.type = "Reshape";
layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
}
else if (layer_type == "Expand")
{
CV_CheckEQ(node_proto.input_size(), 2, "");
CV_Assert(constBlobs.find(node_proto.input(1)) != constBlobs.end());
Mat newShapeMat = getBlob(node_proto, constBlobs, 1);
MatShape targetShape(newShapeMat.ptr<int>(), newShapeMat.ptr<int>() + newShapeMat.total());
shapeIt = outShapes.find(node_proto.input(0));
CV_Assert(shapeIt != outShapes.end());
MatShape inpShape = shapeIt->second;
CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims");
std::vector<int> broadcast_axes;
for (int i = 0; i < targetShape.size(); i++)
{
if (targetShape[i] != inpShape[i])
{
if (inpShape[i] == 1)
broadcast_axes.push_back(i);
else
CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i));
}
}
if (broadcast_axes.size() == 2 &&
broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1)
{
LayerParams constParams;
constParams.name = layerParams.name + "/const";
CV_Assert(layer_id.find(constParams.name) == layer_id.end());
constParams.type = "Const";
Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr<int>(), CV_32F);
constParams.blobs.push_back(inp);
opencv_onnx::NodeProto proto;
proto.add_output(constParams.name);
addLayer(dstNet, constParams, proto, layer_id, outShapes);
layerParams.type = "Scale";
layerParams.set("bias_term", false);
node_proto.set_input(0, constParams.name);
node_proto.set_input(1, shapeIt->first);
}
else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1)
{
String base_name = layerParams.name + "/copy_";
std::vector<std::string> input_names;
for (int j = 0; j < targetShape[broadcast_axes[0]]; j++)
{
std::ostringstream ss;
ss << j;
LayerParams copyLP;
copyLP.name = base_name + ss.str();
copyLP.type = "Identity";
CV_Assert(layer_id.find(copyLP.name) == layer_id.end());
input_names.push_back(copyLP.name);
node_proto.set_output(0, copyLP.name);
addLayer(dstNet, copyLP, node_proto, layer_id, outShapes);
}
node_proto.clear_input();
for (int i = 0; i < input_names.size(); i++)
{
node_proto.add_input(input_names[i]);
}
layerParams.set("axis", broadcast_axes[0]);
layerParams.type = "Concat";
}
else
CV_Error(Error::StsNotImplemented, "Unsupported Expand op");
}
else if (layer_type == "Reshape")
{
CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape"));
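
The Expand branch above handles broadcasting in two ways: trailing H/W axes become a Scale layer against a constant ones tensor of the target shape, while a single leading axis becomes a Concat of Identity copies. The second case is equivalent to stacking N identical copies along that axis; a minimal sketch, with helper name and shapes as assumptions.

#include <opencv2/core.hpp>
#include <cstring>

// Broadcast a 1 x C x W tensor to N x C x W by repeating the single slice N times,
// mirroring the Concat-of-copies decomposition used for Expand along axis 0.
static cv::Mat expandAxis0(const cv::Mat& x, int N)
{
    CV_Assert(x.dims == 3 && x.size[0] == 1 && x.isContinuous());
    int outSize[] = {N, x.size[1], x.size[2]};
    cv::Mat y(3, outSize, x.type());
    size_t sliceBytes = x.total() * x.elemSize();  // bytes of the single input slice
    for (int n = 0; n < N; ++n)
        std::memcpy(y.ptr(n), x.ptr(0), sliceBytes);
    return y;
}
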

View File

@@ -179,6 +179,8 @@ TEST_P(Test_ONNX_layers, Shape)
TEST_P(Test_ONNX_layers, ReduceMean)
{
testONNXModels("reduce_mean");
testONNXModels("reduce_mean_axis1");
testONNXModels("reduce_mean_axis2");
}
TEST_P(Test_ONNX_layers, ReduceMean3D)
@@ -308,6 +310,30 @@ TEST_P(Test_ONNX_layers, Multiplication)
testONNXModels("mul");
}
TEST_P(Test_ONNX_layers, MatMul)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
testONNXModels("matmul_2d");
testONNXModels("matmul_3d");
testONNXModels("matmul_4d");
}
TEST_P(Test_ONNX_layers, Expand)
{
testONNXModels("expand_batch");
testONNXModels("expand_channels");
}
TEST_P(Test_ONNX_layers, ExpandHW)
{
// ngraph::op::v1::Multiply bug
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
testONNXModels("expand_hw");
}
TEST_P(Test_ONNX_layers, Constant)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000)
@@ -413,6 +439,7 @@ TEST_P(Test_ONNX_layers, Squeeze)
TEST_P(Test_ONNX_layers, ReduceL2)
{
testONNXModels("reduceL2");
testONNXModels("reduceL2_subgraph");
}
TEST_P(Test_ONNX_layers, Split)