Merge pull request #20605 from rogday:split_slice_shenanigans

Add Normalize subgraph, fix Slice, Mul and Expand * Add Normalize subgraph, support for starts<0 and axis<0 in Slice, Mul broadcasting in the middle and fix Expand's unsqueeze * remove todos * remove range-based for loop * address review comments * change >> to > > in template * fix indexation * fix expand that does nothing
2025-07-24 14:06:27 +08:00 · 2021-09-09 14:41:40 +03:00 · 2021-09-09 14:41:40 +03:00 · 4807cd8a6e
commit 4807cd8a6e
parent 1e0d290f2e
4 changed files with 173 additions and 43 deletions
--- a/modules/dnn/src/layers/slice_layer.cpp
+++ b/modules/dnn/src/layers/slice_layer.cpp
@ -58,6 +58,31 @@ namespace cv
 namespace dnn
 {

+/** @brief Resolves a negative slicing axis and a negative start index against a concrete input shape.
+ *
+ * @param inpShape    shape of the input blob; must contain at least one dimension
+ * @param axis        [in,out] slicing axis; a negative value counts from the last dimension
+ *                    and is rewritten to the equivalent non-negative index
+ * @param sliceRanges [in,out] one list of ranges per output, where element j of a list applies
+ *                    to dimension j of the input. If @p axis was negative on entry, every list
+ *                    is padded in front with `axis` (resolved) copies of Range::all().
+ *                    A negative start of the last range of every list is rewritten to an offset
+ *                    from the beginning of its dimension; starts below -size are clamped to 0.
+ */
+void sliceRangesFromShape(const MatShape& inpShape, int& axis, std::vector<std::vector<cv::Range> >& sliceRanges)
+{
+    CV_Assert(inpShape.size() > 0);
+    const int dims = static_cast<int>(inpShape.size());
+    const bool axisNeg = (axis < 0);
+    // Wrap in signed arithmetic: mixing int with size_t would convert a negative
+    // sum to a huge unsigned value before the modulo is taken.
+    axis = ((axis % dims) + dims) % dims;
+
+    for (size_t i = 0; i < sliceRanges.size(); ++i)
+    {
+        std::vector<Range>& ranges = sliceRanges[i];
+        if (axisNeg)
+        {
+            ranges.insert(ranges.begin(), static_cast<size_t>(axis), Range::all());
+        }
+        if (ranges.empty())
+        {
+            continue;  // no range to resolve, back() would be undefined
+        }
+
+        Range& range = ranges.back();
+        // Range::all() also has a negative start, but it must stay untouched.
+        if (range.start >= 0 || range == Range::all())
+        {
+            continue;
+        }
+
+        // The last range applies to the dimension with the same index.
+        const size_t lastDim = ranges.size() - 1;
+        CV_Assert(lastDim < inpShape.size());
+        const int n = inpShape[lastDim];
+        CV_Assert(n != 0);
+        // Count from the end of the dimension; anything before its beginning becomes 0.
+        range.start = (range.start <= -n) ? 0 : n + range.start;
+    }
+}
+
 class SliceLayerImpl : public SliceLayer
 {
 public:
@ -69,20 +94,22 @@ public:
        num_split = params.get<int>("num_split", 0);
        hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
        shapesInitialized = !hasDynamicShapes;
+
        if (params.has("slice_point"))
        {
            CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));
            const DictValue &indicesValue = params.get("slice_point");
+            int size = axis > 0 ? axis + 1 : 1;
            sliceRanges.resize(indicesValue.size() + 1,
-                               std::vector<Range>(axis + 1, Range::all()));
+                               std::vector<Range>(size, Range::all()));
            int prevSlice = 0;
            for (int i = 0; i < indicesValue.size(); ++i)
            {
-                sliceRanges[i][axis].start = prevSlice;
-                sliceRanges[i][axis].end = indicesValue.get<int>(i);
-                prevSlice = sliceRanges[i][axis].end;
+                sliceRanges[i][size - 1].start = prevSlice;
+                sliceRanges[i][size - 1].end = indicesValue.get<int>(i);
+                prevSlice = sliceRanges[i][size - 1].end;
            }
-            sliceRanges.back()[axis].start = prevSlice;
+            sliceRanges.back()[size - 1].start = prevSlice;
        }
        else if (params.has("begin"))
        {
@ -97,7 +124,6 @@ public:
            {
                int start = begins.get<int>(i);
                int sizeOrEnd = sizesOrEnds.get<int>(i);  // It may be negative to reverse indexation.
-                CV_Assert(start >= 0);

                sliceRanges[0][i].start = start;
                if (params.has("size"))
@ -154,16 +180,20 @@ public:
        CV_Assert(inputs.size() == 1);
        MatShape inpShape = inputs[0];

-        if (!sliceRanges.empty())
+        int axis_rw = axis;
+        std::vector<std::vector<cv::Range> > sliceRanges_rw = sliceRanges;
+        sliceRangesFromShape(inpShape, axis_rw, sliceRanges_rw);
+
+        if (!sliceRanges_rw.empty())
        {
-            outputs.resize(sliceRanges.size(), inpShape);
+            outputs.resize(sliceRanges_rw.size(), inpShape);
            for (int i = 0; i < outputs.size(); ++i)
            {
-                CV_Assert(sliceRanges[i].size() <= inpShape.size());
-                for (int j = 0; j < sliceRanges[i].size(); ++j)
+                CV_Assert(sliceRanges_rw[i].size() <= inpShape.size());
+                for (int j = 0; j < sliceRanges_rw[i].size(); ++j)
                {
                    if (shapesInitialized || inpShape[j] > 0)
-                        outputs[i][j] = normalize_axis_range(sliceRanges[i][j], inpShape[j]).size();
+                        outputs[i][j] = normalize_axis_range(sliceRanges_rw[i][j], inpShape[j]).size();

                    if (!sliceSteps.empty() && (i < sliceSteps.size()) && (j < sliceSteps[i].size()) && (sliceSteps[i][j] > 1))
                        outputs[i][j] = (outputs[i][j] + sliceSteps[i][j] - 1) / sliceSteps[i][j];
@ -172,10 +202,10 @@ public:
        }
        else  // Divide input blob on equal parts by axis.
        {
-            CV_Assert(0 <= axis && axis < inpShape.size());
+            CV_Assert(0 <= axis_rw && axis_rw < inpShape.size());
            int splits = num_split ? num_split : requiredOutputs;
-            CV_Assert(splits > 0 && inpShape[axis] % splits == 0);
-            inpShape[axis] /= splits;
+            CV_Assert(splits > 0 && inpShape[axis_rw] % splits == 0);
+            inpShape[axis_rw] /= splits;
            outputs.resize(splits, inpShape);
        }
        return false;
@ -200,6 +230,7 @@ public:
        CV_Assert(inputs.size() == 1);
        const MatSize& inpShape = inputs[0].size;

+        sliceRangesFromShape(shape(inputs[0]), axis, sliceRanges);
        finalSliceRanges = sliceRanges;

        if (sliceRanges.empty())
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@ -231,6 +231,27 @@ public:
    }
 };

+/** Pattern  Div(x, Expand(Clip(ReduceL2(x), min, max), shape))  fused into one
+ *  "Normalize" node that takes x as its input.
+ *  NOTE(review): nodes added with an empty type presumably match any producer - confirm.
+ */
+class NormalizeSubgraph2_2 : public NormalizeSubgraphBase
+{
+public:
+    NormalizeSubgraph2_2()
+    {
+        // x and its L2 norm
+        const int srcId = addNodeToMatch("");
+        const int l2NormId = addNodeToMatch("ReduceL2", srcId);
+
+        // the norm is clipped between two more inputs
+        const int lowerId = addNodeToMatch("");
+        const int upperId = addNodeToMatch("");
+        const int clippedId = addNodeToMatch("Clip", l2NormId, lowerId, upperId);
+
+        // the clipped norm is expanded to a shape given by another input
+        const int shapeId = addNodeToMatch("");
+        const int expandedId = addNodeToMatch("Expand", clippedId, shapeId);
+
+        // x divided by the expanded norm
+        addNodeToMatch("Div", srcId, expandedId);
+
+        setFusedNode("Normalize", srcId);
+    }
+};
+
 class NormalizeSubgraph3 : public NormalizeSubgraphBase
 {
 public:
@ -555,6 +576,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
    subgraphs.push_back(makePtr<SoftMaxSubgraph>());
    subgraphs.push_back(makePtr<NormalizeSubgraph1>());
    subgraphs.push_back(makePtr<NormalizeSubgraph2>());
+    subgraphs.push_back(makePtr<NormalizeSubgraph2_2>());
    subgraphs.push_back(makePtr<NormalizeSubgraph3>());
    subgraphs.push_back(makePtr<BatchNormalizationSubgraph1>());
    subgraphs.push_back(makePtr<BatchNormalizationSubgraph2>());
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@ -59,6 +59,8 @@ class ONNXImporter
    void addLayer(LayerParams& layerParams,
                  const opencv_onnx::NodeProto& node_proto);

+    void expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
+                   const std::string& input, size_t n);
 public:

    ONNXImporter(Net& net, const char *onnxFile)
@ -427,6 +429,37 @@ void ONNXImporter::addLayer(LayerParams& layerParams,
    }
 }

+/** @brief Make N copies of input layer and set them as input to node_proto.
+ *
+ * For every copy an "Identity" layer named "<prefix>/copy_<index>" is added; input 0 and
+ * output 0 of @p node_proto are overwritten while doing so. On return the inputs of
+ * @p node_proto are exactly the names of the copies, and output 0 still holds the name of
+ * the last copy (if any), so the caller has to set the output it needs.
+ *
+ * @param prefix prefix of new layers' names
+ * @param node_proto node which will contain all copies as inputs
+ * @param input name of the node to copy
+ * @param n number of copies
+ */
+void ONNXImporter::expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
+                             const std::string& input, size_t n)
+{
+    // `input` may refer to one of node_proto's own input strings, which are rewritten
+    // and finally cleared below, so work on a private copy of the name.
+    const std::string srcName = input;
+
+    std::vector<std::string> input_names;
+    input_names.reserve(n);
+    for (size_t j = 0; j < n; j++)
+    {
+        LayerParams copyLP;
+        // "%d" expects an int; passing size_t through the variadic call is undefined behaviour.
+        copyLP.name = format("%s/copy_%d", prefix.c_str(), static_cast<int>(j));
+        copyLP.type = "Identity";
+        CV_Assert((layer_id.find(copyLP.name) == layer_id.end()) &&
+            "Couldn't copy the node: generated name already exists in the graph.");
+        input_names.push_back(copyLP.name);
+
+        node_proto.set_input(0, srcName);
+        node_proto.set_output(0, copyLP.name);
+        addLayer(copyLP, node_proto);
+    }
+    node_proto.clear_input();
+    for (size_t i = 0; i < input_names.size(); i++)
+    {
+        node_proto.add_input(input_names[i]);
+    }
+}
+
 void ONNXImporter::addConstant(const std::string& name, const Mat& blob)
 {
    constBlobs.insert(std::make_pair(name, blob));
@ -1288,6 +1321,37 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node
    addLayer(layerParams, node_proto);
 }

+/** @brief Finds where a broadcast operand starts inside the output shape and which single
+ *         inner dimension (if any) has to be replicated before it can be applied.
+ *
+ * @param broadShape shape of the operand that is broadcast
+ * @param outShape   shape it is broadcast against; broadShape is right-aligned with it
+ * @param axis       [out] position, in outShape indices, of the first non-one element of broadShape
+ * @param broadAxis  [out] position, in outShape indices, of the dimension between the first and
+ *                   the last non-one elements of broadShape that is 1 in broadShape but differs
+ *                   in outShape; -1 if there is no such dimension
+ *
+ * Raises an assertion if more than one inner dimension differs, if a differing
+ * dimension is not 1 in broadShape, or if a compared index falls outside of outShape.
+ */
+void findBroadAxis(const MatShape& broadShape, const MatShape& outShape, size_t& axis, int& broadAxis)
+{
+    // offset that right-aligns broadShape with outShape
+    const size_t diff = outShape.size() - broadShape.size();
+
+    // find the first non-one element of the broadcasting shape
+    axis = 0;
+    for (; axis < broadShape.size() && broadShape[axis] == 1; ++axis) {}
+
+    // find the last non-one element of the broadcasting shape
+    size_t endAxis = broadShape.size();
+    for (; endAxis > axis && broadShape[endAxis - 1] == 1; --endAxis) {}
+
+    // find one between axis and endAxis - as it needs to be broadcasted,
+    // dimensions from the left of axis and from the right of endAxis will be handled by Scale layer
+    broadAxis = -1;
+    for (size_t i = axis; i < endAxis; ++i)
+    {
+        size_t outAxis = i + diff;
+        // diff is unsigned and wraps around when broadShape is the longer shape,
+        // so make sure the index really points into outShape before reading it
+        CV_Assert(outAxis < outShape.size());
+        if (outShape[outAxis] == broadShape[i])
+        {
+            continue;
+        }
+
+        // ensure we need to broadcast only 1 dimension in the middle
+        CV_Assert(broadShape[i] == 1 && broadAxis == -1);
+        broadAxis = static_cast<int>(outAxis);
+    }
+
+    axis += diff;
+}
+
 // "Mul" "Div"
 void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
 {
@ -1410,13 +1474,31 @@ void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodePro
        }

        const MatShape& broadShape = outShapes[node_proto.input(1)];
-        const size_t outShapeSize = outShapes[node_proto.input(0)].size();
-        const size_t diff = outShapeSize - broadShape.size();
+        const MatShape& outShape = outShapes[node_proto.input(0)];

-        size_t axis;
-        for (axis = diff; axis < broadShape.size() && broadShape[axis - diff] == 1; ++axis) {}
+        size_t axis = 0;
+        int broadAxis = -1;
+        findBroadAxis(broadShape, outShape, axis, broadAxis);

-        CV_Assert(axis != outShapeSize);
+        // if there is a one dimension in the middle that should be broadcasted, broadcast it
+        if (broadAxis != -1)
+        {
+            opencv_onnx::NodeProto concat_node_proto = node_proto;
+            const std::string& input1 = concat_node_proto.input(1);
+
+            expandMid(layerParams.name, concat_node_proto, input1, outShape[broadAxis]);
+
+            LayerParams concatLP;
+            concatLP.name = layerParams.name + "/concat";
+            concatLP.set("axis", broadAxis);
+            concatLP.type = "Concat";
+            concat_node_proto.set_output(0, concatLP.name);
+
+            addLayer(concatLP, concat_node_proto);
+            node_proto.set_input(1, concatLP.name);
+        }
+
+        CV_Assert(axis != outShape.size());
        layerParams.set("axis", static_cast<int>(axis));
        layerParams.type = "Scale";
    }
@ -1685,12 +1767,11 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node
    // Unsqueeze and repeat along new axis
    if (targetShape.size() == inpShape.size() + 1)
    {
+        inpShape.insert(inpShape.begin(), targetShape.size() - inpShape.size(), 1);
        for (int i = 0; i < targetShape.size(); i++)
        {
-            if (targetShape[i] == -1 && i < inpShape.size())
+            if (abs(targetShape[i]) == 1)
                targetShape[i] = inpShape[i];
-            else if (i < inpShape.size() && targetShape[i] != inpShape[i])
-                inpShape.insert(inpShape.begin() + i, 1);
        }
        if (haveVariables)
        {
@ -1710,14 +1791,19 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node
    CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims");

    std::vector<int> broadcast_axes;
+    // shapes aren't right-aligned here because targetShape.size() == inpShape.size()
    for (int i = 0; i < targetShape.size(); i++)
    {
        if (targetShape[i] != inpShape[i])
        {
            if (inpShape[i] == 1)
+            {
                broadcast_axes.push_back(i);
-            else
+            }
+            else if (targetShape[i] != 1)
+            {
                CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i));
+            }
        }
    }

@ -1756,31 +1842,16 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node
    }
    else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1)
    {
-        String base_name = layerParams.name + "/copy_";
-        std::vector<std::string> input_names;
-        for (int j = 0; j < targetShape[broadcast_axes[0]]; j++)
-        {
-            std::ostringstream ss;
-            ss << j;
-            LayerParams copyLP;
-            copyLP.name = base_name + ss.str();
-            copyLP.type = "Identity";
-            CV_Assert(layer_id.find(copyLP.name) == layer_id.end());
-            input_names.push_back(copyLP.name);
+        expandMid(layerParams.name, node_proto, srcName, targetShape[broadcast_axes[0]]);

-            node_proto.set_input(0, srcName);
-            node_proto.set_output(0, copyLP.name);
-            addLayer(copyLP, node_proto);
-        }
-        node_proto.clear_input();
-        for (int i = 0; i < input_names.size(); i++)
-        {
-            node_proto.add_input(input_names[i]);
-        }
        layerParams.set("axis", broadcast_axes[0]);
        layerParams.type = "Concat";
        node_proto.set_output(0, layerParams.name);
    }
+    else if (broadcast_axes.empty())
+    {
+        layerParams.type = "Identity";
+    }
    else
        CV_Error(Error::StsNotImplemented, "Unsupported Expand op");
    addLayer(layerParams, node_proto);
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@ -285,6 +285,7 @@ TEST_P(Test_ONNX_layers, Scale)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
    testONNXModels("scale");
    testONNXModels("scale_broadcast", npy, 0, 0, false, true, 3);
+    testONNXModels("scale_broadcast_mid", npy, 0, 0, false, true, 2);
 }

 TEST_P(Test_ONNX_layers, ReduceMean3D)
@ -471,6 +472,8 @@ TEST_P(Test_ONNX_layers, MatMulAdd)

 TEST_P(Test_ONNX_layers, Expand)
 {
+    testONNXModels("expand");
+    testONNXModels("expand_identity");
    testONNXModels("expand_batch");
    testONNXModels("expand_channels");
    testONNXModels("expand_neg_batch");
@ -611,6 +614,7 @@ TEST_P(Test_ONNX_layers, ReduceL2)
    testONNXModels("reduceL2");
    testONNXModels("reduceL2_subgraph");
    testONNXModels("reduceL2_subgraph_2");
+    testONNXModels("reduceL2_subgraph2_2");
 }

 TEST_P(Test_ONNX_layers, Split)
@ -624,6 +628,7 @@ TEST_P(Test_ONNX_layers, Split)
    testONNXModels("split_3");
    testONNXModels("split_4");
    testONNXModels("split_sizes");
+    testONNXModels("split_neg_axis");
 }

 TEST_P(Test_ONNX_layers, Slice)
@ -632,6 +637,7 @@ TEST_P(Test_ONNX_layers, Slice)
    testONNXModels("slice", npy, 0, 0, false, false);
 #else
    testONNXModels("slice");
+    testONNXModels("slice_neg_starts");
    testONNXModels("slice_opset_11");
 #endif
 }