Merge pull request #21608 from zihaomu:depth2space

DNN: add depth2space and space2depth layer for onnx importer
2025-07-21 11:36:46 +08:00 · 2022-05-12 08:46:36 +00:00 · 2022-05-12 08:46:36 +00:00 · dda96264df
commit dda96264df
parent eff5605be5 64ded50bbf
2 changed files with 81 additions and 6 deletions
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@ -175,6 +175,7 @@ private:
    void parseSoftMax              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseDetectionOutput      (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseCumSum               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseDepthToSpace         (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSimpleLayers         (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);

    // Domain: com.microsoft
@ -3183,6 +3184,85 @@ void ONNXImporter::parseCumSum(LayerParams& layerParams, const opencv_onnx::Node
    addLayer(layerParams, node_proto);
 }

+// Imports ONNX "DepthToSpace" and "SpaceToDepth" nodes.
+//
+// Both operators are lowered to a Reshape(6D) -> Permute -> Reshape(4D) chain.
+// The intermediate and final shapes are computed at import time from the input
+// shape stored in outShapes, so the input must have a known 4D shape.
+//
+// layerParams: must contain a positive "blocksize"; "mode" ("DCR" by default, or
+//              "CRD") is read as well but only influences DepthToSpace.
+// node_proto_: the node being imported; its first input is the tensor to rearrange.
+//
+// Fails (CV_Assert / CV_Error) when the op type is unexpected, "blocksize" is
+// missing or non-positive, the input shape is unknown or not 4D, the mode is
+// unsupported, or the relevant dimensions are not divisible by the block size.
+void ONNXImporter::parseDepthToSpace(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
+{
+    // Work on a copy: input 0 is rewired to the Permute output before the last addLayer().
+    opencv_onnx::NodeProto node_proto = node_proto_;
+    const std::string& layer_type = node_proto.op_type();
+    CV_Assert(layer_type == "DepthToSpace" || layer_type == "SpaceToDepth");
+    CV_Assert(node_proto.input_size() >= 1);
+
+    // Get blocksize
+    CV_Assert(layerParams.has("blocksize"));
+    const int blocksize = layerParams.get<int>("blocksize");
+    CV_Assert(blocksize > 0);
+
+    // Get mode, only for "DepthToSpace"
+    const std::string modeType = layerParams.get<std::string>("mode", "DCR");
+
+    // Look the shape up with find(): operator[] would silently insert an empty
+    // shape for an unknown tensor name.
+    const auto shapeIt = outShapes.find(node_proto.input(0));
+    CV_Assert(shapeIt != outShapes.end());
+    const MatShape& inpShape = shapeIt->second;
+    CV_Assert(inpShape.size() == 4);
+    const int N = inpShape[0], C = inpShape[1], H = inpShape[2], W = inpShape[3];
+
+    // Implement DepthToSpace and SpaceToDepth by the Reshape and Permute layer.
+    std::array<int, 6> shape0, perm;
+    std::array<int, 4> shape1;
+
+    if (layer_type == "DepthToSpace")
+    {
+        // The channel axis is split into (blocksize, blocksize, C / blocksize^2).
+        // Without this check the integer division below would truncate and the
+        // Reshape would be created with dims that do not match the input.
+        const int blockArea = blocksize * blocksize;
+        CV_Assert(C % blockArea == 0);
+        const int outC = C / blockArea;
+
+        if (modeType == "DCR")
+        {
+            shape0 = {N, blocksize, blocksize, outC, H, W};
+            perm = {0, 3, 4, 1, 5, 2};
+        }
+        else if (modeType == "CRD")
+        {
+            shape0 = {N, outC, blocksize, blocksize, H, W};
+            perm = {0, 1, 4, 2, 5, 3};
+        }
+        else
+            CV_Error(Error::StsNotImplemented, "The mode of " + modeType + " in " + layer_type + " Layer is not supported");
+
+        // Both modes produce the same output shape; only the element order differs.
+        shape1 = {N, outC, H * blocksize, W * blocksize};
+    }
+    else // SpaceToDepth
+    {
+        // Each spatial axis is split into (size / blocksize, blocksize), so both
+        // must be exact multiples of the block size.
+        CV_Assert(H % blocksize == 0 && W % blocksize == 0);
+        const int outH = H / blocksize, outW = W / blocksize;
+
+        shape0 = {N, C, outH, blocksize, outW, blocksize};
+        perm = {0, 3, 5, 1, 2, 4};
+        shape1 = {N, C * blocksize * blocksize, outH, outW};
+    }
+
+    // Step1: Reshape
+    LayerParams reshapeLp;
+    reshapeLp.name = layerParams.name + "/reshape";
+    reshapeLp.type = "Reshape";
+    CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
+    reshapeLp.set("dim", DictValue::arrayInt(shape0.data(), shape0.size()));
+
+    opencv_onnx::NodeProto protoReshape;
+    protoReshape.add_input(node_proto.input(0));
+    protoReshape.add_output(reshapeLp.name);
+    addLayer(reshapeLp, protoReshape);
+
+    // Step2: Transpose
+    LayerParams permuteLp;
+    permuteLp.name = layerParams.name + "/permute";
+    permuteLp.type = "Permute";
+    CV_Assert(layer_id.find(permuteLp.name) == layer_id.end());
+    permuteLp.set("order", DictValue::arrayInt(perm.data(), perm.size()));
+
+    opencv_onnx::NodeProto protoPermute;
+    protoPermute.add_input(reshapeLp.name);
+    protoPermute.add_output(permuteLp.name);
+    addLayer(permuteLp, protoPermute);
+
+    // Step3: Reshape
+    // The original node is reused for the final Reshape so that its declared
+    // outputs stay attached to the end of the chain.
+    layerParams.type = "Reshape";
+    layerParams.set("dim", DictValue::arrayInt(shape1.data(), shape1.size()));
+
+    node_proto.set_input(0, permuteLp.name);
+    addLayer(layerParams, node_proto);
+}
+
 void ONNXImporter::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 {
    for (int j = 0; j < node_proto.input_size(); j++) {
@ -3672,6 +3752,7 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
    dispatch["SoftMax"] = dispatch["LogSoftmax"] = &ONNXImporter::parseSoftMax;
    dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput;
    dispatch["CumSum"] = &ONNXImporter::parseCumSum;
+    dispatch["SpaceToDepth"] = dispatch["DepthToSpace"] = &ONNXImporter::parseDepthToSpace;

    std::vector<std::string> simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos",
                                          "Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish",
--- a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp
@ -96,10 +96,6 @@
 "test_cumsum_2d_axis_0",
 "test_cumsum_2d_axis_1",
 "test_cumsum_2d_negative_axis",
-"test_depthtospace_crd_mode",
-"test_depthtospace_crd_mode_example",
-"test_depthtospace_dcr_mode",
-"test_depthtospace_example",
 "test_dequantizelinear",
 "test_dequantizelinear_axis",
 "test_det_2d",
@ -490,8 +486,6 @@
 "test_slice_neg_steps",
 "test_slice_negative_axes",
 "test_slice_start_out_of_bounds",
-"test_spacetodepth",
-"test_spacetodepth_example",
 "test_split_variable_parts_1d",
 "test_split_variable_parts_2d",
 "test_split_variable_parts_default_axis",