Merge pull request #22362 from fengyuentau:conv_asym_pad_fuse

Remove asymmetric padding in Conv layer since it is supported in CPU backend

Commit: 1fd45a1b85
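Context for the change (not part of the commit message): padding is "asymmetric" when the leading and trailing pads of a spatial axis differ, e.g. pad_top = 0 but pad_bottom = 1. The hunks below stop rejecting or rewriting such convolutions and instead pass all four pads through to the backends. A minimal sketch of the output-size arithmetic involved, independent of OpenCV (convOutSize is a hypothetical helper, not an OpenCV API):

#include <cassert>

// Output length of one convolution axis with separate begin/end pads.
static int convOutSize(int in, int kernel, int stride, int dilation,
                       int pad_begin, int pad_end)
{
    int dilatedKernel = dilation * (kernel - 1) + 1; // effective kernel extent
    return (in + pad_begin + pad_end - dilatedKernel) / stride + 1;
}

int main()
{
    assert(convOutSize(5, 3, 2, 1, 1, 1) == 3); // symmetric pads 1/1
    assert(convOutSize(5, 3, 2, 1, 0, 1) == 2); // asymmetric pads 0/1
    return 0;
}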
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -101,10 +101,6 @@ public:
         if (kernel_size.size() == 2) {
             kernel = Size(kernel_size[1], kernel_size[0]);
             stride = Size(strides[1], strides[0]);
-            for (int i = 0; i < pads_begin.size(); i++) {
-                if (pads_begin[i] != pads_end[i])
-                    CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");
-            }
             pad = Size(pads_begin[1], pads_begin[0]);
             dilation = Size(dilations[1], dilations[0]);
@@ -166,10 +162,6 @@ public:
         }
         getConvPoolPaddings(inpShape, kernel_size, strides, padMode, pads_begin, pads_end);
         if (pads_begin.size() == 2) {
-            for (int i = 0; i < pads_begin.size(); i++) {
-                if (pads_begin[i] != pads_end[i])
-                    CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");
-            }
             pad = Size(pads_begin[1], pads_begin[0]);
         }
         fusedWeights = false;
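Note on the surviving lines: with the guard gone, only the symmetric summary pad is still derived here, and the indices are swapped because pads_begin is ordered {height axis, width axis} while cv::Size takes (width, height). A minimal sketch of that index order, assuming only the OpenCV core header:

#include <opencv2/core.hpp>
#include <cassert>
#include <vector>

int main()
{
    std::vector<size_t> pads_begin = {2, 3};    // {pad_top, pad_left}
    cv::Size pad(pads_begin[1], pads_begin[0]); // Size(width, height) -> (3, 2)
    assert(pad.width == 3 && pad.height == 2);
    return 0;
}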
@@ -1811,7 +1803,10 @@ public:
             config.in_shape = shape(inputs[0]);
             config.out_shape = shape(outputs[0]);
             config.kernel = kernel;
-            config.pad = pad;
+            // pads_begin: 0 - pad_top, 1 - pad_left
+            // pads_end: 0 - pad_bottom, 1 - pad_right
+            std::vector<int> pads = {int(pads_begin[0]), int(pads_end[0]), int(pads_begin[1]), int(pads_end[1])};
+            config.pads = pads;
             config.stride = stride;
             config.dilation = dilation;
             if (inputs[0].dims != 4 && inputs[0].dims != umat_blobs[0].dims)
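The ordering built above is worth spelling out: pads_begin holds {top, left} and pads_end holds {bottom, right}, so config.pads receives [pad_top, pad_bottom, pad_left, pad_right]. A worked instance of just that reordering:

#include <cassert>
#include <vector>

int main()
{
    std::vector<int> pads_begin = {1, 2}; // {top, left}
    std::vector<int> pads_end   = {3, 4}; // {bottom, right}
    std::vector<int> pads = {pads_begin[0], pads_end[0], pads_begin[1], pads_end[1]};
    assert((pads == std::vector<int>{1, 3, 2, 4})); // [top, bottom, left, right]
    return 0;
}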
@@ -2025,7 +2020,7 @@ public:
             }

 #ifdef HAVE_TENGINE
-            bool tengine_ret = false; ;
+            bool tengine_ret = false;

             std::vector<Mat> teng_in, teng_out;
             inputs_arr.getMatVector(teng_in);
@@ -2050,20 +2045,24 @@ public:
             /* tengine_init will run when first time. */
             if(NULL == tengine_graph)
             {
+                // pads_begin: 0 - pad_top, 1 - pad_left
+                // pads_end: 0 - pad_bottom, 1 - pad_right
+                // pad_h0: pad_top, pad_h1: pad_bottom
+                // pad_w0: pad_left, pad_w1: pad_right
                 tengine_graph = tengine_init(name.c_str(), input_, inch, ngroups, in_h, in_w,
                                              output_, out_b, outch, out_h, out_w,
                                              kernel_, kernel_size.size(), kernel.height, kernel.width,
                                              teg_bias, stride.height, stride.width,
-                                             pad.height, pad.width, dilation.height, dilation.width,
+                                             pads_begin[0], pads_end[0], pads_begin[1], pads_end[1], dilation.height, dilation.width,
                                              weightsMat.step1(), padMode, tengine_graph, nstripes);
-                /*printf("Init(%s): input=%p(%d %d %d %d ),output=%p(%d %d %d %d ),kernel=%p(%ld %d %d ), bias=%p ,"
-                       "stride(%d %d), pad(%d %d), dilation(%d %d) ,weightsMat=%ld, padMode=%s ,tengine_graph = %p \n",
-                       name.c_str(),input_, inch, ngroups, in_h, in_w,
-                       output_, out_b, outch, out_h, out_w,
-                       kernel_, kernel_size.size(), kernel.height, kernel.width,
-                       teg_bias, stride.height, stride.width,
-                       pad.height, pad.width, dilation.height, dilation.width,
-                       weightsMat.step1(), padMode.c_str() ,tengine_graph);*/
+                // printf("Init(%s): input=%p(%d %d %d %d ),output=%p(%d %d %d %d ),kernel=%p(%ld %d %d ), bias=%p ,"
+                //        "stride(%d %d), pad(%d %d %d %d), dilation(%d %d) ,weightsMat=%ld, padMode=%s ,tengine_graph = %p \n",
+                //        name.c_str(),input_, inch, ngroups, in_h, in_w,
+                //        output_, out_b, outch, out_h, out_w,
+                //        kernel_, kernel_size.size(), kernel.height, kernel.width,
+                //        teg_bias, stride.height, stride.width,
+                //        pads_begin[0], pads_end[0], pads_begin[1], pads_end[1], dilation.height, dilation.width,
+                //        weightsMat.step1(), padMode.c_str() ,tengine_graph);
             }
             if(NULL != tengine_graph)
             {
--- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
+++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@@ -55,17 +55,18 @@ struct OCL4DNNConvConfig
 {
     OCL4DNNConvConfig() :
         kernel(1, 1),
-        pad(0, 0),
         stride(1, 1),
         dilation(1, 1),
         group(1),
         bias_term(false),
         use_half(false)
-    {}
+    {
+        pads = {0, 0, 0, 0};
+    }
     MatShape in_shape;
     MatShape out_shape;
     Size kernel;
-    Size pad;
+    std::vector<int> pads; // [pad_top, pad_bottom, pad_left, pad_right]
     Size stride;
     Size dilation;
     int group; // = 1;
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@@ -181,8 +181,11 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
     // assumption: spatial dimension is 2.
     kernel_h_ = config.kernel.height;
     kernel_w_ = config.kernel.width;
-    pad_h_ = config.pad.height;
-    pad_w_ = config.pad.width;
+    // pads: [pad_top, pad_bottom, pad_left, pad_right]
+    pad_h_ = config.pads[0]; // pad_top
+    pad_bottom_ = config.pads[1];
+    pad_w_ = config.pads[2]; // pad_left
+    pad_right_ = config.pads[3];
     stride_h_ = config.stride.height;
     stride_w_ = config.stride.width;
     dilation_h_ = config.dilation.height;
@@ -194,12 +197,6 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
     output_w_ = config.out_shape[dims - spatial_dims + 1];
     bottom_dim_ = channels_ * width_ * height_;
     top_dim_ = num_output_ * output_w_ * output_h_;
-    int Ph = (output_h_ - 1) * stride_h_ + (dilation_h_ * (kernel_h_ - 1) + 1) - height_;
-    int Pw = (output_w_ - 1) * stride_w_ + (dilation_w_ * (kernel_w_ - 1) + 1) - width_;
-    Ph = (Ph > 0) ? Ph : 0;
-    Pw = (Pw > 0) ? Pw : 0;
-    pad_right_ = (Pw + 1) / 2;
-    pad_bottom_ = (Ph + 1) / 2;

     cache_path_ = utils::getConfigurationParameterString("OPENCV_OCL4DNN_CONFIG_PATH", "");
     dwconv_ = (num_output_ == channels_ && channels_ == group_);
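The six deleted lines re-derived the trailing pads from the output shape: the total padding an axis needs is P = (out - 1) * stride + (dilation * (kernel - 1) + 1) - in, clamped at zero, and the old code kept only ceil(P/2) as pad_bottom_/pad_right_, regardless of what the model actually requested. A sketch of that removed arithmetic (totalPad is a hypothetical name, used here for illustration):

#include <algorithm>
#include <cassert>

static int totalPad(int out, int in, int kernel, int stride, int dilation)
{
    int dilatedKernel = dilation * (kernel - 1) + 1;
    return std::max(0, (out - 1) * stride + dilatedKernel - in);
}

int main()
{
    int P = totalPad(3, 5, 3, 2, 1);     // (3-1)*2 + 3 - 5 = 2
    assert(P == 2 && (P + 1) / 2 == 1);  // old pad_bottom_/pad_right_ = ceil(P/2)
    return 0;
}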
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -2071,44 +2071,6 @@ void ONNXImporter::parseConv(LayerParams& layerParams, const opencv_onnx::NodePr
     int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0];
     layerParams.set("num_output", outCn);

-    // Check for asymmetric padding in Conv2D
-    if (layerParams.has("pad"))
-    {
-        bool asymmetricPadding = false;
-        DictValue pads = layerParams.get("pad");
-        const int dims = pads.size() / 2;
-        for (int i = 0; i < dims; ++i)
-        {
-            if (pads.get<int>(i) != pads.get<int>(i + dims))
-            {
-                asymmetricPadding = true;
-                break;
-            }
-        }
-        if (asymmetricPadding && pads.size() == 4) // [pad_t, pad_l, pad_b, pad_r]
-        {
-            layerParams.erase("pad");
-            // No paddings required for N, C axis
-            std::vector<int> paddings(4, 0);
-            // Add paddings for H, W axis
-            for (int i = 0; i < dims; ++i)
-            {
-                paddings.push_back(pads.get<int>(i));
-                paddings.push_back(pads.get<int>(dims + i));
-            }
-            LayerParams padLp;
-            padLp.name = layerParams.name + "/pad";
-            padLp.type = "Padding";
-            padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
-
-            opencv_onnx::NodeProto proto;
-            proto.add_input(node_proto.input(0));
-            proto.add_output(padLp.name);
-
-            addLayer(padLp, proto);
-            node_proto.set_input(0, padLp.name);
-        }
-    }
     addLayer(layerParams, node_proto);
 }
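The deleted block was the importer-side workaround: when the ONNX pads attribute [pad_top, pad_left, pad_bottom, pad_right] was asymmetric, it erased the attribute and prepended an explicit Padding layer whose paddings are per-axis (begin, end) pairs with N and C left unpadded. A self-contained replay of that reordering alone:

#include <cassert>
#include <vector>

int main()
{
    std::vector<int> pads = {1, 2, 3, 4};   // ONNX order: [t, l, b, r]
    const int dims = pads.size() / 2;
    std::vector<int> paddings(4, 0);        // (begin, end) for N and C: no padding
    for (int i = 0; i < dims; ++i)
    {
        paddings.push_back(pads[i]);        // begin of spatial axis i
        paddings.push_back(pads[dims + i]); // end of spatial axis i
    }
    // H gets (1, 3), W gets (2, 4)
    assert((paddings == std::vector<int>{0, 0, 0, 0, 1, 3, 2, 4}));
    return 0;
}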
--- a/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp
+++ b/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp
@@ -34,11 +34,15 @@ namespace cv
 {
 namespace dnn
 {
+// pad_h0: pad_top
+// pad_h1: pad_bottom
+// pad_w0: pad_left
+// pad_w1: pad_right
 teng_graph_t tengine_init(const char* name , float* input_, int inch, int group, int in_h, int in_w,
                           float *output_, int out_b, int outch, int out_h, int out_w,
                           float *kernel_,int kernel_s , int kernel_h, int kernel_w,
-                          float *teg_bias, int stride_h,int stride_w,
-                          int pad_h, int pad_w, int dilation_h, int dilation_w,
+                          float *teg_bias, int stride_h, int stride_w,
+                          int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w,
                           size_t wstep, const std::string padMode , teng_graph_t& graph, int nstripes) ;

 bool tengine_forward(teng_graph_t& graph) ;
--- a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp
+++ b/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp
@@ -56,7 +56,7 @@ static int create_input_node(teng_graph_t graph, const char* node_name, int inch
 }

 static int create_conv_node(teng_graph_t graph, const char* node_name, const char* input_name, int in_h, int in_w, int out_h, int out_w,
-                            int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int inch, int outch, int group,
+                            int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h0, int pad_h1, int pad_w0, int pad_w1, int inch, int outch, int group,
                             int dilation_h, int dilation_w, int activation, std::string padMode)
 {
     node_t conv_node = teng_create_graph_node(graph, node_name, "Convolution");
@@ -107,15 +107,12 @@ static int create_conv_node(teng_graph_t graph, const char* node_name, const cha
     teng_release_graph_node(b_node);
     teng_release_graph_tensor(b_tensor);

-    int pad_h1 = pad_h;
-    int pad_w1 = pad_w;
-
     if (!padMode.empty())
     {
         if (padMode == "SAME")
         {
-            int out_h_temp = (in_h-kernel_h + 2*pad_h)/stride_h + 1;
-            int out_w_temp = (in_w-kernel_w + 2*pad_w)/stride_w + 1;
+            int out_h_temp = (in_h-kernel_h + 2*pad_h0)/stride_h + 1;
+            int out_w_temp = (in_w-kernel_w + 2*pad_w0)/stride_w + 1;

             if (out_h_temp < out_h)
                 pad_h1 += 1;
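The SAME branch above estimates the output size from the leading pad only and bumps the trailing pad when the estimate undershoots, matching TensorFlow-style SAME padding where an odd total pad puts the extra cell on the bottom/right. A minimal sketch with the same variable meanings (samePadEnd is a hypothetical helper, not part of the Tengine wrapper):

// Trailing pad for one axis under SAME padding.
static int samePadEnd(int in, int out, int kernel, int stride, int pad_begin)
{
    int pad_end = pad_begin;
    int out_temp = (in - kernel + 2 * pad_begin) / stride + 1;
    if (out_temp < out)
        pad_end += 1; // odd extra cell lands on the trailing side
    return pad_end;
}

int main()
{
    // in=5, out=3 (SAME with stride 2), kernel=2, pad_begin=0 -> pad_end=1
    return samePadEnd(5, 3, 2, 2, 0) == 1 ? 0 : 1;
}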
@@ -129,8 +126,8 @@ static int create_conv_node(teng_graph_t graph, const char* node_name, const cha
     teng_set_node_attr_int(conv_node, "kernel_w", &kernel_w);
     teng_set_node_attr_int(conv_node, "stride_h", &stride_h);
     teng_set_node_attr_int(conv_node, "stride_w", &stride_w);
-    teng_set_node_attr_int(conv_node, "pad_h0", &pad_h);
-    teng_set_node_attr_int(conv_node, "pad_w0", &pad_w);
+    teng_set_node_attr_int(conv_node, "pad_h0", &pad_h0);
+    teng_set_node_attr_int(conv_node, "pad_w0", &pad_w0);
     teng_set_node_attr_int(conv_node, "pad_h1", &pad_h1);
     teng_set_node_attr_int(conv_node, "pad_w1", &pad_w1);
     teng_set_node_attr_int(conv_node, "output_channel", &outch);
@@ -149,7 +146,7 @@ static teng_graph_t create_conv_graph(const char* layer_name, float* input_data,
                                       float* output_data, int outch, int out_h, int out_w,
                                       int kernel_h, int kernel_w,
                                       int stride_h,int stride_w,
-                                      int pad_h, int pad_w, int dilation_h, int dilation_w, int activation,
+                                      int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w, int activation,
                                       float* teg_weight, float* teg_bias, std::string padMode, int nstripes)
 {
     node_t conv_node = NULL;
@@ -188,7 +185,7 @@ static teng_graph_t create_conv_graph(const char* layer_name, float* input_data,
     }

     if (ok && create_conv_node(graph, conv_name, input_name, in_h, in_w, out_h, out_w, kernel_h, kernel_w,
-        stride_h, stride_w, pad_h, pad_w, inch, outch, group, dilation_h, dilation_w, activation, padMode) < 0)
+        stride_h, stride_w, pad_h0, pad_h1, pad_w0, pad_w1, inch, outch, group, dilation_h, dilation_w, activation, padMode) < 0)
     {
         CV_LOG_WARNING(NULL,"Tengine: create conv node failed." );
         ok = false;
@@ -289,8 +286,8 @@ static bool tengine_init_flag = false;
 teng_graph_t tengine_init(const char* layer_name, float* input_, int inch, int group, int in_h, int in_w,
                           float *output_, int out_b, int outch, int out_h, int out_w,
                           float *kernel_, int kernel_s ,int kernel_h, int kernel_w,
-                          float *teg_bias, int stride_h,int stride_w,
-                          int pad_h, int pad_w, int dilation_h, int dilation_w,
+                          float *teg_bias, int stride_h, int stride_w,
+                          int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w,
                           size_t wstep, const std::string padMode, teng_graph_t &graph, int nstripes)
 {
     std::vector<float> teg_weight_vec;
@@ -299,9 +296,9 @@ teng_graph_t tengine_init(const char* layer_name, float* input_, int inch, int g
     // Do not using the activation fuse mode, just convolution only.
     int activation = -1;

-    if (!(kernel_s == 2 && kernel_h == kernel_w && pad_h == pad_w
+    if (!(kernel_s == 2 && kernel_h == kernel_w
           && dilation_h == dilation_w && stride_h == stride_w
-          && out_b == 1 && pad_h < 10)) // just for Conv2D
+          && out_b == 1 && pad_h0 < 10 && pad_h1 < 10 && pad_w0 < 10 && pad_w1 < 10)) // just for Conv2D
     {
         // printf("return : just for Conv2D\n");
         return NULL;
@@ -314,7 +311,7 @@ teng_graph_t tengine_init(const char* layer_name, float* input_, int inch, int g
               kernel_w, kernel_h,
               stride_w, stride_h,
               dilation_w, dilation_h,
-              pad_w, pad_h);
+              pad_h0, pad_h1, pad_w0, pad_w1);
     */
     // weight
     if (kernel_inwh != wstep)
@@ -342,7 +339,7 @@ teng_graph_t tengine_init(const char* layer_name, float* input_, int inch, int g
     graph = create_conv_graph(layer_name, input_, inch, group, in_h, in_w,
                               output_, outch, out_h, out_w,
                               kernel_h, kernel_w, stride_h,stride_w,
-                              pad_h, pad_w, dilation_h, dilation_w, activation,
+                              pad_h0, pad_h1, pad_w0, pad_w1, dilation_h, dilation_w, activation,
                               teg_weight, teg_bias, padMode, nstripes);
     if(NULL == graph )
     {