mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 06:26:29 +08:00
Merge pull request #25241 from alexlyulkov:al/int64-padding
Added int support to padding layer #25241

Added int32 and int64 support to the padding layer (CPU and CUDA). The ONNX parser no longer converts a non-zero padding value to float.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable (the patch to opencv_extra has the same branch name)
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
e8a52c7e94
commit
f454303f6a
@ -197,5 +197,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
template void copy_with_reflection101(const Stream&, TensorSpan<__half>, TensorView<__half>, std::vector<std::pair<std::size_t, std::size_t>> ranges);
|
||||
#endif
|
||||
template void copy_with_reflection101(const Stream&, TensorSpan<float>, TensorView<float>, std::vector<std::pair<std::size_t, std::size_t>> ranges);
|
||||
template void copy_with_reflection101(const Stream&, TensorSpan<int32_t>, TensorView<int32_t>, std::vector<std::pair<std::size_t, std::size_t>> ranges);
|
||||
template void copy_with_reflection101(const Stream&, TensorSpan<int64_t>, TensorView<int64_t>, std::vector<std::pair<std::size_t, std::size_t>> ranges);
|
||||
|
||||
}}}} /* namespace namespace cv::dnn::cuda4dnn::kernels */
|
||||
|
@ -34,7 +34,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
using wrapper_type = GetCUDABackendWrapperType<T>;
|
||||
|
||||
/* `ranges` is indexed by axis and contains the range in the output where the input is copied to */
|
||||
PaddingOp(csl::Stream stream_, PaddingType type_, T value_, std::vector<cv::Range> ranges)
|
||||
PaddingOp(csl::Stream stream_, PaddingType type_, T value_, const std::vector<cv::Range>& ranges)
|
||||
: stream(std::move(stream_)), type{ type_ }, value{ value_ }, dstRanges(std::move(ranges))
|
||||
{
|
||||
}
|
||||
|
@ -34,7 +34,7 @@ public:
|
||||
PaddingLayerImpl(const LayerParams ¶ms)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
paddingValue = params.get<float>("value", 0);
|
||||
paddingValue = params.get<double>("value", 0);
|
||||
inputDims = params.get<int>("input_dims", -1);
|
||||
paddingType = params.get<String>("type", "constant");
|
||||
|
||||
@ -70,6 +70,23 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
// Propagates types through the layer: padding never changes the element
// type, so every requested output takes the (single) input's type.
// The set of accepted depths depends on the preferable target, because
// each backend only instantiates kernels for a subset of types.
void getTypes(const std::vector<MatType>& inputs,
              const int requiredOutputs,
              const int requiredInternals,
              std::vector<MatType>& outputs,
              std::vector<MatType>& internals) const CV_OVERRIDE
{
    // Padding is a single-input layer.
    CV_CheckEQ(inputs.size(), 1u, "");
    if (preferableTarget == DNN_TARGET_CUDA_FP16 || preferableTarget == DNN_TARGET_CUDA)
    {
        // CUDA kernels exist for float and the wide integer types only.
        CV_CheckType(inputs[0], inputs[0] == CV_32F || inputs[0] == CV_32S || inputs[0] == CV_64S, "");
    }
    else if (preferableTarget == DNN_TARGET_OPENCL_FP16)
    {
        // OpenCL FP16 path works on half-floats plus the integer types.
        CV_CheckType(inputs[0], inputs[0] == CV_16F || inputs[0] == CV_8S || inputs[0] == CV_32S || inputs[0] == CV_64S, "");
    }
    else
    {
        // Default (CPU et al.): single-precision float or integers.
        CV_CheckType(inputs[0], inputs[0] == CV_32F || inputs[0] == CV_8S || inputs[0] == CV_32S || inputs[0] == CV_64S, "");
    }

    // No internal buffers; outputs mirror the input type.
    outputs.assign(requiredOutputs, inputs[0]);
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs;
|
||||
@ -184,7 +201,7 @@ public:
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported padding mode");
|
||||
|
||||
return make_cuda_node<cuda4dnn::PaddingOp>(preferableTarget, std::move(context->stream), ptype, paddingValue, dstRanges);
|
||||
return make_cuda_node_with_type<cuda4dnn::PaddingOp>(preferableTarget, inputs[0]->getHostMatDepth(), std::move(context->stream), ptype, paddingValue, dstRanges);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -248,7 +265,8 @@ public:
|
||||
auto padding_below = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{begins.size()}, begins.data());
|
||||
auto padding_above = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{ends.size()}, ends.data());
|
||||
auto pad_mode = paddingType == "constant" ? ov::op::PadMode::CONSTANT : ov::op::PadMode::REFLECT; // SYMMETRIC
|
||||
auto arg_pad_value = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, &paddingValue);;
|
||||
float paddingValueFloat = paddingValue;
|
||||
auto arg_pad_value = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, &paddingValueFloat);
|
||||
|
||||
auto pad = paddingType == "constant" ?
|
||||
std::make_shared<ov::op::v1::Pad>(ieInpNode, padding_below, padding_above, arg_pad_value, pad_mode) :
|
||||
@ -261,7 +279,7 @@ private:
|
||||
std::vector<std::pair<int, int> > paddings; // Pairs pad before, pad after.
|
||||
std::vector<Range> dstRanges;
|
||||
int inputDims;
|
||||
float paddingValue;
|
||||
double paddingValue;
|
||||
std::string paddingType;
|
||||
};
|
||||
|
||||
|
@ -2410,8 +2410,16 @@ void ONNXImporter::parsePad(LayerParams& layerParams, const opencv_onnx::NodePro
|
||||
if (node_proto.input_size() == 3 && !node_proto.input(2).empty())
|
||||
{
|
||||
Mat value = getBlob(node_proto, 2);
|
||||
float padValue = (depth == CV_8S) ? (float)value.ptr<int8_t>()[0] : value.ptr<float>()[0];
|
||||
layerParams.set("value", padValue);
|
||||
double padValue = 0;
|
||||
switch(value.depth())
|
||||
{
|
||||
case CV_32F: padValue = value.ptr<float>()[0]; break;
|
||||
case CV_32S: padValue = value.ptr<int32_t>()[0]; break;
|
||||
case CV_64S: padValue = value.ptr<int64_t>()[0]; break;
|
||||
case CV_8S: padValue = value.ptr<int8_t>()[0]; break;
|
||||
default: CV_Error(Error::BadDepth, "Unsupported type");
|
||||
}
|
||||
layerParams.set<double>("value", (double)padValue);
|
||||
}
|
||||
}
|
||||
addLayer(layerParams, node_proto);
|
||||
@ -3403,7 +3411,7 @@ void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeP
|
||||
padLp.type = "PaddingInt8";
|
||||
padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
|
||||
padLp.set("depth", CV_8S);
|
||||
padLp.set("value", inp_zp);
|
||||
padLp.set<double>("value", (double)inp_zp);
|
||||
|
||||
opencv_onnx::NodeProto proto;
|
||||
proto.add_input(node_proto.input(0));
|
||||
|
@ -617,7 +617,7 @@ void TFImporter::setPadding(LayerParams &layerParams, const tensorflow::NodeDef
|
||||
padLp.name = layer.name() + "/pad";
|
||||
padLp.type = "Padding";
|
||||
padLp.set("paddings", DictValue::arrayInt(pads, sizeof(pads) / sizeof(pads[0])));
|
||||
padLp.set("value", value);
|
||||
padLp.set<double>("value", (double)value);
|
||||
|
||||
int id = dstNet.addLayer(padLp.name, padLp.type, padLp);
|
||||
layer_id[padLp.name] = id;
|
||||
|
@ -775,6 +775,79 @@ INSTANTIATE_TEST_CASE_P(/**/, Test_Cast_Int, Combine(
|
||||
dnnBackendsAndTargets()
|
||||
));
|
||||
|
||||
typedef testing::TestWithParam<tuple<int, tuple<Backend, Target> > > Test_Pad_Int;
|
||||
TEST_P(Test_Pad_Int, random)
|
||||
{
|
||||
int matType = get<0>(GetParam());
|
||||
tuple<Backend, Target> backend_target= get<1>(GetParam());
|
||||
Backend backend = get<0>(backend_target);
|
||||
Target target = get<1>(backend_target);
|
||||
|
||||
std::vector<int> inShape{2, 3, 4, 5};
|
||||
int64_t low = 1000000;
|
||||
Mat input(inShape, matType);
|
||||
cv::randu(input, low, low + 100);
|
||||
std::vector<int> paddings{0, 0, 0, 0, 1, 0, 0, 1};
|
||||
|
||||
Net net;
|
||||
LayerParams lp;
|
||||
lp.type = "Padding";
|
||||
lp.name = "testLayer";
|
||||
lp.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
|
||||
lp.set<double>("value", 25);
|
||||
|
||||
net.addLayerToPrev(lp.name, lp.type, lp);
|
||||
|
||||
net.setInput(input);
|
||||
net.setPreferableBackend(backend);
|
||||
net.setPreferableTarget(target);
|
||||
|
||||
Mat re;
|
||||
re = net.forward();
|
||||
EXPECT_EQ(re.depth(), matType);
|
||||
EXPECT_EQ(re.size.dims(), 4);
|
||||
EXPECT_EQ(re.size[0], 2);
|
||||
EXPECT_EQ(re.size[1], 3);
|
||||
EXPECT_EQ(re.size[2], 5);
|
||||
EXPECT_EQ(re.size[3], 6);
|
||||
|
||||
std::vector<int> reIndices(4);
|
||||
std::vector<int> inIndices(4);
|
||||
for (int i0 = 0; i0 < re.size[0]; ++i0)
|
||||
{
|
||||
reIndices[0] = i0;
|
||||
inIndices[0] = i0;
|
||||
for (int i1 = 0; i1 < re.size[1]; ++i1)
|
||||
{
|
||||
reIndices[1] = i1;
|
||||
inIndices[1] = i1;
|
||||
for (int i2 = 0; i2 < re.size[2]; ++i2)
|
||||
{
|
||||
reIndices[2] = i2;
|
||||
inIndices[2] = i2 - 1;
|
||||
for (int i3 = 0; i3 < re.size[3]; ++i3)
|
||||
{
|
||||
reIndices[3] = i3;
|
||||
inIndices[3] = i3;
|
||||
if (i2 < 1 || i3 >= input.size[3])
|
||||
{
|
||||
EXPECT_EQ(getValueAt(re, reIndices.data()), 25l);
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_EQ(getValueAt(re, reIndices.data()), getValueAt(input, inIndices.data()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Run the padding type-preservation test for both supported integer depths
// across every available backend/target pair.
INSTANTIATE_TEST_CASE_P(/**/, Test_Pad_Int, Combine(
    testing::Values(CV_32S, CV_64S),
    dnnBackendsAndTargets()
));
|
||||
|
||||
typedef testing::TestWithParam<tuple<int, tuple<Backend, Target> > > Test_Slice_Int;
|
||||
TEST_P(Test_Slice_Int, random)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user