Mirror of https://github.com/opencv/opencv.git
Merge pull request #20682 from rogday:min

* Add Min layer to CPU, OpenCL, Halide, Inference Engine, NGraph and CUDA
* fix indentation
* add min to fusion and halide tests; fix doc

parent 2558ab3de7
commit 38b9ec7a18
@@ -610,7 +610,7 @@ CV__DNN_INLINE_NS_BEGIN
     /** @brief Element wise operation on inputs

    Extra optional parameters:
-    - "operation" as string. Values are "sum" (default), "prod", "max", "div"
+    - "operation" as string. Values are "sum" (default), "prod", "max", "div", "min"
    - "coeff" as float array. Specify weights of inputs for SUM operation
    - "output_channels_mode" as string. Values are "same" (default, all input must have the same layout), "input_0", "input_0_truncate", "max_input_channels"
    */
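Note: as a quick illustration of the documented parameters, here is a minimal, untested sketch of driving the extended Eltwise layer through the public cv::dnn API; the input and layer names ("data0", "data1", "min0") are arbitrary and not part of the patch:

    #include <opencv2/dnn.hpp>

    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        // Two equally shaped 4D blobs to combine elementwise.
        int shape[] = {1, 2, 3, 3};
        Mat a(4, shape, CV_32F), b(4, shape, CV_32F);
        randu(a, -1, 1);
        randu(b, -1, 1);

        Net net;
        net.setInputsNames({"data0", "data1"});   // two network inputs

        LayerParams lp;
        lp.type = "Eltwise";
        lp.name = "min0";
        lp.set("operation", "min");               // value added by this PR
        int id = net.addLayer(lp.name, lp.type, lp);
        net.connect(0, 0, id, 0);                 // wire both network inputs
        net.connect(0, 1, id, 1);                 // into the layer's two pins

        net.setInput(a, "data0");
        net.setInput(b, "data1");
        Mat out = net.forward();                  // out(i) == std::min(a(i), b(i))
        return 0;
    }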
@@ -74,6 +74,11 @@ void eltwise_max_2(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
     eltwise_op<T, MaxFunctor<T>>(stream, output, x, y);
 }

+template <class T>
+void eltwise_min_2(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
+    eltwise_op<T, MinFunctor<T>>(stream, output, x, y);
+}
+
 template <class T>
 void eltwise_sum_2(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
     eltwise_op<T, SumFunctor<T>>(stream, output, x, y);
@@ -100,11 +105,13 @@ void eltwise_div_2(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
 template void eltwise_sum_coeff_2(const Stream&, Span<__half>, __half, View<__half>, __half, View<__half>);
 template void eltwise_sum_2(const Stream& stream, Span<__half> output, View<__half> x, View<__half> y);
 template void eltwise_max_2(const Stream& stream, Span<__half> output, View<__half> x, View<__half> y);
+template void eltwise_min_2(const Stream& stream, Span<__half> output, View<__half> x, View<__half> y);
 #endif
 template void eltwise_div_2(const Stream& stream, Span<float> output, View<float> x, View<float> y);
 template void eltwise_prod_2(const Stream& stream, Span<float> output, View<float> x, View<float> y);
 template void eltwise_sum_coeff_2(const Stream&, Span<float>, float, View<float>, float, View<float>);
 template void eltwise_sum_2(const Stream& stream, Span<float> output, View<float> x, View<float> y);
 template void eltwise_max_2(const Stream& stream, Span<float> output, View<float> x, View<float> y);
+template void eltwise_min_2(const Stream& stream, Span<float> output, View<float> x, View<float> y);

 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
@@ -262,6 +262,21 @@ struct MaxFunctor {
     }
 };

+template <class T>
+struct MinFunctor {
+    struct Params {
+        CUDA4DNN_HOST_DEVICE Params() { }
+    };
+
+    CUDA4DNN_DEVICE MinFunctor() { }
+    CUDA4DNN_DEVICE MinFunctor(const Params& params) { }
+
+    CUDA4DNN_DEVICE T operator()(T x, T y) {
+        using csl::device::min;
+        return min(x, y);
+    }
+};
+
 template <class T>
 struct SumFunctor {
     struct Params {
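Note: eltwise_op is a generic kernel parameterized by a binary functor, so a new operation only needs a functor like MinFunctor above plus the dispatch entries below. A rough host-side analogue of that pattern, illustrative only and not part of the patch:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Generic elementwise combine, parameterized by a binary functor.
    template <class T, class Functor>
    void eltwise_op(std::vector<T>& output, const std::vector<T>& x, const std::vector<T>& y)
    {
        Functor f;
        for (std::size_t i = 0; i < output.size(); i++)
            output[i] = f(x[i], y[i]);
    }

    template <class T>
    struct MinFunctor
    {
        T operator()(T a, T b) const { return std::min(a, b); }
    };

    // Usage: eltwise_op<float, MinFunctor<float>>(out, x, y);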
@@ -15,6 +15,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
     template <class T>
     void eltwise_max_2(const csl::Stream& stream, csl::Span<T> output, csl::View<T> x, csl::View<T> y);

+    template <class T>
+    void eltwise_min_2(const csl::Stream& stream, csl::Span<T> output, csl::View<T> x, csl::View<T> y);
+
     template <class T>
     void eltwise_sum_2(const csl::Stream& stream, csl::Span<T> output, csl::View<T> x, csl::View<T> y);

@@ -25,7 +25,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
         MAX,
         SUM,
         PRODUCT,
-        DIV
+        DIV,
+        MIN,
     };

     class EltwiseOpBase : public CUDABackendNode {
@@ -78,6 +79,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             switch (op)
             {
             case EltwiseOpType::MAX: kernels::eltwise_max_2<T>(stream, output, input_x, input_y); break;
+            case EltwiseOpType::MIN: kernels::eltwise_min_2<T>(stream, output, input_x, input_y); break;
             case EltwiseOpType::PRODUCT: kernels::eltwise_prod_2<T>(stream, output, input_x, input_y); break;
             case EltwiseOpType::DIV: kernels::eltwise_div_2<T>(stream, output, input_x, input_y); break;
             case EltwiseOpType::SUM:
@@ -104,6 +106,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             switch (op)
             {
             case EltwiseOpType::MAX: kernels::eltwise_max_2<T>(stream, output, output, input); break;
+            case EltwiseOpType::MIN: kernels::eltwise_min_2<T>(stream, output, output, input); break;
             case EltwiseOpType::PRODUCT: kernels::eltwise_prod_2<T>(stream, output, output, input); break;
             case EltwiseOpType::DIV: kernels::eltwise_div_2<T>(stream, output, output, input); break;
             case EltwiseOpType::SUM:
@@ -71,7 +71,8 @@ public:
         PROD = 0,
         SUM = 1,
         MAX = 2,
-        DIV = 3
+        DIV = 3,
+        MIN = 4,
     } op;
     std::vector<float> coeffs;

@@ -109,6 +110,8 @@ public:
             op = SUM;
         else if (operation == "max")
             op = MAX;
+        else if (operation == "min")
+            op = MIN;
         else if (operation == "div")
             op = DIV;
         else
@@ -470,6 +473,13 @@ public:
                         dstptr[j] = std::max(srcptr0[j], srcptrI[j]);
                     }
                 }
+                else if (op == MIN)
+                {
+                    for (int j = 0; j < blockSize; j++)
+                    {
+                        dstptr[j] = std::min(srcptr0[j], srcptrI[j]);
+                    }
+                }
                 else if (op == SUM)
                 {
                     if (!coeffsptr || (coeffsptr[0] == 1.0f && coeffsptr[1] == 1.0f))
@@ -524,6 +534,13 @@ public:
                         dstptr[j] = std::max(dstptr[j], srcptrI[j]);
                     }
                 }
+                else if (op == MIN)
+                {
+                    for (int j = 0; j < blockSize; j++)
+                    {
+                        dstptr[j] = std::min(dstptr[j], srcptrI[j]);
+                    }
+                }
                 else if (op == SUM)
                 {
                     if (!coeffsptr || coeffsptr[inputIdx] == 1.0f)
@@ -641,6 +658,11 @@ public:
                 for (int i = 2; i < inputs.size(); ++i)
                     max(inputs[i], outputs[0], outputs[0]);
                 break;
+            case MIN:
+                min(inputs[0], inputs[1], outputs[0]);
+                for (int i = 2; i < inputs.size(); ++i)
+                    min(inputs[i], outputs[0], outputs[0]);
+                break;
             default:
                 return false;
         }
@@ -745,6 +767,7 @@ public:
         auto op_ = [this] {
             switch (op) {
             case MAX: return cuda4dnn::EltwiseOpType::MAX;
+            case MIN: return cuda4dnn::EltwiseOpType::MIN;
             case SUM: return cuda4dnn::EltwiseOpType::SUM;
             case PROD: return cuda4dnn::EltwiseOpType::PRODUCT;
             case DIV: return cuda4dnn::EltwiseOpType::DIV;
@@ -799,6 +822,12 @@ public:
                 for (int i = 2; i < inputBuffers.size(); ++i)
                     topExpr = max(topExpr, inputBuffers[i](x, y, c, n));
                 break;
+            case MIN:
+                topExpr = min(inputBuffers[0](x, y, c, n),
+                              inputBuffers[1](x, y, c, n));
+                for (int i = 2; i < inputBuffers.size(); ++i)
+                    topExpr = min(topExpr, inputBuffers[i](x, y, c, n));
+                break;
             default:
                 return Ptr<BackendNode>();
         }
@@ -823,6 +852,8 @@ public:
             ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::DIV);
         else if (op == MAX)
             ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MAX);
+        else if (op == MIN)
+            ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MIN);
         else
             CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");

@@ -857,6 +888,7 @@ public:
             case PROD: curr_node = std::make_shared<ngraph::op::v1::Multiply>(curr_node, next_node); break;
             case DIV: curr_node = std::make_shared<ngraph::op::v1::Divide>(curr_node, next_node); break;
             case MAX: curr_node = std::make_shared<ngraph::op::v1::Maximum>(curr_node, next_node); break;
+            case MIN: curr_node = std::make_shared<ngraph::op::v1::Minimum>(curr_node, next_node); break;
             default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
             }
         }
@@ -105,7 +105,7 @@ private:
     void parseSplit (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseBias (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parsePow (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
-    void parseMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseMinMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseNeg (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseConstant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseLSTM (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
@@ -1105,10 +1105,12 @@ void ONNXImporter::parsePow(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
     addLayer(layerParams, node_proto);
 }

-void ONNXImporter::parseMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+// "Min" "Max"
+void ONNXImporter::parseMinMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 {
+    const std::string& layer_type = node_proto.op_type();
     layerParams.type = "Eltwise";
-    layerParams.set("operation", "max");
+    layerParams.set("operation", layer_type == "Max" ? "max" : "min");
     addLayer(layerParams, node_proto);
 }

@@ -2421,7 +2423,7 @@ const ONNXImporter::DispatchMap ONNXImporter::buildDispatchMap()
     dispatch["Split"] = &ONNXImporter::parseSplit;
     dispatch["Add"] = dispatch["Sum"] = dispatch["Sub"] = &ONNXImporter::parseBias;
     dispatch["Pow"] = &ONNXImporter::parsePow;
-    dispatch["Max"] = &ONNXImporter::parseMax;
+    dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseMinMax;
     dispatch["Neg"] = &ONNXImporter::parseNeg;
     dispatch["Constant"] = &ONNXImporter::parseConstant;
     dispatch["LSTM"] = &ONNXImporter::parseLSTM;
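Note: with the dispatch entry above, an ONNX graph containing a Min node imports through the same path as Max. A one-line sketch (the model file name below is a placeholder, not from the patch):

    // Any ONNX graph with a Min node now loads; the node becomes an Eltwise/min layer.
    cv::dnn::Net net = cv::dnn::readNetFromONNX("model_with_min.onnx");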
@@ -893,7 +893,7 @@ TEST_P(Eltwise, Accuracy)

 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Eltwise, Combine(
 /*input size*/ Values(Vec3i(1, 4, 5), Vec3i(2, 8, 6)),
-/*operation*/ Values("prod", "sum", "div", "max"),
+/*operation*/ Values("prod", "sum", "div", "max", "min"),
 /*num convs*/ Values(1, 2, 3),
 /*weighted(for sum only)*/ Bool(),
     dnnBackendsAndTargetsWithHalide()
@@ -2340,7 +2340,7 @@ public:
     static testing::internal::ParamGenerator<std::string> eltwiseOpList()
     {
         // TODO: automate list generation
-        return Values("sum", "max", "prod", "div");
+        return Values("sum", "max", "min", "prod", "div");
     }

     static testing::internal::ParamGenerator<std::string> activationLayersList()
@@ -301,6 +301,11 @@ TEST_P(Test_ONNX_layers, ReduceMax)
     testONNXModels("reduce_max_axis_1");
 }

+TEST_P(Test_ONNX_layers, Min)
+{
+    testONNXModels("min", npy, 0, 0, false, true, 2);
+}
+
 TEST_P(Test_ONNX_layers, Scale)
 {
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)