Fix the same-shape broadcast bug in the CUDA backend

wanli 2023-04-28 17:25:49 +08:00 committed by Wanli
parent 85b04f0b4d
commit 46991bcd62
3 changed files with 8 additions and 4 deletions


@@ -150,7 +150,7 @@ void eltwise_op(const Stream& stream, TensorSpan<T> output, TensorView<T> x, Ten
      */
     for (int r = 0; r < output.rank(); r++)
     {
-        while (x.get_axis_size(r) == 1 && y.get_axis_size(r) == 1) {
+        while (x.rank() > r && y.rank() > r && x.get_axis_size(r) == 1 && y.get_axis_size(r) == 1) {
             CV_Assert(output.get_axis_size(r) == 1);
             x.squeeze(r);
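For context, a minimal standalone sketch of the squeeze loop this hunk guards, using plain std::vector<int> shapes instead of OpenCV's TensorSpan/TensorView; only x.squeeze(r) is visible above, so squeezing y and the output in the same step is an assumption here. The added x.rank() > r && y.rank() > r guard matters when every axis of both inputs is 1 (the degenerate "same shape" case): the inner while keeps squeezing until the inputs run out of axes, and without the guard the next condition check would read an axis past the end, since the enclosing for condition is only re-evaluated after the while exits.

#include <cassert>
#include <cstddef>
#include <vector>

// Sketch of the guarded squeeze step (assumes the output has at least as many
// axes as each input, which holds for broadcasting). Erasing an element stands
// in for squeeze(r); the size checks mirror the new rank() > r guard.
static void squeeze_shared_unit_axes(std::vector<int>& out,
                                     std::vector<int>& x,
                                     std::vector<int>& y)
{
    for (std::size_t r = 0; r < out.size(); r++)
    {
        while (x.size() > r && y.size() > r && x[r] == 1 && y[r] == 1)
        {
            assert(out[r] == 1); // broadcasting never widens an axis both inputs agree is 1
            x.erase(x.begin() + r);    // stands in for x.squeeze(r)
            y.erase(y.begin() + r);    // assumed: y and out are squeezed alongside x
            out.erase(out.begin() + r);
        }
    }
}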
@@ -183,6 +183,9 @@ void eltwise_op(const Stream& stream, TensorSpan<T> output, TensorView<T> x, Ten
             auto new_size = inShape1[i] * inShape1[j];
             inShape1[i] = new_size;
             inShape2[i] = new_size;
+            // outShape must also be updated after the axes are merged
+            auto output_new_size = outShape[i] * outShape[j];
+            outShape[i] = output_new_size;
             /* delete axis `j` */
             inShape1.erase(std::begin(inShape1) + j);
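The second hunk patches the axis-merge step: when two adjacent axes i and j can be collapsed into one, the output shape has to be merged along with both input shapes, otherwise the output descriptor keeps one more axis than the inputs. A simplified sketch with plain std::vector<int> shapes follows; the erase calls for inShape2 and outShape are not visible in the hunk and are assumed, as is the precondition that the two input shapes already agree on axes i and j in this branch.

#include <cstddef>
#include <vector>

// Merge adjacent axes i and j (j = i + 1) of both inputs and the output,
// keeping all three shape descriptors the same rank.
static void merge_adjacent_axes(std::vector<int>& inShape1,
                                std::vector<int>& inShape2,
                                std::vector<int>& outShape,
                                std::size_t i, std::size_t j)
{
    inShape1[i] *= inShape1[j];
    inShape2[i]  = inShape1[i];   // mirrors `inShape2[i] = new_size` above
    outShape[i] *= outShape[j];   // the fix: keep outShape in sync with the inputs

    inShape1.erase(inShape1.begin() + j);
    inShape2.erase(inShape2.begin() + j);  // assumed to follow the visible erase
    outShape.erase(outShape.begin() + j);
}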


@@ -51,8 +51,6 @@ public:
         WHERE,
     } op;
-    // If the eltwise implementation is modified, you need to force enable the 'Layer_Test_Eltwise_bcast'
-    // test in the 'test_layers.cpp' file to make sure it all passes
     NaryEltwiseLayerImpl(const LayerParams& params)
     {
         setParamsFrom(params);


@@ -1949,6 +1949,9 @@ public:
         int backend = get<0>(backend_target);
         int target = get<1>(backend_target);
+        if (backend == DNN_BACKEND_CUDA && dim > 4)
+            applyTestTag(CV_TEST_TAG_LONG);
         vector<vector<int>> dim_shape_list;
         get_all_arr(dim_shape_list, dim);
         replace(dim_shape_list, 1, 3);
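The test builds its shape list with get_all_arr() and replace(), which are not shown in this diff. A plausible reading (an assumption, not code from the commit) is that the brute force enumerates every rank-dim shape whose axes are either 1 or 3, giving 2^dim shapes per input and 4^dim broadcast pairs, which is why the CUDA runs with dim > 4 are tagged as long-running.

#include <vector>

// Hypothetical illustration of the brute-force shape enumeration behind
// Layer_Test_Eltwise_bcast: every shape of rank `dim` whose axes are 1 or 3,
// so all broadcast patterns (including the all-ones "same shape" case fixed
// by this commit) get exercised.
static std::vector<std::vector<int>> all_bcast_shapes(int dim)
{
    std::vector<std::vector<int>> shapes;
    for (int mask = 0; mask < (1 << dim); mask++)
    {
        std::vector<int> shape(dim);
        for (int axis = 0; axis < dim; axis++)
            shape[axis] = ((mask >> axis) & 1) ? 3 : 1;
        shapes.push_back(shape);
    }
    return shapes;
}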
@@ -2028,7 +2031,7 @@ private:
     }
 };
-TEST_P(Layer_Test_Eltwise_bcast, DISABLED_brute_force)
+TEST_P(Layer_Test_Eltwise_bcast, brute_force)
 {
     test_bcast();
 }
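For readers outside OpenCV's test suite: in GoogleTest, which OpenCV's ts module builds on, a test whose name begins with DISABLED_ is compiled but skipped unless --gtest_also_run_disabled_tests is passed, so dropping the prefix above is what turns the brute-force broadcast test back on by default. A minimal, self-contained illustration (not OpenCV code; assumes linking against gtest_main):

#include <gtest/gtest.h>

// Skipped by default; only runs with --gtest_also_run_disabled_tests.
TEST(NamingDemo, DISABLED_skipped_by_default)
{
    FAIL() << "only reached when disabled tests are explicitly requested";
}

// Runs on every invocation of the test binary.
TEST(NamingDemo, runs_by_default)
{
    SUCCEED();
}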