Merge pull request #23560 from WanliZhong:eltwise_cuda_bug

DNN/CUDA: Solve the bug of same shape broadcast with CUDA
2025-06-13 04:52:53 +08:00 · 2023-05-16 14:16:37 +03:00 · 2023-05-16 14:16:37 +03:00 · 59ca444b26
commit 59ca444b26
parent 04d71da6e7 46991bcd62
3 changed files with 8 additions and 4 deletions
--- a/modules/dnn/src/cuda/eltwise_ops.cu
+++ b/modules/dnn/src/cuda/eltwise_ops.cu
@@ -150,7 +150,7 @@ void eltwise_op(const Stream& stream, TensorSpan<T> output, TensorView<T> x, Ten
         */
        for (int r = 0; r < output.rank(); r++)
        {
-            while (x.get_axis_size(r) == 1 && y.get_axis_size(r) == 1) {
+            while (x.rank() > r && y.rank() > r && x.get_axis_size(r) == 1 && y.get_axis_size(r) == 1) {
                CV_Assert(output.get_axis_size(r) == 1);
                x.squeeze(r);
@@ -183,6 +183,9 @@ void eltwise_op(const Stream& stream, TensorSpan<T> output, TensorView<T> x, Ten
                    auto new_size = inShape1[i] * inShape1[j];
                    inShape1[i] = new_size;
                    inShape2[i] = new_size;
+                    // outShape should be changed after merged
+                    auto output_new_size = outShape[i] * outShape[j];
+                    outShape[i] = output_new_size;
                    /* delete axis `j` */
                    inShape1.erase(std::begin(inShape1) + j);
--- a/modules/dnn/src/layers/nary_eltwise_layers.cpp
+++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp
@@ -51,8 +51,6 @@ public:
        WHERE,
    } op;
-    // If the eltwise implementation is modified, you need to force enable the 'Layer_Test_Eltwise_bcast'
-    // test in the 'test_layers.cpp' file to make sure it all passes
    NaryEltwiseLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -1949,6 +1949,9 @@ public:
        int backend = get<0>(backend_target);
        int target = get<1>(backend_target);
+        if (backend == DNN_BACKEND_CUDA && dim > 4)
+            applyTestTag(CV_TEST_TAG_LONG);
        vector<vector<int>> dim_shape_list;
        get_all_arr(dim_shape_list, dim);
        replace(dim_shape_list, 1, 3);
@@ -2028,7 +2031,7 @@ private:
    }
 };
-TEST_P(Layer_Test_Eltwise_bcast, DISABLED_brute_force)
+TEST_P(Layer_Test_Eltwise_bcast, brute_force)
 {
    test_bcast();
 }