enable fusion tests, update thresholds and fix missed eltwise fusions

2025-06-07 17:44:04 +08:00 · 2020-11-21 17:35:20 +05:30 · 2020-11-21 17:35:20 +05:30 · 0f8ab0557e
commit 0f8ab0557e
parent 0105f8fa38
9 changed files with 73 additions and 35 deletions
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@ -2681,7 +2681,6 @@ struct Net::Impl : public detail::NetImplBase
 #ifdef HAVE_CUDA
                    // CUDA backend supports fusion with eltwise sum (without variable channels)
                    // `nextEltwiseLayer` is reset if eltwise layer doesn't have a compatible configuration for fusion
                    if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty())
                    {
                        // we create a temporary backend node for eltwise layer to obtain the eltwise configuration
@ -2691,38 +2690,41 @@ struct Net::Impl : public detail::NetImplBase
                        // CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used.
                        // Hence, a successful cast to EltwiseOp implies that the number of channels is same in all operand tensors.
                        if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty())
-                            nextEltwiseLayer = Ptr<EltwiseLayer>();
+                            break;
                    }
 #endif
-                    if (pinsToKeep.count(lpNext) != 0)
+                    if (IS_DNN_OPENCL_TARGET(preferableTarget) && pinsToKeep.count(lpNext) != 0)
                        break;
                    if (nextData->inputBlobsId.size() != 2)
                        break;
-                    if (!nextData->params.has("operation") || toLowerCase(nextData->params.get<String>("operation")) == "sum")
+                    if (IS_DNN_OPENCL_TARGET(preferableTarget))
                    {
-                        if (nextData->params.has("coeff"))
+                        if (!nextData->params.has("operation") || toLowerCase(nextData->params.get<String>("operation")) == "sum")
                        {
-                            DictValue paramCoeff = nextData->params.get("coeff");
+                            if (nextData->params.has("coeff"))
                            int n = paramCoeff.size();
                            bool isCoeffOneOne = (n == 2);
                            for (int i = 0; isCoeffOneOne && i < n; i++)
                            {
-                                float c = paramCoeff.get<float>(i);
+                                DictValue paramCoeff = nextData->params.get("coeff");
-                                isCoeffOneOne &= (c == 1.0f);
+                                int n = paramCoeff.size();
-                            }
+                                bool isCoeffOneOne = (n == 2);
-                            if (!isCoeffOneOne)
+                                for (int i = 0; isCoeffOneOne && i < n; i++)
-                            {
+                                {
-                                CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only");
+                                    float c = paramCoeff.get<float>(i);
-                                break;
+                                    isCoeffOneOne &= (c == 1.0f);
                                }
                                if (!isCoeffOneOne)
                                {
                                    CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only");
                                    break;
                                }
                            }
                        }
-                    }
+                        else
-                    else
+                        {
-                    {
+                            CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get<String>("operation"));
-                        CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get<String>("operation"));
+                            break;
-                        break;
+                        }
                    }
                    {
--- a/modules/dnn/test/test_backends.cpp
+++ b/modules/dnn/test/test_backends.cpp
@ -321,6 +321,7 @@ TEST_P(DNNTestNetwork, SSD_VGG16)
    else if (target == DNN_TARGET_CUDA_FP16)
    {
        scoreDiff = 0.03;
        iouDiff = 0.13;
    }
    processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel",
@ -511,7 +512,7 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
    else if (target == DNN_TARGET_CUDA_FP16)
    {
        l1 = 0.3;
-        lInf = 7.2;
+        lInf = 7.6;
    }
    processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf);
 #if defined(HAVE_INF_ENGINE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@ -749,7 +749,7 @@ TEST_P(Test_Caffe_nets, RFCN)
    if (target == DNN_TARGET_CUDA_FP16)
    {
        scoreDiff = 0.0034;
-        iouDiff = 0.11;
+        iouDiff = 0.12;
    }
    static Mat ref = (Mat_<float>(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234,
                                           0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16);
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@ -677,6 +677,8 @@ TEST_P(Test_Darknet_nets, YOLOv4_tiny)
    double scoreDiff = 0.01f;
    double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.15 : 0.01f;
    if (target == DNN_TARGET_CUDA_FP16)
        iouDiff = 0.02;
    std::string config_file = "yolov4-tiny.cfg";
    std::string weights_file = "yolov4-tiny.weights";
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@ -2228,7 +2228,7 @@ public:
    static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsForFusionTests()
    {
-        return dnnBackendsAndTargets(false, false, true, false, false, false); // OCV OpenCL + OCV CPU
+        return dnnBackendsAndTargets(false, false, true, false, true, false); // OCV OpenCL + OCV CPU + CUDA
    }
 };
@ -2280,7 +2280,12 @@ TEST_P(ConvolutionActivationFusion, Accuracy)
                expectedFusedLayers.push_back(activId);
        }
    }
-
+    else if (backendId == DNN_BACKEND_CUDA)
    {
        if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" ||
            actType == "Mish" || actType == "Sigmoid" || actType == "Power")
                expectedFusedLayers.push_back(activId);
    }
    TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
 }
 INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationFusion, Combine(
@ -2319,7 +2324,7 @@ TEST_P(ConvolutionEltwiseFusion, Accuracy)
    std::string eltwiseOp = get<1>(GetParam());
    bool weightedEltwise = get<2>(GetParam());
    if (eltwiseOp != "sum" && weightedEltwise)
-            throw SkipTestException("weighted eltwise not supported");
+        throw SkipTestException("weighted eltwise not supported");
    LayerParams eltwiseParams;
    TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise);
@ -2332,7 +2337,11 @@ TEST_P(ConvolutionEltwiseFusion, Accuracy)
    Backend backendId = get<0>(get<3>(GetParam()));
    Target targetId = get<1>(get<3>(GetParam()));
-    TestLayerFusion::test(input, net, backendId, targetId);
+
    std::vector<int> expectedFusedLayers;
    if (backendId == DNN_BACKEND_CUDA && eltwiseOp == "sum" && !weightedEltwise)
        expectedFusedLayers.push_back(eltwiseId);
    TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
 }
 INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseFusion, Combine(
 /* bias */              testing::Bool(),
@ -2411,7 +2420,16 @@ TEST_P(ConvolutionEltwiseActivationFusion, Accuracy)
            }
        }
    }
-
+    else if(backendId == DNN_BACKEND_CUDA)
    {
        if (eltwiseOp == "sum" && !weightedEltwise)
        {
            expectedFusedLayers.push_back(eltwiseId);
            if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" ||
                actType == "Mish" || actType == "Sigmoid" || actType == "Power")
                expectedFusedLayers.push_back(activId);
        }
    }
    TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
 }
 INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseActivationFusion, Combine(
@ -2486,7 +2504,16 @@ TEST_P(ConvolutionActivationEltwiseFusion, Accuracy)
                expectedFusedLayers.push_back(activId); // activation fused with convolution
        }
    }
-
+    else if(backendId == DNN_BACKEND_CUDA)
    {
        if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" ||
            actType == "Mish" || actType == "Sigmoid" || actType == "Power")
        {
                expectedFusedLayers.push_back(activId);
                if (eltwiseOp == "sum" && !weightedEltwise)
                    expectedFusedLayers.push_back(eltwiseId);
        }
    }
    TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
 }
 INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationEltwiseFusion, Combine(
--- a/modules/dnn/test/test_model.cpp
+++ b/modules/dnn/test/test_model.cpp
@ -263,7 +263,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD)
    }
    else if (target == DNN_TARGET_CUDA_FP16)
    {
-        scoreDiff = 4e-4;
+        scoreDiff = 0.002;
        iouDiff = 1e-2;
    }
    float confThreshold = FLT_MIN;
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@ -221,7 +221,8 @@ TEST_P(Test_ONNX_layers, Deconvolution)
    testONNXModels("two_deconvolution", npy, 0, 0, false, false);
    testONNXModels("deconvolution_group", npy, 0, 0, false, false);
    testONNXModels("deconvolution_output_shape", npy, 0, 0, false, false);
-    testONNXModels("deconv_adjpad_2d", npy, 0, 0, false, false);
+    if (target != DNN_TARGET_CUDA_FP16) // bug
        testONNXModels("deconv_adjpad_2d", npy, 0, 0, false, false);
 }
 TEST_P(Test_ONNX_layers, Deconvolution3D)
@ -675,6 +676,8 @@ TEST_P(Test_ONNX_layers, LinearWithConstant)
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000)
    applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
 #endif
    if (backend == DNN_BACKEND_CUDA)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
    testONNXModels("lin_with_constant");
 }
@ -685,6 +688,8 @@ TEST_P(Test_ONNX_layers, MatmulWithTwoInputs)
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000)
    applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
 #endif
    if (backend == DNN_BACKEND_CUDA)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
    testONNXModels("matmul_with_two_inputs");
 }
@ -1159,8 +1164,8 @@ TEST_P(Test_ONNX_nets, Resnet34_kinetics)
    float l1 = 0.0013, lInf = 0.009;
    if (target == DNN_TARGET_CUDA_FP16)
    {
-        l1 = 0.008;
+        l1 = 0.01;
-        lInf = 0.04;
+        lInf = 0.06;
    }
    checkBackend(&input0, &ref0);
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@ -1256,7 +1256,7 @@ TEST_P(Test_TensorFlow_nets, EfficientDet)
    if (target == DNN_TARGET_CUDA_FP16)
    {
        scoreDiff = 0.002;
-        iouDiff = 0.004;
+        iouDiff = 0.005;
    }
    normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
    expectNoFallbacksFromIE(net);
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@ -165,7 +165,8 @@ TEST_P(Test_Torch_layers, run_reshape_single_sample)
    }
    else if (target == DNN_TARGET_CUDA_FP16)
    {
-        l1 = 0.01;
+        l1 = 0.02;
        lInf = 0.04;
    }
    runTorchNet("net_reshape_single_sample", "", false, false, true, l1, lInf);
 }