Merge pull request #18285 from YashasSamaga:cuda4dnn-update-tests

This commit is contained in:
Alexander Alekhin 2020-11-27 08:26:45 +00:00
commit df18431b45
9 changed files with 73 additions and 35 deletions

View File

@ -2681,7 +2681,6 @@ struct Net::Impl : public detail::NetImplBase
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
// CUDA backend supports fusion with eltwise sum (without variable channels) // CUDA backend supports fusion with eltwise sum (without variable channels)
// `nextEltwiseLayer` is reset if eltwise layer doesn't have a compatible configuration for fusion
if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty()) if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty())
{ {
// we create a temporary backend node for eltwise layer to obtain the eltwise configuration // we create a temporary backend node for eltwise layer to obtain the eltwise configuration
@ -2691,38 +2690,41 @@ struct Net::Impl : public detail::NetImplBase
// CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used. // CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used.
// Hence, a successful cast to EltwiseOp implies that the number of channels is same in all operand tensors. // Hence, a successful cast to EltwiseOp implies that the number of channels is same in all operand tensors.
if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty()) if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty())
nextEltwiseLayer = Ptr<EltwiseLayer>(); break;
} }
#endif #endif
if (pinsToKeep.count(lpNext) != 0) if (IS_DNN_OPENCL_TARGET(preferableTarget) && pinsToKeep.count(lpNext) != 0)
break; break;
if (nextData->inputBlobsId.size() != 2) if (nextData->inputBlobsId.size() != 2)
break; break;
if (!nextData->params.has("operation") || toLowerCase(nextData->params.get<String>("operation")) == "sum") if (IS_DNN_OPENCL_TARGET(preferableTarget))
{ {
if (nextData->params.has("coeff")) if (!nextData->params.has("operation") || toLowerCase(nextData->params.get<String>("operation")) == "sum")
{ {
DictValue paramCoeff = nextData->params.get("coeff"); if (nextData->params.has("coeff"))
int n = paramCoeff.size();
bool isCoeffOneOne = (n == 2);
for (int i = 0; isCoeffOneOne && i < n; i++)
{ {
float c = paramCoeff.get<float>(i); DictValue paramCoeff = nextData->params.get("coeff");
isCoeffOneOne &= (c == 1.0f); int n = paramCoeff.size();
} bool isCoeffOneOne = (n == 2);
if (!isCoeffOneOne) for (int i = 0; isCoeffOneOne && i < n; i++)
{ {
CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only"); float c = paramCoeff.get<float>(i);
break; isCoeffOneOne &= (c == 1.0f);
}
if (!isCoeffOneOne)
{
CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only");
break;
}
} }
} }
} else
else {
{ CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get<String>("operation"));
CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get<String>("operation")); break;
break; }
} }
{ {

View File

@ -321,6 +321,7 @@ TEST_P(DNNTestNetwork, SSD_VGG16)
else if (target == DNN_TARGET_CUDA_FP16) else if (target == DNN_TARGET_CUDA_FP16)
{ {
scoreDiff = 0.03; scoreDiff = 0.03;
iouDiff = 0.13;
} }
processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel",
@ -511,7 +512,7 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
else if (target == DNN_TARGET_CUDA_FP16) else if (target == DNN_TARGET_CUDA_FP16)
{ {
l1 = 0.3; l1 = 0.3;
lInf = 7.2; lInf = 7.6;
} }
processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf); processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf);
#if defined(HAVE_INF_ENGINE) && INF_ENGINE_VER_MAJOR_GE(2019010000) #if defined(HAVE_INF_ENGINE) && INF_ENGINE_VER_MAJOR_GE(2019010000)

View File

@ -749,7 +749,7 @@ TEST_P(Test_Caffe_nets, RFCN)
if (target == DNN_TARGET_CUDA_FP16) if (target == DNN_TARGET_CUDA_FP16)
{ {
scoreDiff = 0.0034; scoreDiff = 0.0034;
iouDiff = 0.11; iouDiff = 0.12;
} }
static Mat ref = (Mat_<float>(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234, static Mat ref = (Mat_<float>(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234,
0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16); 0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16);

View File

@ -677,6 +677,8 @@ TEST_P(Test_Darknet_nets, YOLOv4_tiny)
double scoreDiff = 0.01f; double scoreDiff = 0.01f;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.15 : 0.01f; double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.15 : 0.01f;
if (target == DNN_TARGET_CUDA_FP16)
iouDiff = 0.02;
std::string config_file = "yolov4-tiny.cfg"; std::string config_file = "yolov4-tiny.cfg";
std::string weights_file = "yolov4-tiny.weights"; std::string weights_file = "yolov4-tiny.weights";

View File

@ -2228,7 +2228,7 @@ public:
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsForFusionTests() static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsForFusionTests()
{ {
return dnnBackendsAndTargets(false, false, true, false, false, false); // OCV OpenCL + OCV CPU return dnnBackendsAndTargets(false, false, true, false, true, false); // OCV OpenCL + OCV CPU + CUDA
} }
}; };
@ -2280,7 +2280,12 @@ TEST_P(ConvolutionActivationFusion, Accuracy)
expectedFusedLayers.push_back(activId); expectedFusedLayers.push_back(activId);
} }
} }
else if (backendId == DNN_BACKEND_CUDA)
{
if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" ||
actType == "Mish" || actType == "Sigmoid" || actType == "Power")
expectedFusedLayers.push_back(activId);
}
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers); TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
} }
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationFusion, Combine( INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationFusion, Combine(
@ -2319,7 +2324,7 @@ TEST_P(ConvolutionEltwiseFusion, Accuracy)
std::string eltwiseOp = get<1>(GetParam()); std::string eltwiseOp = get<1>(GetParam());
bool weightedEltwise = get<2>(GetParam()); bool weightedEltwise = get<2>(GetParam());
if (eltwiseOp != "sum" && weightedEltwise) if (eltwiseOp != "sum" && weightedEltwise)
throw SkipTestException("weighted eltwise not supported"); throw SkipTestException("weighted eltwise not supported");
LayerParams eltwiseParams; LayerParams eltwiseParams;
TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise); TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise);
@ -2332,7 +2337,11 @@ TEST_P(ConvolutionEltwiseFusion, Accuracy)
Backend backendId = get<0>(get<3>(GetParam())); Backend backendId = get<0>(get<3>(GetParam()));
Target targetId = get<1>(get<3>(GetParam())); Target targetId = get<1>(get<3>(GetParam()));
TestLayerFusion::test(input, net, backendId, targetId);
std::vector<int> expectedFusedLayers;
if (backendId == DNN_BACKEND_CUDA && eltwiseOp == "sum" && !weightedEltwise)
expectedFusedLayers.push_back(eltwiseId);
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
} }
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseFusion, Combine( INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseFusion, Combine(
/* bias */ testing::Bool(), /* bias */ testing::Bool(),
@ -2411,7 +2420,16 @@ TEST_P(ConvolutionEltwiseActivationFusion, Accuracy)
} }
} }
} }
else if(backendId == DNN_BACKEND_CUDA)
{
if (eltwiseOp == "sum" && !weightedEltwise)
{
expectedFusedLayers.push_back(eltwiseId);
if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" ||
actType == "Mish" || actType == "Sigmoid" || actType == "Power")
expectedFusedLayers.push_back(activId);
}
}
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers); TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
} }
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseActivationFusion, Combine( INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseActivationFusion, Combine(
@ -2486,7 +2504,16 @@ TEST_P(ConvolutionActivationEltwiseFusion, Accuracy)
expectedFusedLayers.push_back(activId); // activation fused with convolution expectedFusedLayers.push_back(activId); // activation fused with convolution
} }
} }
else if(backendId == DNN_BACKEND_CUDA)
{
if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" ||
actType == "Mish" || actType == "Sigmoid" || actType == "Power")
{
expectedFusedLayers.push_back(activId);
if (eltwiseOp == "sum" && !weightedEltwise)
expectedFusedLayers.push_back(eltwiseId);
}
}
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers); TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
} }
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationEltwiseFusion, Combine( INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationEltwiseFusion, Combine(

View File

@ -263,7 +263,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD)
} }
else if (target == DNN_TARGET_CUDA_FP16) else if (target == DNN_TARGET_CUDA_FP16)
{ {
scoreDiff = 4e-4; scoreDiff = 0.002;
iouDiff = 1e-2; iouDiff = 1e-2;
} }
float confThreshold = FLT_MIN; float confThreshold = FLT_MIN;

View File

@ -221,7 +221,8 @@ TEST_P(Test_ONNX_layers, Deconvolution)
testONNXModels("two_deconvolution", npy, 0, 0, false, false); testONNXModels("two_deconvolution", npy, 0, 0, false, false);
testONNXModels("deconvolution_group", npy, 0, 0, false, false); testONNXModels("deconvolution_group", npy, 0, 0, false, false);
testONNXModels("deconvolution_output_shape", npy, 0, 0, false, false); testONNXModels("deconvolution_output_shape", npy, 0, 0, false, false);
testONNXModels("deconv_adjpad_2d", npy, 0, 0, false, false); if (target != DNN_TARGET_CUDA_FP16) // bug
testONNXModels("deconv_adjpad_2d", npy, 0, 0, false, false);
} }
TEST_P(Test_ONNX_layers, Deconvolution3D) TEST_P(Test_ONNX_layers, Deconvolution3D)
@ -675,6 +676,8 @@ TEST_P(Test_ONNX_layers, LinearWithConstant)
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000) #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE); applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
#endif #endif
if (backend == DNN_BACKEND_CUDA)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
testONNXModels("lin_with_constant"); testONNXModels("lin_with_constant");
} }
@ -685,6 +688,8 @@ TEST_P(Test_ONNX_layers, MatmulWithTwoInputs)
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000) #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE); applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
#endif #endif
if (backend == DNN_BACKEND_CUDA)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
testONNXModels("matmul_with_two_inputs"); testONNXModels("matmul_with_two_inputs");
} }
@ -1159,8 +1164,8 @@ TEST_P(Test_ONNX_nets, Resnet34_kinetics)
float l1 = 0.0013, lInf = 0.009; float l1 = 0.0013, lInf = 0.009;
if (target == DNN_TARGET_CUDA_FP16) if (target == DNN_TARGET_CUDA_FP16)
{ {
l1 = 0.008; l1 = 0.01;
lInf = 0.04; lInf = 0.06;
} }
checkBackend(&input0, &ref0); checkBackend(&input0, &ref0);

View File

@ -1256,7 +1256,7 @@ TEST_P(Test_TensorFlow_nets, EfficientDet)
if (target == DNN_TARGET_CUDA_FP16) if (target == DNN_TARGET_CUDA_FP16)
{ {
scoreDiff = 0.002; scoreDiff = 0.002;
iouDiff = 0.004; iouDiff = 0.005;
} }
normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff); normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
expectNoFallbacksFromIE(net); expectNoFallbacksFromIE(net);

View File

@ -165,7 +165,8 @@ TEST_P(Test_Torch_layers, run_reshape_single_sample)
} }
else if (target == DNN_TARGET_CUDA_FP16) else if (target == DNN_TARGET_CUDA_FP16)
{ {
l1 = 0.01; l1 = 0.02;
lInf = 0.04;
} }
runTorchNet("net_reshape_single_sample", "", false, false, true, l1, lInf); runTorchNet("net_reshape_single_sample", "", false, false, true, l1, lInf);
} }