mirror of
https://github.com/opencv/opencv.git
synced 2024-11-28 13:10:12 +08:00
Merge pull request #18285 from YashasSamaga:cuda4dnn-update-tests
This commit is contained in:
commit
df18431b45
@ -2681,7 +2681,6 @@ struct Net::Impl : public detail::NetImplBase
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
// CUDA backend supports fusion with eltwise sum (without variable channels)
|
||||
// `nextEltwiseLayer` is reset if eltwise layer doesn't have a compatible configuration for fusion
|
||||
if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty())
|
||||
{
|
||||
// we create a temporary backend node for eltwise layer to obtain the eltwise configuration
|
||||
@ -2691,38 +2690,41 @@ struct Net::Impl : public detail::NetImplBase
|
||||
// CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used.
|
||||
// Hence, a successful cast to EltwiseOp implies that the number of channels is same in all operand tensors.
|
||||
if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty())
|
||||
nextEltwiseLayer = Ptr<EltwiseLayer>();
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (pinsToKeep.count(lpNext) != 0)
|
||||
if (IS_DNN_OPENCL_TARGET(preferableTarget) && pinsToKeep.count(lpNext) != 0)
|
||||
break;
|
||||
if (nextData->inputBlobsId.size() != 2)
|
||||
break;
|
||||
|
||||
if (!nextData->params.has("operation") || toLowerCase(nextData->params.get<String>("operation")) == "sum")
|
||||
if (IS_DNN_OPENCL_TARGET(preferableTarget))
|
||||
{
|
||||
if (nextData->params.has("coeff"))
|
||||
if (!nextData->params.has("operation") || toLowerCase(nextData->params.get<String>("operation")) == "sum")
|
||||
{
|
||||
DictValue paramCoeff = nextData->params.get("coeff");
|
||||
int n = paramCoeff.size();
|
||||
bool isCoeffOneOne = (n == 2);
|
||||
for (int i = 0; isCoeffOneOne && i < n; i++)
|
||||
if (nextData->params.has("coeff"))
|
||||
{
|
||||
float c = paramCoeff.get<float>(i);
|
||||
isCoeffOneOne &= (c == 1.0f);
|
||||
}
|
||||
if (!isCoeffOneOne)
|
||||
{
|
||||
CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only");
|
||||
break;
|
||||
DictValue paramCoeff = nextData->params.get("coeff");
|
||||
int n = paramCoeff.size();
|
||||
bool isCoeffOneOne = (n == 2);
|
||||
for (int i = 0; isCoeffOneOne && i < n; i++)
|
||||
{
|
||||
float c = paramCoeff.get<float>(i);
|
||||
isCoeffOneOne &= (c == 1.0f);
|
||||
}
|
||||
if (!isCoeffOneOne)
|
||||
{
|
||||
CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get<String>("operation"));
|
||||
break;
|
||||
else
|
||||
{
|
||||
CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get<String>("operation"));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -321,6 +321,7 @@ TEST_P(DNNTestNetwork, SSD_VGG16)
|
||||
else if (target == DNN_TARGET_CUDA_FP16)
|
||||
{
|
||||
scoreDiff = 0.03;
|
||||
iouDiff = 0.13;
|
||||
}
|
||||
|
||||
processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel",
|
||||
@ -511,7 +512,7 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
|
||||
else if (target == DNN_TARGET_CUDA_FP16)
|
||||
{
|
||||
l1 = 0.3;
|
||||
lInf = 7.2;
|
||||
lInf = 7.6;
|
||||
}
|
||||
processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf);
|
||||
#if defined(HAVE_INF_ENGINE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
|
||||
|
@ -749,7 +749,7 @@ TEST_P(Test_Caffe_nets, RFCN)
|
||||
if (target == DNN_TARGET_CUDA_FP16)
|
||||
{
|
||||
scoreDiff = 0.0034;
|
||||
iouDiff = 0.11;
|
||||
iouDiff = 0.12;
|
||||
}
|
||||
static Mat ref = (Mat_<float>(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234,
|
||||
0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16);
|
||||
|
@ -677,6 +677,8 @@ TEST_P(Test_Darknet_nets, YOLOv4_tiny)
|
||||
|
||||
double scoreDiff = 0.01f;
|
||||
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.15 : 0.01f;
|
||||
if (target == DNN_TARGET_CUDA_FP16)
|
||||
iouDiff = 0.02;
|
||||
|
||||
std::string config_file = "yolov4-tiny.cfg";
|
||||
std::string weights_file = "yolov4-tiny.weights";
|
||||
|
@ -2228,7 +2228,7 @@ public:
|
||||
|
||||
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsForFusionTests()
|
||||
{
|
||||
return dnnBackendsAndTargets(false, false, true, false, false, false); // OCV OpenCL + OCV CPU
|
||||
return dnnBackendsAndTargets(false, false, true, false, true, false); // OCV OpenCL + OCV CPU + CUDA
|
||||
}
|
||||
};
|
||||
|
||||
@ -2280,7 +2280,12 @@ TEST_P(ConvolutionActivationFusion, Accuracy)
|
||||
expectedFusedLayers.push_back(activId);
|
||||
}
|
||||
}
|
||||
|
||||
else if (backendId == DNN_BACKEND_CUDA)
|
||||
{
|
||||
if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" ||
|
||||
actType == "Mish" || actType == "Sigmoid" || actType == "Power")
|
||||
expectedFusedLayers.push_back(activId);
|
||||
}
|
||||
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationFusion, Combine(
|
||||
@ -2319,7 +2324,7 @@ TEST_P(ConvolutionEltwiseFusion, Accuracy)
|
||||
std::string eltwiseOp = get<1>(GetParam());
|
||||
bool weightedEltwise = get<2>(GetParam());
|
||||
if (eltwiseOp != "sum" && weightedEltwise)
|
||||
throw SkipTestException("weighted eltwise not supported");
|
||||
throw SkipTestException("weighted eltwise not supported");
|
||||
LayerParams eltwiseParams;
|
||||
TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise);
|
||||
|
||||
@ -2332,7 +2337,11 @@ TEST_P(ConvolutionEltwiseFusion, Accuracy)
|
||||
|
||||
Backend backendId = get<0>(get<3>(GetParam()));
|
||||
Target targetId = get<1>(get<3>(GetParam()));
|
||||
TestLayerFusion::test(input, net, backendId, targetId);
|
||||
|
||||
std::vector<int> expectedFusedLayers;
|
||||
if (backendId == DNN_BACKEND_CUDA && eltwiseOp == "sum" && !weightedEltwise)
|
||||
expectedFusedLayers.push_back(eltwiseId);
|
||||
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseFusion, Combine(
|
||||
/* bias */ testing::Bool(),
|
||||
@ -2411,7 +2420,16 @@ TEST_P(ConvolutionEltwiseActivationFusion, Accuracy)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
else if(backendId == DNN_BACKEND_CUDA)
|
||||
{
|
||||
if (eltwiseOp == "sum" && !weightedEltwise)
|
||||
{
|
||||
expectedFusedLayers.push_back(eltwiseId);
|
||||
if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" ||
|
||||
actType == "Mish" || actType == "Sigmoid" || actType == "Power")
|
||||
expectedFusedLayers.push_back(activId);
|
||||
}
|
||||
}
|
||||
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseActivationFusion, Combine(
|
||||
@ -2486,7 +2504,16 @@ TEST_P(ConvolutionActivationEltwiseFusion, Accuracy)
|
||||
expectedFusedLayers.push_back(activId); // activation fused with convolution
|
||||
}
|
||||
}
|
||||
|
||||
else if(backendId == DNN_BACKEND_CUDA)
|
||||
{
|
||||
if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" ||
|
||||
actType == "Mish" || actType == "Sigmoid" || actType == "Power")
|
||||
{
|
||||
expectedFusedLayers.push_back(activId);
|
||||
if (eltwiseOp == "sum" && !weightedEltwise)
|
||||
expectedFusedLayers.push_back(eltwiseId);
|
||||
}
|
||||
}
|
||||
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationEltwiseFusion, Combine(
|
||||
|
@ -263,7 +263,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD)
|
||||
}
|
||||
else if (target == DNN_TARGET_CUDA_FP16)
|
||||
{
|
||||
scoreDiff = 4e-4;
|
||||
scoreDiff = 0.002;
|
||||
iouDiff = 1e-2;
|
||||
}
|
||||
float confThreshold = FLT_MIN;
|
||||
|
@ -221,7 +221,8 @@ TEST_P(Test_ONNX_layers, Deconvolution)
|
||||
testONNXModels("two_deconvolution", npy, 0, 0, false, false);
|
||||
testONNXModels("deconvolution_group", npy, 0, 0, false, false);
|
||||
testONNXModels("deconvolution_output_shape", npy, 0, 0, false, false);
|
||||
testONNXModels("deconv_adjpad_2d", npy, 0, 0, false, false);
|
||||
if (target != DNN_TARGET_CUDA_FP16) // bug
|
||||
testONNXModels("deconv_adjpad_2d", npy, 0, 0, false, false);
|
||||
}
|
||||
|
||||
TEST_P(Test_ONNX_layers, Deconvolution3D)
|
||||
@ -675,6 +676,8 @@ TEST_P(Test_ONNX_layers, LinearWithConstant)
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
|
||||
#endif
|
||||
if (backend == DNN_BACKEND_CUDA)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
|
||||
testONNXModels("lin_with_constant");
|
||||
}
|
||||
|
||||
@ -685,6 +688,8 @@ TEST_P(Test_ONNX_layers, MatmulWithTwoInputs)
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
|
||||
#endif
|
||||
if (backend == DNN_BACKEND_CUDA)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
|
||||
testONNXModels("matmul_with_two_inputs");
|
||||
}
|
||||
|
||||
@ -1159,8 +1164,8 @@ TEST_P(Test_ONNX_nets, Resnet34_kinetics)
|
||||
float l1 = 0.0013, lInf = 0.009;
|
||||
if (target == DNN_TARGET_CUDA_FP16)
|
||||
{
|
||||
l1 = 0.008;
|
||||
lInf = 0.04;
|
||||
l1 = 0.01;
|
||||
lInf = 0.06;
|
||||
}
|
||||
|
||||
checkBackend(&input0, &ref0);
|
||||
|
@ -1256,7 +1256,7 @@ TEST_P(Test_TensorFlow_nets, EfficientDet)
|
||||
if (target == DNN_TARGET_CUDA_FP16)
|
||||
{
|
||||
scoreDiff = 0.002;
|
||||
iouDiff = 0.004;
|
||||
iouDiff = 0.005;
|
||||
}
|
||||
normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
|
||||
expectNoFallbacksFromIE(net);
|
||||
|
@ -165,7 +165,8 @@ TEST_P(Test_Torch_layers, run_reshape_single_sample)
|
||||
}
|
||||
else if (target == DNN_TARGET_CUDA_FP16)
|
||||
{
|
||||
l1 = 0.01;
|
||||
l1 = 0.02;
|
||||
lInf = 0.04;
|
||||
}
|
||||
runTorchNet("net_reshape_single_sample", "", false, false, true, l1, lInf);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user