Merge pull request #21884 from rogday:cuda_cleanup

Fix CUDA compilation issues and adjust thresholds.

* Fix CUDA compilation issues and adjust thresholds.

* add conformance tests to denylist
rogday 2022-04-19 19:40:25 +03:00 committed by GitHub
parent 27c15bed60
commit 9cd5a0a1e6
11 changed files with 41 additions and 22 deletions

View File

@@ -260,7 +260,7 @@ void shrink(const Stream& stream, Span<T> output, View<T> input, T bias, T lambd)
 template <class T>
 void reciprocal(const Stream& stream, Span<T> output, View<T> input) {
-    generic_op<T, SignFunctor<T>>(stream, output, input);
+    generic_op<T, ReciprocalFunctor<T>>(stream, output, input);
 }
 
 template <class T>
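The one-line change above is the headline CUDA bug: reciprocal dispatched generic_op with SignFunctor, so a Reciprocal layer run on the CUDA backend silently computed sign(x) instead of 1/x. A minimal host-side analogue (illustration only; this generic_op is a stand-in for OpenCV's elementwise kernel launcher, not its real signature) shows how the functor template argument alone selects the math, which is why the mix-up produced no compile-time diagnostic:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Host-side stand-ins for the CUDA functors; same math.
    template <class T> struct SignFunctor {
        T operator()(T v) const { return v > T(0) ? T(1) : (v < T(0) ? T(-1) : T(0)); }
    };
    template <class T> struct ReciprocalFunctor {
        T operator()(T v) const { return T(1) / v; }
    };

    // The functor type parameter fully determines the computation.
    template <class T, class Functor>
    void generic_op(std::vector<T>& out, const std::vector<T>& in) {
        std::transform(in.begin(), in.end(), out.begin(), Functor{});
    }

    int main() {
        std::vector<float> in{-2.f, 0.5f, 4.f}, out(in.size());
        generic_op<float, ReciprocalFunctor<float>>(out, in); // the fixed dispatch
        for (float v : out) std::printf("%g ", v);            // prints: -0.5 2 0.25
    }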

View File

@@ -732,7 +732,8 @@ struct SignFunctor {
         CUDA4DNN_HOST_DEVICE Params() {}
     };
 
-    CUDA4DNN_DEVICE SignFunctor() : SignFunctor(Params{}) { }
+    CUDA4DNN_DEVICE SignFunctor() { }
+    CUDA4DNN_DEVICE SignFunctor(const Params& params) { }
 
     CUDA4DNN_DEVICE T operator()(T value) {
         return value > T(0) ? T(1) : (value < T(0) ? T(-1) : T(0));
@@ -747,7 +748,7 @@ struct ShrinkFunctor {
         T bias, lambd;
     };
 
-    CUDA4DNN_DEVICE ShrinkFunctor() : bias(0), lambd(0.5) { }
+    CUDA4DNN_DEVICE ShrinkFunctor() : ShrinkFunctor(Params{}) { }
     CUDA4DNN_DEVICE ShrinkFunctor(const Params& params) : bias{params.bias}, lambd{params.lambd} { }
 
     CUDA4DNN_DEVICE T operator()(T value) {
@@ -763,10 +764,11 @@ struct ReciprocalFunctor {
         CUDA4DNN_HOST_DEVICE Params() {}
     };
 
-    CUDA4DNN_DEVICE ReciprocalFunctor() : ReciprocalFunctor(Params{}) { }
+    CUDA4DNN_DEVICE ReciprocalFunctor() { }
+    CUDA4DNN_DEVICE ReciprocalFunctor(const Params& params) { }
 
     CUDA4DNN_DEVICE T operator()(T value) {
-        return T(1.0f)/value;
+        return T(1.f)/value;
     }
 };
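The commit message only says "compilation issues", but the pattern of the fix is consistent: the parameterless functors (Sign, Reciprocal) lose their delegating constructors in favor of two trivial ones, while ShrinkFunctor keeps delegation and gains a Params constructor so the layer can forward bias and lambd (used by the make_cuda_node change later in this commit). A host-side sketch of the resulting pattern, with the CUDA4DNN_DEVICE qualifiers dropped and the two-argument Params constructor assumed for illustration:

    #include <cassert>
    #include <cmath>

    template <class T>
    struct ShrinkFunctor {
        struct Params {
            Params() : bias(0), lambd(0.5) { }
            Params(T bias_, T lambd_) : bias(bias_), lambd(lambd_) { } // assumed overload
            T bias, lambd;
        };
        // Default construction delegates to the Params defaults; the layer
        // passes real values through the Params constructor instead.
        ShrinkFunctor() : ShrinkFunctor(Params{}) { }
        ShrinkFunctor(const Params& params) : bias{params.bias}, lambd{params.lambd} { }
        T operator()(T x) const {
            return x > lambd ? x - bias : (x < -lambd ? x + bias : T(0));
        }
        T bias, lambd;
    };

    int main() {
        ShrinkFunctor<float> defaulted;               // bias = 0, lambd = 0.5
        ShrinkFunctor<float> fromLayer({0.1f, 1.0f}); // values forwarded by the layer
        assert(defaulted(0.25f) == 0.0f);                  // |x| <= lambd -> 0
        assert(std::fabs(fromLayer(2.0f) - 1.9f) < 1e-6f); // x > lambd -> x - bias
    }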

View File

@@ -2080,6 +2080,7 @@ public:
     {
         auto context = reinterpret_cast<csl::CSLContext*>(context_);
 
         // TODO: extract bias from inputs and pass it
+        CV_Assert(inputs.size() == 1 || inputs.size() == 2);
         auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
         auto input_shape = input_wrapper->getShape();

View File

@@ -2282,7 +2282,7 @@ struct SignFunctor : public BaseDefaultFunctor<SignFunctor>
     inline float calculate(float x) const
     {
-        return x > 0 ? 1 : (x < 0 ? -1 : 0);
+        return x > 0.f ? 1.f : (x < 0.f ? -1.f : 0.f);
     }
 
 #ifdef HAVE_CUDA
@@ -2315,13 +2315,13 @@ struct ShrinkFunctor : public BaseDefaultFunctor<ShrinkFunctor>
     inline float calculate(float x) const
     {
-        return x > lambd ? x - bias : (x < -lambd ? x + bias : 0);
+        return x > lambd ? x - bias : (x < -lambd ? x + bias : 0.f);
     }
 
 #ifdef HAVE_CUDA
     Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
-        return make_cuda_node<cuda4dnn::ShrinkOp>(target, stream);
+        return make_cuda_node<cuda4dnn::ShrinkOp>(target, stream, bias, lambd);
     }
 #endif
@@ -2343,7 +2343,7 @@ struct ReciprocalFunctor : public BaseDefaultFunctor<ReciprocalFunctor>
     inline float calculate(float x) const
     {
-        return 1.0/x;
+        return 1.f/x;
     }
 
 #ifdef HAVE_CUDA
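All three calculate fixes are the same cleanup: the integer arms of the ternaries forced an int-to-float conversion on every call, and 1.0/x quietly promoted the division to double before truncating back to float. The ShrinkFunctor hunk additionally forwards bias and lambd to the CUDA node, matching the Params constructor it now has above. A two-line demonstration of the promotion (illustration, not OpenCV code):

    #include <cstdio>

    int main() {
        float x = 3.0f;
        auto promoted = 1.0 / x; // double: the 1.0 literal wins the promotion
        auto single   = 1.f / x; // float: arithmetic stays in single precision
        std::printf("%zu %zu\n", sizeof(promoted), sizeof(single)); // typically: 8 4
    }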

View File

@@ -320,7 +320,7 @@ __kernel void SignForward(const int n, __global T* in, __global T* out)
 {
     int index = get_global_id(0);
     if(index < n)
-        out[index] = in[index] > 0.f ? 1.0f : (in[index] < 0.f) ? -1.0f : 0.0f);
+        out[index] = in[index] > 0.f ? 1.0f : ((in[index] < 0.f) ? -1.0f : 0.0f);
 }
 
 __kernel void ReciprocalForward(const int n, __global T* in, __global T* out)
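The pre-fix kernel line was a syntax error (an unmatched closing parenthesis), not a precedence bug: the conditional operator is right-associative, so the added parentheses only make the grouping explicit. A standalone analogue (hypothetical input value, not the kernel itself):

    #include <cstdio>

    int main() {
        float in = -3.5f;
        // Both expressions group identically; ?: associates right-to-left.
        float explicitGroup = in > 0.f ? 1.0f : ((in < 0.f) ? -1.0f : 0.0f);
        float implicitGroup = in > 0.f ? 1.0f : in < 0.f ? -1.0f : 0.0f;
        std::printf("%g %g\n", explicitGroup, implicitGroup); // prints: -1 -1
    }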

View File

@@ -516,7 +516,7 @@ TEST_P(DNNTestNetwork, DenseNet_121)
     else if (target == DNN_TARGET_CUDA_FP16)
     {
         l1 = 0.008;
-        lInf = 0.05;
+        lInf = 0.06;
     }
     processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "", l1, lInf);
     if (target != DNN_TARGET_MYRIAD || getInferenceEngineVPUType() != CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
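This and the threshold bumps below all loosen the same two knobs. A simplified sketch of what they gate, modeled on OpenCV's normAssert test helper (not the exact implementation): l1 bounds the mean absolute element difference and lInf bounds the single worst element, so a lone FP16-sized outlier trips lInf long before l1:

    #include <opencv2/core.hpp>
    #include <cstdio>

    static bool withinTolerance(const cv::Mat& ref, const cv::Mat& out,
                                double l1, double lInf) {
        double meanAbsDiff = cv::norm(ref, out, cv::NORM_L1) / ref.total();
        double maxAbsDiff  = cv::norm(ref, out, cv::NORM_INF);
        return meanAbsDiff <= l1 && maxAbsDiff <= lInf;
    }

    int main() {
        cv::Mat ref = cv::Mat::ones(4, 4, CV_32F);
        cv::Mat out = ref.clone();
        out.at<float>(0, 0) += 0.055f; // one FP16-sized outlier among 16 values
        std::printf("lInf=0.06 passes: %d, lInf=0.05 passes: %d\n",
                    withinTolerance(ref, out, 0.008, 0.06),   // 1
                    withinTolerance(ref, out, 0.008, 0.05));  // 0
    }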

View File

@@ -844,8 +844,9 @@ TEST_P(Test_two_inputs, basic)
     Mat ref;
     addWeighted(firstInp, kScale, secondInp, kScaleInv, 0, ref, CV_32F);
 
-    double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.06 : 1e-6;
-    double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.3 : 1e-5;
+    double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD || targetId == DNN_TARGET_CUDA_FP16) ? 0.06 : 1e-6;
+    double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD || targetId == DNN_TARGET_CUDA_FP16) ? 0.3 : 1e-5;
 
     normAssert(out, ref, "", l1, lInf);
     if (cvtest::debugLevel > 0 || HasFailure())

View File

@@ -512,7 +512,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD)
     }
     else if (target == DNN_TARGET_CUDA_FP16)
     {
-        scoreDiff = 0.002;
+        scoreDiff = 0.0021;
         iouDiff = 1e-2;
     }
     float confThreshold = FLT_MIN;
@@ -661,7 +661,8 @@ TEST_P(Test_Model, Segmentation)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
 
-    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+    if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+        || (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16))
     {
         norm = 2.0f; // l1 = 0.01 lInf = 2
     }

View File

@@ -954,7 +954,7 @@ public:
         if (target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
         {
-            default_l1 = 4e-3;
+            default_l1 = 7e-3;
             default_lInf = 2e-2;
         }
         else

View File

@@ -66,6 +66,15 @@
     "test_maxunpool_export_with_output_shape",
     "test_mul_bcast",
     "test_mul_uint8",
+    "test_reduce_prod_default_axes_keepdims_example", // FP16 only
+    "test_reduce_prod_default_axes_keepdims_random", // FP16 only
+    "test_reduce_prod_do_not_keepdims_random", // FP16 only
+    "test_reduce_prod_keepdims_random", // FP16 only
+    "test_reduce_prod_negative_axes_keepdims_random", // FP16 only
+    "test_reduce_sum_square_default_axes_keepdims_random", // FP16 only
+    "test_reduce_sum_square_do_not_keepdims_random", // FP16 only
+    "test_reduce_sum_square_keepdims_random", // FP16 only
+    "test_reduce_sum_square_negative_axes_keepdims_random", // FP16 only
     "test_softmax_default_axis",
     "test_softmax_large_number", // FP16 only
     "test_softmax_large_number_expanded", // FP16 only

View File

@@ -169,16 +169,17 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias)
         backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
-    if (backend == DNN_BACKEND_CUDA)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported
-    if (backend == DNN_BACKEND_VKCOM)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU &&
         getInferenceEngineCPUType() == CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_ARM_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif
 
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // supports only <= 2 inputs
+    if (backend == DNN_BACKEND_VKCOM)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported
+
     String basename = "conv_variable_wb";
     Net net = readNetFromONNX(_tf("models/" + basename + ".onnx"));
     ASSERT_FALSE(net.empty());
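The relocation matters more than the diff makes obvious: the CUDA and Vulkan skips used to sit inside what appears to be the Inference-Engine-guarded block above, so builds without that guard never applied them; placed after the #endif they hold in every configuration, and the CUDA comment now records the actual limitation (at most two inputs supported). A toy illustration of the pitfall, using a hypothetical guard macro:

    #include <cstdio>

    // #define HAVE_SOME_BACKEND  // uncomment to simulate the guarded build

    int main() {
    #ifdef HAVE_SOME_BACKEND
        std::puts("skip applied only when the unrelated guard is defined (pre-fix)");
    #endif
        std::puts("skip applied in every build (post-fix placement)");
    }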
@@ -464,11 +465,15 @@ TEST_P(Test_ONNX_layers, Scale)
 TEST_P(Test_ONNX_layers, Scale_broadcast)
 {
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // doesn't support broadcasting
     testONNXModels("scale_broadcast", npy, 0, 0, false, true, 3);
 }
 
 TEST_P(Test_ONNX_layers, Scale_broadcast_mid)
 {
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // doesn't support broadcasting
     testONNXModels("scale_broadcast_mid", npy, 0, 0, false, true, 2);
 }
@@ -2131,7 +2136,7 @@ TEST_P(Test_ONNX_nets, Emotion_ferplus)
     double lInf = default_lInf;
 
     // Output values are in range [-2.011, 2.111]
-    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+    if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) || (target == DNN_TARGET_CUDA_FP16))
         l1 = 0.007;
     else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16)
     {