Mirror of https://github.com/opencv/opencv.git, synced 2024-11-28.
Merge pull request #21884 from rogday:cuda_cleanup

Fix CUDA compilation issues and adjust thresholds.

* Fix CUDA compilation issues and adjust thresholds.
* add conformance tests to denylist

parent 27c15bed60
commit 9cd5a0a1e6
@@ -260,7 +260,7 @@ void shrink(const Stream& stream, Span<T> output, View<T> input, T bias, T lambd
 template <class T>
 void reciprocal(const Stream& stream, Span<T> output, View<T> input) {
-    generic_op<T, SignFunctor<T>>(stream, output, input);
+    generic_op<T, ReciprocalFunctor<T>>(stream, output, input);
 }

 template <class T>
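Note: the removed line was a copy-paste bug — the CUDA `reciprocal` kernel dispatched `SignFunctor`, so a Reciprocal layer on CUDA computed sign(x) rather than 1/x. A minimal host-side sketch of the dispatch pattern (an illustrative simplification: the real `generic_op` launches a CUDA kernel that applies the functor per element over a Span/View pair, not a plain loop):

    // Host-side stand-in for the functor-dispatch pattern used above.
    #include <cstdio>

    template <class T>
    struct SignFunctor {
        T operator()(T x) const { return x > T(0) ? T(1) : (x < T(0) ? T(-1) : T(0)); }
    };

    template <class T>
    struct ReciprocalFunctor {
        T operator()(T x) const { return T(1.f) / x; }
    };

    template <class T, class Functor>
    void generic_op(T* output, const T* input, int n) {
        Functor f;
        for (int i = 0; i < n; i++)
            output[i] = f(input[i]);  // one thread per element in the real kernel
    }

    int main() {
        float in[3] = {4.f, -2.f, 0.5f}, out[3];
        generic_op<float, SignFunctor<float>>(out, in, 3);        // the bug: {1, -1, 1}
        printf("sign:       %g %g %g\n", out[0], out[1], out[2]);
        generic_op<float, ReciprocalFunctor<float>>(out, in, 3);  // the fix: {0.25, -0.5, 2}
        printf("reciprocal: %g %g %g\n", out[0], out[1], out[2]);
    }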
@@ -732,7 +732,8 @@ struct SignFunctor {
         CUDA4DNN_HOST_DEVICE Params() {}
     };

-    CUDA4DNN_DEVICE SignFunctor() : SignFunctor(Params{}) { }
+    CUDA4DNN_DEVICE SignFunctor() { }
+    CUDA4DNN_DEVICE SignFunctor(const Params& params) { }

     CUDA4DNN_DEVICE T operator()(T value) {
         return value > T(0) ? T(1) : (value < T(0) ? T(-1) : T(0));
@@ -747,7 +748,7 @@ struct ShrinkFunctor {
         T bias, lambd;
     };

-    CUDA4DNN_DEVICE ShrinkFunctor() : bias(0), lambd(0.5) { }
+    CUDA4DNN_DEVICE ShrinkFunctor() : ShrinkFunctor(Params{}) { }
     CUDA4DNN_DEVICE ShrinkFunctor(const Params& params) : bias{params.bias}, lambd{params.lambd} { }

     CUDA4DNN_DEVICE T operator()(T value) {
@@ -763,10 +764,11 @@ struct ReciprocalFunctor {
         CUDA4DNN_HOST_DEVICE Params() {}
     };

-    CUDA4DNN_DEVICE ReciprocalFunctor() : ReciprocalFunctor(Params{}) { }
+    CUDA4DNN_DEVICE ReciprocalFunctor() { }
+    CUDA4DNN_DEVICE ReciprocalFunctor(const Params& params) { }

     CUDA4DNN_DEVICE T operator()(T value) {
-        return T(1.0f)/value;
+        return T(1.f)/value;
     }
 };
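Note: these three hunks are the compilation fix named in the title. A delegating constructor such as `SignFunctor() : SignFunctor(Params{})` only compiles if a `SignFunctor(const Params&)` overload exists, which the parameterless functors lacked; they now get a trivial `Params` overload instead. `ShrinkFunctor`, which does carry state, now delegates so its defaults live in `Params` alone. A sketch of the resulting pattern (assumptions: the CUDA4DNN_DEVICE/CUDA4DNN_HOST_DEVICE qualifiers are dropped so this builds as plain C++, and `Params` carries the former `bias(0), lambd(0.5)` defaults, as the delegation implies):

    #include <cassert>

    struct ShrinkFunctor {
        struct Params {
            Params() : bias(0), lambd(0.5f) { }  // defaults now live in Params
            float bias, lambd;
        };

        // Delegating ctor: compiles only because the Params overload below exists.
        ShrinkFunctor() : ShrinkFunctor(Params{}) { }
        ShrinkFunctor(const Params& params) : bias{params.bias}, lambd{params.lambd} { }

        float operator()(float x) const {
            return x > lambd ? x - bias : (x < -lambd ? x + bias : 0.f);
        }

        float bias, lambd;
    };

    int main() {
        ShrinkFunctor shrink;          // picks up bias = 0, lambd = 0.5
        assert(shrink(1.f) == 1.f);    // above lambd: passed through, shifted by bias (0)
        assert(shrink(0.25f) == 0.f);  // inside [-lambd, lambd]: clamped to zero
    }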
@@ -2080,6 +2080,7 @@ public:
     {
         auto context = reinterpret_cast<csl::CSLContext*>(context_);

+        // TODO: extract bias from inputs and pass it
         CV_Assert(inputs.size() == 1 || inputs.size() == 2);
         auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
         auto input_shape = input_wrapper->getShape();
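Note: this hunk records intent rather than behavior — the assert already tolerated an optional second (bias) input, and the new TODO marks that the CUDA path still does not consume it.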
@@ -2282,7 +2282,7 @@ struct SignFunctor : public BaseDefaultFunctor<SignFunctor>

     inline float calculate(float x) const
     {
-        return x > 0 ? 1 : (x < 0 ? -1 : 0);
+        return x > 0.f ? 1.f : (x < 0.f ? -1.f : 0.f);
     }

 #ifdef HAVE_CUDA
@@ -2315,13 +2315,13 @@ struct ShrinkFunctor : public BaseDefaultFunctor<ShrinkFunctor>

     inline float calculate(float x) const
     {
-        return x > lambd ? x - bias : (x < -lambd ? x + bias : 0);
+        return x > lambd ? x - bias : (x < -lambd ? x + bias : 0.f);
     }

 #ifdef HAVE_CUDA
     Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
-        return make_cuda_node<cuda4dnn::ShrinkOp>(target, stream);
+        return make_cuda_node<cuda4dnn::ShrinkOp>(target, stream, bias, lambd);
     }
 #endif
@@ -2343,7 +2343,7 @@ struct ReciprocalFunctor : public BaseDefaultFunctor<ReciprocalFunctor>

     inline float calculate(float x) const
    {
-        return 1.0/x;
+        return 1.f/x;
     }

 #ifdef HAVE_CUDA
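Note: the literal changes in the three CPU functors above keep `calculate(float)` in single precision: `1.0/x` evaluates in double and converts back on return, and the suffixed `0.f`/`1.f` literals avoid similar implicit promotions (and the conversion warnings they trigger). For these values the results barely differ; a standalone check of the promotion rule itself (standard C++, not OpenCV code):

    #include <type_traits>

    int main() {
        float x = 3.f;
        // 1.0/x is evaluated in double and only converted back on return;
        // 1.f/x stays in float end to end.
        static_assert(std::is_same<decltype(1.0 / x), double>::value, "promoted to double");
        static_assert(std::is_same<decltype(1.f / x), float>::value, "stays in float");
    }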
@@ -320,7 +320,7 @@ __kernel void SignForward(const int n, __global T* in, __global T* out)
 {
     int index = get_global_id(0);
     if(index < n)
-        out[index] = in[index] > 0.f ? 1.0f : (in[index] < 0.f) ? -1.0f : 0.0f);
+        out[index] = in[index] > 0.f ? 1.0f : ((in[index] < 0.f) ? -1.0f : 0.0f);
 }

 __kernel void ReciprocalForward(const int n, __global T* in, __global T* out)
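Note: the removed line has one more closing parenthesis than opening ones, so this OpenCL kernel failed at kernel-build time; the fix balances the expression and makes the nested ternary's grouping explicit, matching the CUDA and CPU sign implementations above.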
@@ -516,7 +516,7 @@ TEST_P(DNNTestNetwork, DenseNet_121)
     else if (target == DNN_TARGET_CUDA_FP16)
     {
         l1 = 0.008;
-        lInf = 0.05;
+        lInf = 0.06;
     }
     processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "", l1, lInf);
     if (target != DNN_TARGET_MYRIAD || getInferenceEngineVPUType() != CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
@@ -844,8 +844,9 @@ TEST_P(Test_two_inputs, basic)
     Mat ref;
     addWeighted(firstInp, kScale, secondInp, kScaleInv, 0, ref, CV_32F);

-    double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.06 : 1e-6;
-    double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.3 : 1e-5;
+    double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD || targetId == DNN_TARGET_CUDA_FP16) ? 0.06 : 1e-6;
+    double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD || targetId == DNN_TARGET_CUDA_FP16) ? 0.3 : 1e-5;

     normAssert(out, ref, "", l1, lInf);

     if (cvtest::debugLevel > 0 || HasFailure())
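Note on the threshold tweaks in this and the neighbouring test hunks: each test compares network output against a reference using a pair of tolerances, and FP16 targets (now including DNN_TARGET_CUDA_FP16) get looser ones. A rough model of what `normAssert` checks, under the assumption that `l1` bounds the mean absolute error and `lInf` the maximum absolute error:

    #include <cassert>
    #include <cmath>
    #include <cstddef>

    bool within_tolerance(const float* ref, const float* out, size_t n,
                          double l1, double lInf) {
        double sum = 0, maxAbs = 0;
        for (size_t i = 0; i < n; i++) {
            double d = std::fabs(double(ref[i]) - double(out[i]));
            sum += d;
            if (d > maxAbs) maxAbs = d;
        }
        return (sum / n) <= l1 && maxAbs <= lInf;  // both bounds must hold
    }

    int main() {
        float ref[2] = {1.00f, 2.00f}, out[2] = {1.01f, 2.02f};
        assert(within_tolerance(ref, out, 2, 0.06, 0.3));    // FP16-style tolerance
        assert(!within_tolerance(ref, out, 2, 1e-6, 1e-5));  // FP32-style tolerance
    }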
@@ -512,7 +512,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD)
     }
     else if (target == DNN_TARGET_CUDA_FP16)
     {
-        scoreDiff = 0.002;
+        scoreDiff = 0.0021;
         iouDiff = 1e-2;
     }
     float confThreshold = FLT_MIN;
@@ -661,7 +661,8 @@ TEST_P(Test_Model, Segmentation)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif

-    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+    if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+        || (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16))
     {
         norm = 2.0f; // l1 = 0.01 lInf = 2
     }
@@ -954,7 +954,7 @@ public:

     if (target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
     {
-        default_l1 = 4e-3;
+        default_l1 = 7e-3;
         default_lInf = 2e-2;
     }
     else
@@ -66,6 +66,15 @@
     "test_maxunpool_export_with_output_shape",
     "test_mul_bcast",
     "test_mul_uint8",
+    "test_reduce_prod_default_axes_keepdims_example", // FP16 only
+    "test_reduce_prod_default_axes_keepdims_random", // FP16 only
+    "test_reduce_prod_do_not_keepdims_random", // FP16 only
+    "test_reduce_prod_keepdims_random", // FP16 only
+    "test_reduce_prod_negative_axes_keepdims_random", // FP16 only
+    "test_reduce_sum_square_default_axes_keepdims_random", // FP16 only
+    "test_reduce_sum_square_do_not_keepdims_random", // FP16 only
+    "test_reduce_sum_square_keepdims_random", // FP16 only
+    "test_reduce_sum_square_negative_axes_keepdims_random", // FP16 only
     "test_softmax_default_axis",
     "test_softmax_large_number", // FP16 only
     "test_softmax_large_number_expanded", // FP16 only
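Note: the nine new denylist entries are marked FP16-only — long reduce_prod / reduce_sum_square accumulations amplify half-precision rounding past the default tolerances. A loose analogy one precision level up, accumulating in float against a double reference (illustration only, not OpenCV code):

    #include <cstdio>

    int main() {
        float  acc_f = 1.f;
        double acc_d = 1.0;
        for (int i = 0; i < 200; i++) {
            float v = 1.001f + 1e-4f * (i % 7);  // values near 1, as in the random tests
            acc_f *= v;                          // low-precision accumulation drifts
            acc_d *= double(v);                  // higher-precision reference
        }
        printf("float: %.9g  double: %.17g  rel.err: %g\n",
               acc_f, acc_d, (double(acc_f) - acc_d) / acc_d);
    }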
@@ -169,16 +169,17 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias)
         backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);

-    if (backend == DNN_BACKEND_CUDA)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported
-    if (backend == DNN_BACKEND_VKCOM)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported
-
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU &&
         getInferenceEngineCPUType() == CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_ARM_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif

+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // supports only <= 2 inputs
+
+    if (backend == DNN_BACKEND_VKCOM)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported
+
     String basename = "conv_variable_wb";
     Net net = readNetFromONNX(_tf("models/" + basename + ".onnx"));
     ASSERT_FALSE(net.empty());
@@ -464,11 +465,15 @@ TEST_P(Test_ONNX_layers, Scale)

 TEST_P(Test_ONNX_layers, Scale_broadcast)
 {
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // doesn't support broadcasting
     testONNXModels("scale_broadcast", npy, 0, 0, false, true, 3);
 }

 TEST_P(Test_ONNX_layers, Scale_broadcast_mid)
 {
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // doesn't support broadcasting
     testONNXModels("scale_broadcast_mid", npy, 0, 0, false, true, 2);
 }
@@ -2131,7 +2136,7 @@ TEST_P(Test_ONNX_nets, Emotion_ferplus)
     double lInf = default_lInf;

     // Output values are in range [-2.011, 2.111]
-    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+    if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) || (target == DNN_TARGET_CUDA_FP16))
         l1 = 0.007;
     else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16)
     {