opencv/modules/dnn/src/net_impl_backend.cpp
Aliaksei Urbanski 35ca2f78d6
Merge pull request #25880 from Jamim:fix/cuda-no-fp16
Fix CUDA for old GPUs without FP16 support #25880

Fixes #21461

~This is a build-time solution that reflects https://github.com/opencv/opencv/blob/4.10.0/modules/dnn/src/cuda4dnn/init.hpp#L68-L82.~
~We shouldn't add an invalid target while building with `CUDA_ARCH_BIN` < 53.~
_(please see [this discussion](https://github.com/opencv/opencv/pull/25880#discussion_r1668074505))_

This is a run-time solution that basically reverts [these lines](d0fe6ad109 (diff-757c5ab6ddf2f99cdd09f851e3cf17abff203aff4107d908c7ad3d0466f39604L245-R245)).

I've debugged these changes, [coupled with other fixes](https://github.com/gentoo/gentoo/pull/37479), on [Gentoo Linux](https://www.gentoo.org/) and [related tests passed](https://github.com/user-attachments/files/16135391/opencv-4.10.0.20240708-224733.log.gz) on my laptop with `GeForce GTX 960M`.

Alternative solution:
  - #21462

_Best regards!_

### Pull Request Readiness Checklist

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] `n/a` There is accuracy test, performance test and test data in opencv_extra repository, if applicable
- [ ] `n/a` The feature is well documented and sample code can be built with the project CMake
2024-07-10 12:39:30 +03:00

282 lines
8.8 KiB
C++

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "net_impl.hpp"
#include "legacy_backend.hpp"
#include "backend.hpp"
#include "factory.hpp"
#ifdef HAVE_CUDA
#include "cuda4dnn/init.hpp"
#endif
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
// Produces the backend-side wrapper for a host blob.
//
// - OpenCV backend on a CPU target: returns an empty pointer.
// - A wrapper is already registered for host.data: a backend-specific wrapper
//   is built on top of the registered one and returned (it is not stored in
//   backendWrappers).
// - Otherwise, or when the matching branch has no return statement in the
//   current build configuration: a wrapper is created through wrapMat(),
//   stored in backendWrappers under host.data and returned.
Ptr<BackendWrapper> Net::Impl::wrap(Mat& host)
{
    const bool isCpuTarget = preferableTarget == DNN_TARGET_CPU ||
                             preferableTarget == DNN_TARGET_CPU_FP16;
    if (preferableBackend == DNN_BACKEND_OPENCV && isCpuTarget)
        return Ptr<BackendWrapper>();

    // Copy the blob dimensions into a MatShape for the backends that take a shape.
    MatShape shape(host.dims);
    for (int axis = 0; axis < host.dims; ++axis)
        shape[axis] = host.size[axis];

    void* data = host.data;
    const auto registered = backendWrappers.find(data);
    if (registered != backendWrappers.end())
    {
        // The same memory is already wrapped: derive the new wrapper from it.
        Ptr<BackendWrapper> baseBuffer = registered->second;

        if (preferableBackend == DNN_BACKEND_OPENCV)
        {
#ifdef HAVE_OPENCL
            CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
            return OpenCLBackendWrapper::create(baseBuffer, host);
#else
            CV_Error(Error::StsInternal, "");
#endif
        }
        else if (preferableBackend == DNN_BACKEND_HALIDE)
        {
            CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
            return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
#endif
        }
        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
        {
            CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019;
        }
        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
        {
            // This branch wraps the host Mat directly and ignores baseBuffer.
            return wrapMat(preferableBackend, preferableTarget, host);
        }
        else if (preferableBackend == DNN_BACKEND_WEBNN)
        {
#ifdef HAVE_WEBNN
            return wrapMat(preferableBackend, preferableTarget, host);
#endif
        }
        else if (preferableBackend == DNN_BACKEND_VKCOM)
        {
#ifdef HAVE_VULKAN
            return Ptr<BackendWrapper>(new VkComBackendWrapper(baseBuffer, host));
#endif
        }
        else if (preferableBackend == DNN_BACKEND_CUDA)
        {
            CV_Assert(haveCUDA());
#ifdef HAVE_CUDA
            switch (preferableTarget)
            {
            case DNN_TARGET_CUDA:
                return CUDABackendWrapperFP32::create(baseBuffer, shape);
            case DNN_TARGET_CUDA_FP16:
                return CUDABackendWrapperFP16::create(baseBuffer, shape);
            default:
                CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget));
            }
#endif
        }
        else if (preferableBackend == DNN_BACKEND_TIMVX)
        {
#ifdef HAVE_TIMVX
            return Ptr<BackendWrapper>(new TimVXBackendWrapper(baseBuffer, host));
#endif
        }
        else if (preferableBackend == DNN_BACKEND_CANN)
        {
            CV_Assert(0 && "Internal error: DNN_BACKEND_CANN must be implemented through inheritance");
        }
        else
            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    }

    // First time this memory is seen (or the branch above did not return):
    // create a wrapper and register it under the data pointer.
    Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
    backendWrappers[data] = wrapper;
    return wrapper;
}
// Runs the initialization routine of the currently selected backend.
//
// blobsToKeep_ is forwarded to the WebNN and CUDA initializers only.
// Raises Error::StsNotImplemented when the selected backend is not compiled
// into this build or the backend identifier is unknown. The OpenVINO (nGraph)
// and CANN backends are expected to be handled by derived implementations, so
// reaching them here fails an assertion.
void Net::Impl::initBackend(const std::vector<LayerPin>& blobsToKeep_)
{
    CV_TRACE_FUNCTION();

    switch (preferableBackend)
    {
    case DNN_BACKEND_OPENCV:
        // Nothing to initialize; only validate the backend/target combination.
        CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_CPU_FP16 || IS_DNN_OPENCL_TARGET(preferableTarget));
        break;

    case DNN_BACKEND_HALIDE:
#ifdef HAVE_HALIDE
        initHalideBackend();
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Halide");
#endif
        break;

    case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH:
        CV_Assert(0 && "Inheritance must be used with OpenVINO backend");
        break;

    case DNN_BACKEND_WEBNN:
#ifdef HAVE_WEBNN
        initWebnnBackend(blobsToKeep_);
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of WebNN");
#endif
        break;

    case DNN_BACKEND_VKCOM:
#ifdef HAVE_VULKAN
        initVkComBackend();
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Vulkan");
#endif
        break;

    case DNN_BACKEND_CUDA:
#ifdef HAVE_CUDA
        initCUDABackend(blobsToKeep_);
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of CUDA/CUDNN");
#endif
        break;

    case DNN_BACKEND_TIMVX:
#ifdef HAVE_TIMVX
        initTimVXBackend();
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of TimVX");
#endif
        break;

    case DNN_BACKEND_CANN:
        CV_Assert(0 && "Internal error: DNN_BACKEND_CANN must be implemented through inheritance");
        break;

    default:
        CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend));
    }
}
// Selects the backend used by the network.
//
// The requested id is normalized first: DNN_BACKEND_DEFAULT is replaced by the
// configured default and DNN_BACKEND_INFERENCE_ENGINE by the nGraph backend.
// For quantized networks an unsupported backend is replaced by
// DNN_BACKEND_OPENCV with a warning. If the resulting backend differs from the
// current one, the network state is cleared and the switch is performed;
// OpenVINO and CANN are switched through their dedicated routines, which
// receive `net`.
void Net::Impl::setPreferableBackend(Net& net, int backendId)
{
    if (backendId == DNN_BACKEND_DEFAULT)
        backendId = (Backend)getParam_DNN_BACKEND_DEFAULT();

    if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
        backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; // = getInferenceEngineBackendTypeParam();

    if (netWasQuantized)
    {
        const bool handlesQuantized = backendId == DNN_BACKEND_OPENCV ||
                                      backendId == DNN_BACKEND_TIMVX ||
                                      backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
        if (!handlesQuantized)
        {
            CV_LOG_WARNING(NULL, "DNN: Only default, TIMVX and OpenVINO backends support quantized networks");
            backendId = DNN_BACKEND_OPENCV;
        }
    }

#ifdef HAVE_DNN_NGRAPH
    // Quantized networks additionally need a recent enough OpenVINO release.
    if (netWasQuantized && backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2023_0))
    {
        CV_LOG_WARNING(NULL, "DNN: OpenVINO 2023.0 and higher is required to supports quantized networks");
        backendId = DNN_BACKEND_OPENCV;
    }
#endif

    // Nothing to do when the effective backend is unchanged.
    if (preferableBackend == backendId)
        return;

    clear();

    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
#if defined(HAVE_INF_ENGINE)
        switchToOpenVINOBackend(net);
#elif defined(ENABLE_PLUGINS)
        auto& networkBackend = dnn_backend::createPluginDNNNetworkBackend("openvino");
        networkBackend.switchBackend(net);
#else
        CV_Error(Error::StsNotImplemented, "OpenVINO backend is not available in the current OpenCV build");
#endif
    }
    else if (backendId == DNN_BACKEND_CANN)
    {
#ifdef HAVE_CANN
        switchToCannBackend(net);
#else
        CV_Error(Error::StsNotImplemented, "CANN backend is not availlable in the current OpenCV build");
#endif
    }
    else
    {
        // All remaining backends only need the identifier to be recorded here.
        preferableBackend = backendId;
    }
}
// Selects the target device used by the network.
//
// The requested target may be replaced by a fallback:
//  - quantized networks accept only CPU, OpenCL, OpenCL FP16 and NPU targets;
//    anything else becomes DNN_TARGET_CPU (with a warning);
//  - OpenCL targets fall back to CPU when OpenCV is built without OpenCL
//    (for the OpenCV/default backend), and OpenCL FP16 falls back to OpenCL
//    when the default device lacks the cl_khr_fp16 extension;
//  - CUDA targets fall back to CPU when OpenCV is built without CUDA, and
//    CUDA FP16 falls back to CUDA when the device does not support FP16;
//  - DNN_TARGET_CPU_FP16 falls back to DNN_TARGET_CPU on non-arm64 builds.
//
// When the requested target differs from the current one the network state is
// cleared. Selecting DNN_TARGET_CPU_FP16 also disables Winograd convolution if
// it was enabled.
void Net::Impl::setPreferableTarget(int targetId)
{
    if (netWasQuantized && targetId != DNN_TARGET_CPU &&
        targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16 && targetId != DNN_TARGET_NPU)
    {
        CV_LOG_WARNING(NULL, "DNN: Only CPU, OpenCL/OpenCL FP16 and NPU targets are supported by quantized networks");
        targetId = DNN_TARGET_CPU;
    }

    if (preferableTarget != targetId)
    {
        preferableTarget = targetId;

        if (IS_DNN_OPENCL_TARGET(targetId))
        {
#ifndef HAVE_OPENCL
#ifdef HAVE_INF_ENGINE
            if (preferableBackend == DNN_BACKEND_OPENCV)
#else
            if (preferableBackend == DNN_BACKEND_DEFAULT ||
                preferableBackend == DNN_BACKEND_OPENCV)
#endif // HAVE_INF_ENGINE
                preferableTarget = DNN_TARGET_CPU;
#else
            bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
            if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
                preferableTarget = DNN_TARGET_OPENCL;
#endif
        }

        if (IS_DNN_CUDA_TARGET(targetId))
        {
            // Without CUDA support in this build the CPU target is used instead.
            preferableTarget = DNN_TARGET_CPU;
#ifdef HAVE_CUDA
            // FP16 is honoured only when the device supports it.
            if (cuda4dnn::doesDeviceSupportFP16() && targetId == DNN_TARGET_CUDA_FP16)
                preferableTarget = DNN_TARGET_CUDA_FP16;
            else
                preferableTarget = DNN_TARGET_CUDA;
#endif
        }

#if !defined(__arm64__) || !__arm64__
        if (targetId == DNN_TARGET_CPU_FP16)
        {
            CV_LOG_WARNING(NULL, "DNN: fall back to DNN_TARGET_CPU. Only ARM v8 CPU is supported by DNN_TARGET_CPU_FP16.");
            targetId = DNN_TARGET_CPU;
            // preferableTarget was already assigned the requested FP16 target
            // above, so it must be reset too for the fallback to take effect.
            preferableTarget = DNN_TARGET_CPU;
        }
#endif

        clear();

        if (targetId == DNN_TARGET_CPU_FP16)
        {
            if (useWinograd) {
                CV_LOG_INFO(NULL, "DNN: DNN_TARGET_CPU_FP16 is set => Winograd convolution is disabled by default to preserve accuracy. If needed, enable it explicitly using enableWinograd(true).");
                enableWinograd(false);
            }
        }
    }
}
CV__DNN_INLINE_NS_END
}} // namespace cv::dnn