From e375d5786b3f31834d6fcc058cd6dee187bba240 Mon Sep 17 00:00:00 2001
From: cudawarped <12133430+cudawarped@users.noreply.github.com>
Date: Wed, 2 Oct 2024 13:52:37 +0300
Subject: [PATCH] cuda - update npp calls to use the new NppStreamContext API if available

---
 .../include/opencv2/core/private.cuda.hpp | 32 ++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/modules/core/include/opencv2/core/private.cuda.hpp b/modules/core/include/opencv2/core/private.cuda.hpp
index 36edd8ab31..39f2ddcdeb 100644
--- a/modules/core/include/opencv2/core/private.cuda.hpp
+++ b/modules/core/include/opencv2/core/private.cuda.hpp
@@ -134,6 +134,36 @@ namespace cv { namespace cuda
     template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
     template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
 
+#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV_Func)
+// NppStreamContext was introduced in NPP version 10100, included in CUDA toolkit 10.1 (CUDA_VERSION == 10010); however, not all of the NPP functions called internally by OpenCV
+// - have an NppStreamContext argument (e.g. nppiHistogramEvenGetBufferSize_8u_C1R_Ctx in CUDA 12.3) and/or
+// - have a corresponding function in the supplied library (e.g. nppiEvenLevelsHost_32s_Ctx is not present in nppist.lib or libnppist.so as of CUDA 12.6)
+// Because support for these functions has gradually been introduced without being mentioned in the release notes, this flag is set to a version of NPP (version 12205, included in CUDA toolkit 12.4) which is known to work.
+#define USE_NPP_STREAM_CTX (NPP_VERSION >= 12205) // parenthesised so the flag can be safely negated or combined inside #if expressions
+#if USE_NPP_STREAM_CTX
+    class NppStreamHandler // holds an NppStreamContext bound to a caller-supplied CUDA stream, for use with the *_Ctx NPP entry points
+    {
+    public:
+        inline explicit NppStreamHandler(cudaStream_t newStream)
+        {
+            nppStreamContext = {}; // start from an all-zero context
+            nppSafeCall(nppGetStreamContext(&nppStreamContext)); // ask NPP to populate the context
+            nppStreamContext.hStream = newStream; // then point it at the requested stream
+            cudaSafeCall(cudaStreamGetFlags(nppStreamContext.hStream, &nppStreamContext.nStreamFlags)); // keep nStreamFlags in step with the stream just assigned
+        }
+        // Convenience overload: unwraps the cv::cuda::Stream and delegates to the cudaStream_t constructor.
+        inline explicit NppStreamHandler(Stream& newStream) : NppStreamHandler(StreamAccessor::getStream(newStream)) {}
+        // Implicit conversion so a handler can be passed directly where an NppStreamContext is expected.
+        inline operator NppStreamContext() const {
+            return nppStreamContext;
+        }
+        // Explicit accessor returning a copy of the stored context; const so it is usable on const handlers too.
+        inline NppStreamContext get() const { return nppStreamContext; }
+        // The context built by the constructor; only ever handed out by value.
+    private:
+        NppStreamContext nppStreamContext;
+    };
+#else
     class NppStreamHandler
     {
     public:
@@ -157,9 +187,9 @@ namespace cv { namespace cuda
     private:
         cudaStream_t oldStream;
     };
+#endif
 }}
 
-#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV_Func)
 #define cuSafeCall(expr) cv::cuda::checkCudaDriverApiError(expr, __FILE__, __LINE__, CV_Func)
 
 #endif // HAVE_CUDA