cuda - update npp calls to use the new NppStreamContext API if available

This commit is contained in:
cudawarped 2024-10-02 13:52:37 +03:00
parent 3901426d85
commit e375d5786b

View File

@ -134,6 +134,36 @@ namespace cv { namespace cuda
// Specializations of NPPTypeTraits that expose, as npp_type, the NPP scalar type
// matching the CV_32F (Npp32f) and CV_64F (Npp64f) depth constants.
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
// Forwards the result of an NPP call, together with the current file, line and CV_Func,
// to cv::cuda::checkNppError.
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV_Func)
// NppStreamContext was introduced in NPP version 10100, included in CUDA toolkit 10.1 (CUDA_VERSION == 10010); however, not all of the NPP functions called internally by OpenCV
// - have an NppStreamContext argument (e.g. nppiHistogramEvenGetBufferSize_8u_C1R_Ctx in CUDA 12.3) and/or
// - have a corresponding function in the supplied library (e.g. nppiEvenLevelsHost_32s_Ctx is not present in nppist.lib or libnppist.so as of CUDA 12.6).
// Because support for these functions has been introduced gradually, without being mentioned in the release notes, this flag is set to a version of NPP (version 12205, included in CUDA toolkit 12.4) which is known to work.
// Evaluates to true when the NPP headers in use are version 12205 or newer.
// The expansion is parenthesized so the comparison stays intact when the macro is
// combined with other operators (e.g. `#if !USE_NPP_STREAM_CTX` or `USE_NPP_STREAM_CTX && ...`).
#define USE_NPP_STREAM_CTX (NPP_VERSION >= 12205)
#if USE_NPP_STREAM_CTX
// Holds an NppStreamContext that has been configured for a given CUDA stream.
// The stored context can be obtained by value either through the implicit
// conversion operator or through get().
class NppStreamHandler
{
public:
    // Builds the context for newStream:
    //  1. start from a zero-initialized context,
    //  2. let nppGetStreamContext() fill it in,
    //  3. overwrite the stream handle with newStream,
    //  4. re-query the stream flags so that nStreamFlags describes newStream
    //     rather than whatever stream step 2 reported.
    // Failures of the NPP / CUDA calls are routed through nppSafeCall / cudaSafeCall.
    inline explicit NppStreamHandler(cudaStream_t newStream)
    {
        nppStreamContext = {};
        nppSafeCall(nppGetStreamContext(&nppStreamContext));
        nppStreamContext.hStream = newStream;
        cudaSafeCall(cudaStreamGetFlags(nppStreamContext.hStream, &nppStreamContext.nStreamFlags));
    }

    // Convenience overload: extracts the cudaStream_t from a Stream via StreamAccessor
    // and delegates to the constructor above.
    inline explicit NppStreamHandler(Stream& newStream) : NppStreamHandler(StreamAccessor::getStream(newStream)) {}

    // Implicit conversion; returns a copy of the stored context.
    inline operator NppStreamContext() const {
        return nppStreamContext;
    }

    // Returns a copy of the stored context. Declared const (it does not modify the
    // handler) so it is usable on const objects, consistent with the conversion operator.
    inline NppStreamContext get() const { return nppStreamContext; }

private:
    NppStreamContext nppStreamContext;
};
#else
class NppStreamHandler
{
public:
@ -157,9 +187,9 @@ namespace cv { namespace cuda
private:
cudaStream_t oldStream;
};
#endif
}}
// Forwards the result of an NPP call, together with the current file, line and CV_Func,
// to cv::cuda::checkNppError.
// NOTE(review): nppSafeCall is also defined earlier in this file with an identical
// expansion — confirm whether this second definition is still needed.
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV_Func)
// Forwards the result of a CUDA driver API call, together with the current file, line and
// CV_Func, to cv::cuda::checkCudaDriverApiError.
#define cuSafeCall(expr) cv::cuda::checkCudaDriverApiError(expr, __FILE__, __LINE__, CV_Func)
#endif // HAVE_CUDA