fixed build under CUDA 4.1

2025-06-20 10:00:51 +08:00 · 2012-01-30 13:15:20 +00:00 · 2012-01-30 13:15:20 +00:00 · f8aba8608d
commit f8aba8608d
parent 7ddb706b29
4 changed files with 83 additions and 32 deletions
--- a/modules/gpu/src/element_operations.cpp
+++ b/modules/gpu/src/element_operations.cpp
@@ -680,6 +680,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea

    bool aligned = isAligned(src1.data, 16) && isAligned(src2.data, 16) && isAligned(dst.data, 16);

+#if CUDART_VERSION == 4000 
    if (aligned && src1.depth() == CV_8U && (src1.cols * src1.channels()) % 4 == 0)
    {
        NppStreamHandler h(stream);
@@ -692,7 +693,10 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
-    else if (aligned && src1.depth() == CV_8U)
+    else 
+#endif
+    {
+        if (aligned && src1.depth() == CV_8U)
        {
            NppStreamHandler h(stream);

@@ -702,6 +706,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
+#if CUDART_VERSION == 4000 
        else if (aligned && src1.depth() == CV_32S)
        {
            NppStreamHandler h(stream);
@@ -712,6 +717,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
+#endif
        else if (aligned && src1.depth() == CV_32F)
        {
            NppStreamHandler h(stream);
@@ -729,6 +735,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea

            func(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
    }
+    }
 }

 void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Stream& s)
--- a/modules/gpu/src/graphcuts.cpp
+++ b/modules/gpu/src/graphcuts.cpp
@@ -77,8 +77,18 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans

    NppStreamHandler h(stream);

+#if CUDART_VERSION > 4000 
+    NppiGraphcutState* pState;
+    nppSafeCall( nppiGraphcutInitAlloc(sznpp, &pState, buf.ptr<Npp8u>()) );
+    
+    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
+        static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), pState) );
+
+    nppSafeCall( nppiGraphcutFree(pState) );
+#else
    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
        static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), buf.ptr<Npp8u>()) );
+#endif

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -935,6 +935,31 @@ void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)

 void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)
 {
+#if CUDART_VERSION > 4000 
+    CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_64FC1);
+
+    dst.create(src.size(), CV_32FC1);
+
+    NppiSize sz;
+    sz.width = src.cols;
+    sz.height = src.rows;
+
+    NppiRect nppRect;
+    nppRect.height = rect.height;
+    nppRect.width = rect.width;
+    nppRect.x = rect.x;
+    nppRect.y = rect.y;
+
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    NppStreamHandler h(stream);
+
+    nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp64f>(), static_cast<int>(sqr.step),
+                dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );
+
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
+#else
    CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_32FC1);

    dst.create(src.size(), CV_32FC1);
@@ -958,6 +983,7 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
+#endif
 }


--- a/modules/gpu/src/matrix_reductions.cpp
+++ b/modules/gpu/src/matrix_reductions.cpp
@@ -117,7 +117,15 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)

    DeviceBuffer dbuf(2);

+#if CUDART_VERSION > 4000 
+    int bufSize;
+    nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
+
+    GpuMat buf(1, bufSize, CV_8UC1);
+    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );
+#else
    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, dbuf, (double*)dbuf + 1) );
+#endif

    cudaSafeCall( cudaDeviceSynchronize() );