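Fixed build under CUDA 4.1: NPP calls whose signatures differ between toolkit versions are now selected with CUDART_VERSION checks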

2025-06-20 01:41:14 +08:00 · 2012-01-30 13:15:20 +00:00 · 2012-01-30 13:15:20 +00:00 · f8aba8608d
commit f8aba8608d
parent 7ddb706b29
4 changed files with 83 additions and 32 deletions
--- a/modules/gpu/src/element_operations.cpp
+++ b/modules/gpu/src/element_operations.cpp
@ -680,6 +680,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
    bool aligned = isAligned(src1.data, 16) && isAligned(src2.data, 16) && isAligned(dst.data, 16);
 #if CUDART_VERSION == 4000 
    if (aligned && src1.depth() == CV_8U && (src1.cols * src1.channels()) % 4 == 0)
    {
        NppStreamHandler h(stream);
@ -692,7 +693,10 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
-    else if (aligned && src1.depth() == CV_8U)
+    else 
 #endif
    {
        if (aligned && src1.depth() == CV_8U)
        {
            NppStreamHandler h(stream);
@ -702,6 +706,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
 #if CUDART_VERSION == 4000 
        else if (aligned && src1.depth() == CV_32S)
        {
            NppStreamHandler h(stream);
@ -712,6 +717,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
 #endif
        else if (aligned && src1.depth() == CV_32F)
        {
            NppStreamHandler h(stream);
@ -730,6 +736,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
            func(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
    }
    }
 }
 void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Stream& s)
 {
--- a/modules/gpu/src/graphcuts.cpp
+++ b/modules/gpu/src/graphcuts.cpp
@ -77,8 +77,18 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
    NppStreamHandler h(stream);
 #if CUDART_VERSION > 4000 
    NppiGraphcutState* pState;
    nppSafeCall( nppiGraphcutInitAlloc(sznpp, &pState, buf.ptr<Npp8u>()) );
    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
        static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), pState) );
    nppSafeCall( nppiGraphcutFree(pState) );
 #else
    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
        static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), buf.ptr<Npp8u>()) );
 #endif
    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@ -935,6 +935,31 @@ void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)
 void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)
 {
 #if CUDART_VERSION > 4000 
    CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_64FC1);
    dst.create(src.size(), CV_32FC1);
    NppiSize sz;
    sz.width = src.cols;
    sz.height = src.rows;
    NppiRect nppRect;
    nppRect.height = rect.height;
    nppRect.width = rect.width;
    nppRect.x = rect.x;
    nppRect.y = rect.y;
    cudaStream_t stream = StreamAccessor::getStream(s);
    NppStreamHandler h(stream);
    nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp64f>(), static_cast<int>(sqr.step),
                dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );
    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
 #else
    CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_32FC1);
    dst.create(src.size(), CV_32FC1);
@ -958,6 +983,7 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
 #endif
 }
--- a/modules/gpu/src/matrix_reductions.cpp
+++ b/modules/gpu/src/matrix_reductions.cpp
@ -117,7 +117,15 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
    DeviceBuffer dbuf(2);
 #if CUDART_VERSION > 4000 
    int bufSize;
    nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
    GpuMat buf(1, bufSize, CV_8UC1);
    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );
 #else
    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, dbuf, (double*)dbuf + 1) );
 #endif
    cudaSafeCall( cudaDeviceSynchronize() );