diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu index 0e45fa46c2..9e0e7f86c3 100644 --- a/modules/gpu/src/cuda/matrix_reductions.cu +++ b/modules/gpu/src/cuda/matrix_reductions.cu @@ -328,13 +328,13 @@ namespace cv { namespace gpu { namespace mathfunc __shared__ best_type smaxval[nthreads]; uint tid = threadIdx.y * blockDim.x + threadIdx.x; - uint idx = min(tid, gridDim.x * gridDim.y - 1); + uint idx = min(tid, size - 1); sminval[tid] = minval[idx]; smaxval[tid] = maxval[idx]; __syncthreads(); - findMinMaxInSmem(sminval, smaxval, tid); + findMinMaxInSmem(sminval, smaxval, tid); if (tid == 0) { @@ -428,7 +428,7 @@ namespace cv { namespace gpu { namespace mathfunc // Returns required buffer sizes void getBufSizeRequired(int cols, int rows, int elem_size, int& b1cols, - int& b1rows, int& b2cols, int& b2rows) + int& b1rows, int& b2cols, int& b2rows) { dim3 threads, grid; estimateThreadCfg(cols, rows, threads, grid); @@ -623,7 +623,7 @@ namespace cv { namespace gpu { namespace mathfunc template void minMaxLocCaller(const DevMem2D src, double* minval, double* maxval, - int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf) + int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf) { dim3 threads, grid; estimateThreadCfg(src.cols, src.rows, threads, grid); @@ -671,7 +671,7 @@ namespace cv { namespace gpu { namespace mathfunc __shared__ uint smaxloc[nthreads]; uint tid = threadIdx.y * blockDim.x + threadIdx.x; - uint idx = min(tid, gridDim.x * gridDim.y - 1); + uint idx = min(tid, size - 1); sminval[tid] = minval[idx]; smaxval[tid] = maxval[idx]; @@ -679,7 +679,7 @@ namespace cv { namespace gpu { namespace mathfunc smaxloc[tid] = maxloc[idx]; __syncthreads(); - findMinMaxLocInSmem(sminval, smaxval, sminloc, smaxloc, tid); + findMinMaxLocInSmem(sminval, smaxval, sminloc, smaxloc, tid); if (tid == 0) { @@ -1150,7 +1150,7 @@ namespace cv { namespace gpu { namespace mathfunc const int tid = threadIdx.y * blockDim.x + threadIdx.x; - DstType res = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits::all(0); + DstType res = tid < size ? result[tid] : VecTraits::all(0); smem[tid] = res.x; smem[tid + nthreads] = res.y; __syncthreads(); @@ -1262,7 +1262,7 @@ namespace cv { namespace gpu { namespace mathfunc const int tid = threadIdx.y * blockDim.x + threadIdx.x; - DstType res = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits::all(0); + DstType res = tid < size ? result[tid] : VecTraits::all(0); smem[tid] = res.x; smem[tid + nthreads] = res.y; smem[tid + 2 * nthreads] = res.z; @@ -1384,7 +1384,7 @@ namespace cv { namespace gpu { namespace mathfunc const int tid = threadIdx.y * blockDim.x + threadIdx.x; - DstType res = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits::all(0); + DstType res = tid < size ? result[tid] : VecTraits::all(0); smem[tid] = res.x; smem[tid + nthreads] = res.y; smem[tid + 2 * nthreads] = res.z; diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp index bee31e5c13..0104bd02a2 100644 --- a/modules/gpu/src/initialization.cpp +++ b/modules/gpu/src/initialization.cpp @@ -41,7 +41,6 @@ //M*/ #include "precomp.hpp" -#include using namespace cv; using namespace cv::gpu; diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp index 732d36b8d0..abfc89476f 100644 --- a/modules/gpu/src/matrix_reductions.cpp +++ b/modules/gpu/src/matrix_reductions.cpp @@ -276,11 +276,11 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp minMaxMaskCaller }; CV_Assert(src.channels() == 1); + CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size())); - bool double_ok = hasGreaterOrEqualVersion(1, 3) && - hasNativeDoubleSupport(getDevice()); - CV_Assert(src.type() != CV_64F || double_ok); + CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) && + hasNativeDoubleSupport(getDevice()))); double minVal_; if (!minVal) minVal = &minVal_; double maxVal_; if (!maxVal) maxVal = &maxVal_; @@ -375,11 +375,11 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point minMaxLocMaskCaller }; CV_Assert(src.channels() == 1); + CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size())); - bool double_ok = hasGreaterOrEqualVersion(1, 3) && - hasNativeDoubleSupport(getDevice()); - CV_Assert(src.type() != CV_64F || double_ok); + CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) && + hasNativeDoubleSupport(getDevice()))); double minVal_; if (!minVal) minVal = &minVal_; double maxVal_; if (!maxVal) maxVal = &maxVal_; @@ -388,7 +388,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point Size valbuf_size, locbuf_size; getBufSizeRequired(src.cols, src.rows, src.elemSize(), valbuf_size.width, - valbuf_size.height, locbuf_size.width, locbuf_size.height); + valbuf_size.height, locbuf_size.width, locbuf_size.height); ensureSizeIsEnough(valbuf_size, CV_8U, valBuf); ensureSizeIsEnough(locbuf_size, CV_8U, locBuf); @@ -459,9 +459,8 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) CV_Assert(src.channels() == 1); - bool double_ok = hasGreaterOrEqualVersion(1, 3) && - hasNativeDoubleSupport(getDevice()); - CV_Assert(src.type() != CV_64F || double_ok); + CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) && + hasNativeDoubleSupport(getDevice()))); Size buf_size; getBufSizeRequired(src.cols, src.rows, buf_size.width, buf_size.height); diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp index 03acb4a2bd..e0f2e004e2 100644 --- a/modules/gpu/src/precomp.hpp +++ b/modules/gpu/src/precomp.hpp @@ -57,6 +57,7 @@ #include #include #include +#include #include "opencv2/gpu/gpu.hpp" #include "opencv2/imgproc/imgproc.hpp" diff --git a/tests/gpu/src/arithm.cpp b/tests/gpu/src/arithm.cpp index 8fea5401bb..3bc0e7318e 100644 --- a/tests/gpu/src/arithm.cpp +++ b/tests/gpu/src/arithm.cpp @@ -49,7 +49,7 @@ using namespace std; using namespace gpu; #define CHECK(pred, err) if (!(pred)) { \ - ts->printf(CvTS::LOG, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \ + ts->printf(CvTS::CONSOLE, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \ ts->set_failed_test_info(err); \ return; }