diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index dadad00d8e..13f237b9d2 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -425,10 +425,10 @@ namespace cv CV_EXPORTS Scalar sum(const GpuMat& m); //! finds global minimum and maximum array elements and returns their values - CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal=0); + CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat()); //! finds global minimum and maximum array elements and returns their values - CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, GpuMat& buf); + CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf); //! finds global minimum and maximum array elements and returns their values with locations CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0); diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp index df7c550df8..c7ca547976 100644 --- a/modules/gpu/src/arithm.cpp +++ b/modules/gpu/src/arithm.cpp @@ -65,8 +65,8 @@ double cv::gpu::norm(const GpuMat&, int) { throw_nogpu(); return 0.0; } double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_nogpu(); return 0.0; } void cv::gpu::flip(const GpuMat&, GpuMat&, int) { throw_nogpu(); } Scalar cv::gpu::sum(const GpuMat&) { throw_nogpu(); return Scalar(); } -void cv::gpu::minMax(const GpuMat&, double*, double*) { throw_nogpu(); } -void cv::gpu::minMax(const GpuMat&, double*, double*, GpuMat&) { throw_nogpu(); } +void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&) { throw_nogpu(); } +void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&, GpuMat&) { throw_nogpu(); } void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*) { throw_nogpu(); } void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, GpuMat&, GpuMat&) { throw_nogpu(); } int cv::gpu::countNonZero(const GpuMat&) { throw_nogpu(); return 0; } @@ -502,62 +502,68 @@ namespace cv { namespace gpu { namespace mathfunc { namespace minmax { void min_max_caller(const DevMem2D src, double* minval, double* maxval, PtrStep buf); template - void min_max_caller_2steps(const DevMem2D src, double* minval, double* maxval, PtrStep buf); + void min_max_mask_caller(const DevMem2D src, const PtrStep mask, double* minval, double* maxval, PtrStep buf); + + template + void min_max_multipass_caller(const DevMem2D src, double* minval, double* maxval, PtrStep buf); + + template + void min_max_mask_multipass_caller(const DevMem2D src, const PtrStep mask, double* minval, double* maxval, PtrStep buf); }}}} -void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal) +void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask) { GpuMat buf; - minMax(src, minVal, maxVal, buf); + minMax(src, minVal, maxVal, mask, buf); } - -void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, GpuMat& buf) +void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf) { using namespace mathfunc::minmax; + typedef void (*Caller)(const DevMem2D, double*, double*, PtrStep); + static const Caller callers[2][7] = + { { min_max_multipass_caller, min_max_multipass_caller, + min_max_multipass_caller, min_max_multipass_caller, + min_max_multipass_caller, min_max_multipass_caller, 0 }, + { min_max_caller, min_max_caller, + min_max_caller, min_max_caller, + min_max_caller, min_max_caller, min_max_caller } }; + + typedef void (*MaskedCaller)(const DevMem2D, const PtrStep, double*, double*, PtrStep); + static const MaskedCaller masked_callers[2][7] = + { { min_max_mask_multipass_caller, min_max_mask_multipass_caller, + min_max_mask_multipass_caller, min_max_mask_multipass_caller, + min_max_mask_multipass_caller, min_max_mask_multipass_caller, 0 }, + { min_max_mask_caller, min_max_mask_caller, + min_max_mask_caller, min_max_mask_caller, + min_max_mask_caller, min_max_mask_caller, + min_max_mask_caller } }; + + + CV_Assert(src.channels() == 1); + CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size())); + CV_Assert(src.type() != CV_64F || hasNativeDoubleSupport(getDevice())); + double minVal_; if (!minVal) minVal = &minVal_; double maxVal_; if (!maxVal) maxVal = &maxVal_; - - GpuMat src_ = src.reshape(1); Size bufSize; get_buf_size_required(src.elemSize(), bufSize.width, bufSize.height); buf.create(bufSize, CV_8U); - int device = getDevice(); - if (hasAtomicsSupport(device)) + if (mask.empty()) { - switch (src_.type()) - { - case CV_8U: min_max_caller(src_, minVal, maxVal, buf); break; - case CV_8S: min_max_caller(src_, minVal, maxVal, buf); break; - case CV_16U: min_max_caller(src_, minVal, maxVal, buf); break; - case CV_16S: min_max_caller(src_, minVal, maxVal, buf); break; - case CV_32S: min_max_caller(src_, minVal, maxVal, buf); break; - case CV_32F: min_max_caller(src_, minVal, maxVal, buf); break; - case CV_64F: - if (hasNativeDoubleSupport(device)) - { - min_max_caller(src_, minVal, maxVal, buf); - break; - } - default: CV_Error(CV_StsBadArg, "minMax: unsupported type"); - } + Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()]; + if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type"); + caller(src, minVal, maxVal, buf); } else { - switch (src_.type()) - { - case CV_8U: min_max_caller_2steps(src_, minVal, maxVal, buf); break; - case CV_8S: min_max_caller_2steps(src_, minVal, maxVal, buf); break; - case CV_16U: min_max_caller_2steps(src_, minVal, maxVal, buf); break; - case CV_16S: min_max_caller_2steps(src_, minVal, maxVal, buf); break; - case CV_32S: min_max_caller_2steps(src_, minVal, maxVal, buf); break; - case CV_32F: min_max_caller_2steps(src_, minVal, maxVal, buf); break; - default: CV_Error(CV_StsBadArg, "minMax: unsupported type"); - } + MaskedCaller caller = masked_callers[hasAtomicsSupport(getDevice())][src.type()]; + if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type"); + caller(src, mask, minVal, maxVal, buf); } } @@ -575,7 +581,7 @@ namespace cv { namespace gpu { namespace mathfunc { namespace minmaxloc { int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf); template - void min_max_loc_caller_2steps(const DevMem2D src, double* minval, double* maxval, + void min_max_loc_multipass_caller(const DevMem2D src, double* minval, double* maxval, int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf); }}}} @@ -627,12 +633,12 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point { switch (src.type()) { - case CV_8U: min_max_loc_caller_2steps(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break; - case CV_8S: min_max_loc_caller_2steps(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break; - case CV_16U: min_max_loc_caller_2steps(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break; - case CV_16S: min_max_loc_caller_2steps(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break; - case CV_32S: min_max_loc_caller_2steps(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break; - case CV_32F: min_max_loc_caller_2steps(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break; + case CV_8U: min_max_loc_multipass_caller(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break; + case CV_8S: min_max_loc_multipass_caller(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break; + case CV_16U: min_max_loc_multipass_caller(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break; + case CV_16S: min_max_loc_multipass_caller(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break; + case CV_32S: min_max_loc_multipass_caller(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break; + case CV_32F: min_max_loc_multipass_caller(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break; default: CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type"); } } @@ -652,7 +658,7 @@ namespace cv { namespace gpu { namespace mathfunc { namespace countnonzero { int count_non_zero_caller(const DevMem2D src, PtrStep buf); template - int count_non_zero_caller_2steps(const DevMem2D src, PtrStep buf); + int count_non_zero_multipass_caller(const DevMem2D src, PtrStep buf); }}}} @@ -691,12 +697,12 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) { switch (src.type()) { - case CV_8U: return count_non_zero_caller_2steps(src, buf); - case CV_8S: return count_non_zero_caller_2steps(src, buf); - case CV_16U: return count_non_zero_caller_2steps(src, buf); - case CV_16S: return count_non_zero_caller_2steps(src, buf); - case CV_32S: return count_non_zero_caller_2steps(src, buf); - case CV_32F: return count_non_zero_caller_2steps(src, buf); + case CV_8U: return count_non_zero_multipass_caller(src, buf); + case CV_8S: return count_non_zero_multipass_caller(src, buf); + case CV_16U: return count_non_zero_multipass_caller(src, buf); + case CV_16S: return count_non_zero_multipass_caller(src, buf); + case CV_32S: return count_non_zero_multipass_caller(src, buf); + case CV_32F: return count_non_zero_multipass_caller(src, buf); } } diff --git a/modules/gpu/src/cuda/mathfunc.cu b/modules/gpu/src/cuda/mathfunc.cu index ce927d3cf3..3f515e65bf 100644 --- a/modules/gpu/src/cuda/mathfunc.cu +++ b/modules/gpu/src/cuda/mathfunc.cu @@ -480,8 +480,8 @@ namespace cv { namespace gpu { namespace mathfunc } - template - __global__ void min_max_kernel(const DevMem2D src, T* minval, T* maxval) + template + __global__ void min_max_kernel(const DevMem2D src, Mask mask, T* minval, T* maxval) { typedef typename MinMaxTypeTraits::best_type best_type; __shared__ best_type sminval[nthreads]; @@ -491,17 +491,21 @@ namespace cv { namespace gpu { namespace mathfunc unsigned int y0 = blockIdx.y * blockDim.y * ctheight + threadIdx.y; unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x; - T val; T mymin = numeric_limits_gpu::max(); T mymax = numeric_limits_gpu::min(); - for (unsigned int y = 0; y < ctheight && y0 + y * blockDim.y < src.rows; ++y) + unsigned int y_end = min(y0 + (ctheight - 1) * blockDim.y + 1, src.rows); + unsigned int x_end = min(x0 + (ctwidth - 1) * blockDim.x + 1, src.cols); + for (unsigned int y = y0; y < y_end; y += blockDim.y) { - const T* ptr = (const T*)src.ptr(y0 + y * blockDim.y); - for (unsigned int x = 0; x < ctwidth && x0 + x * blockDim.x < src.cols; ++x) + const T* src_row = (const T*)src.ptr(y); + for (unsigned int x = x0; x < x_end; x += blockDim.x) { - val = ptr[x0 + x * blockDim.x]; - mymin = min(mymin, val); - mymax = max(mymax, val); + T val = src_row[x]; + if (mask(y, x)) + { + mymin = min(mymin, val); + mymax = max(mymax, val); + } } } @@ -559,6 +563,35 @@ namespace cv { namespace gpu { namespace mathfunc } + template + void min_max_mask_caller(const DevMem2D src, const PtrStep mask, double* minval, double* maxval, PtrStep buf) + { + dim3 threads, grid; + estimate_thread_cfg(threads, grid); + estimate_kernel_consts(src.cols, src.rows, threads, grid); + + T* minval_buf = (T*)buf.ptr(0); + T* maxval_buf = (T*)buf.ptr(1); + + min_max_kernel<256, T, Mask8U><<>>(src, Mask8U(mask), minval_buf, maxval_buf); + cudaSafeCall(cudaThreadSynchronize()); + + T minval_, maxval_; + cudaSafeCall(cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost)); + cudaSafeCall(cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost)); + *minval = minval_; + *maxval = maxval_; + } + + template void min_max_mask_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + template void min_max_mask_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + template void min_max_mask_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + template void min_max_mask_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + template void min_max_mask_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + template void min_max_mask_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + template void min_max_mask_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + + template void min_max_caller(const DevMem2D src, double* minval, double* maxval, PtrStep buf) { @@ -569,7 +602,7 @@ namespace cv { namespace gpu { namespace mathfunc T* minval_buf = (T*)buf.ptr(0); T* maxval_buf = (T*)buf.ptr(1); - min_max_kernel<256, T><<>>(src, minval_buf, maxval_buf); + min_max_kernel<256, T, MaskTrue><<>>(src, MaskTrue(), minval_buf, maxval_buf); cudaSafeCall(cudaThreadSynchronize()); T minval_, maxval_; @@ -584,13 +617,12 @@ namespace cv { namespace gpu { namespace mathfunc template void min_max_caller(const DevMem2D, double*, double*, PtrStep); template void min_max_caller(const DevMem2D, double*, double*, PtrStep); template void min_max_caller(const DevMem2D, double*, double*, PtrStep); - template void min_max_caller(const DevMem2D, double*, double*, PtrStep); + template void min_max_caller(const DevMem2D, double*,double*, PtrStep); template void min_max_caller(const DevMem2D, double*, double*, PtrStep); - // This kernel will be used only when compute capability is 1.0 template - __global__ void min_max_kernel_2ndstep(T* minval, T* maxval, int size) + __global__ void min_max_pass2_kernel(T* minval, T* maxval, int size) { typedef typename MinMaxTypeTraits::best_type best_type; __shared__ best_type sminval[nthreads]; @@ -615,7 +647,7 @@ namespace cv { namespace gpu { namespace mathfunc template - void min_max_caller_2steps(const DevMem2D src, double* minval, double* maxval, PtrStep buf) + void min_max_mask_multipass_caller(const DevMem2D src, const PtrStep mask, double* minval, double* maxval, PtrStep buf) { dim3 threads, grid; estimate_thread_cfg(threads, grid); @@ -624,8 +656,8 @@ namespace cv { namespace gpu { namespace mathfunc T* minval_buf = (T*)buf.ptr(0); T* maxval_buf = (T*)buf.ptr(1); - min_max_kernel<256, T><<>>(src, minval_buf, maxval_buf); - min_max_kernel_2ndstep<256, T><<<1, 256>>>(minval_buf, maxval_buf, grid.x * grid.y); + min_max_kernel<256, T, Mask8U><<>>(src, Mask8U(mask), minval_buf, maxval_buf); + min_max_pass2_kernel<256, T><<<1, 256>>>(minval_buf, maxval_buf, grid.x * grid.y); cudaSafeCall(cudaThreadSynchronize()); T minval_, maxval_; @@ -635,12 +667,41 @@ namespace cv { namespace gpu { namespace mathfunc *maxval = maxval_; } - template void min_max_caller_2steps(const DevMem2D, double*, double*, PtrStep); - template void min_max_caller_2steps(const DevMem2D, double*, double*, PtrStep); - template void min_max_caller_2steps(const DevMem2D, double*, double*, PtrStep); - template void min_max_caller_2steps(const DevMem2D, double*, double*, PtrStep); - template void min_max_caller_2steps(const DevMem2D, double*, double*, PtrStep); - template void min_max_caller_2steps(const DevMem2D, double*, double*, PtrStep); + template void min_max_mask_multipass_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + template void min_max_mask_multipass_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + template void min_max_mask_multipass_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + template void min_max_mask_multipass_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + template void min_max_mask_multipass_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + template void min_max_mask_multipass_caller(const DevMem2D, const PtrStep, double*, double*, PtrStep); + + + template + void min_max_multipass_caller(const DevMem2D src, double* minval, double* maxval, PtrStep buf) + { + dim3 threads, grid; + estimate_thread_cfg(threads, grid); + estimate_kernel_consts(src.cols, src.rows, threads, grid); + + T* minval_buf = (T*)buf.ptr(0); + T* maxval_buf = (T*)buf.ptr(1); + + min_max_kernel<256, T, MaskTrue><<>>(src, MaskTrue(), minval_buf, maxval_buf); + min_max_pass2_kernel<256, T><<<1, 256>>>(minval_buf, maxval_buf, grid.x * grid.y); + cudaSafeCall(cudaThreadSynchronize()); + + T minval_, maxval_; + cudaSafeCall(cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost)); + cudaSafeCall(cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost)); + *minval = minval_; + *maxval = maxval_; + } + + template void min_max_multipass_caller(const DevMem2D, double*, double*, PtrStep); + template void min_max_multipass_caller(const DevMem2D, double*, double*, PtrStep); + template void min_max_multipass_caller(const DevMem2D, double*, double*, PtrStep); + template void min_max_multipass_caller(const DevMem2D, double*, double*, PtrStep); + template void min_max_multipass_caller(const DevMem2D, double*, double*, PtrStep); + template void min_max_multipass_caller(const DevMem2D, double*, double*, PtrStep); } // namespace minmax @@ -861,7 +922,7 @@ namespace cv { namespace gpu { namespace mathfunc // This kernel will be used only when compute capability is 1.0 template - __global__ void min_max_loc_kernel_2ndstep(T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, int size) + __global__ void min_max_loc_pass2_kernel(T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, int size) { typedef typename MinMaxTypeTraits::best_type best_type; __shared__ best_type sminval[nthreads]; @@ -892,7 +953,7 @@ namespace cv { namespace gpu { namespace mathfunc template - void min_max_loc_caller_2steps(const DevMem2D src, double* minval, double* maxval, + void min_max_loc_multipass_caller(const DevMem2D src, double* minval, double* maxval, int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf) { dim3 threads, grid; @@ -905,7 +966,7 @@ namespace cv { namespace gpu { namespace mathfunc unsigned int* maxloc_buf = (unsigned int*)locbuf.ptr(1); min_max_loc_kernel<256, T><<>>(src, minval_buf, maxval_buf, minloc_buf, maxloc_buf); - min_max_loc_kernel_2ndstep<256, T><<<1, 256>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y); + min_max_loc_pass2_kernel<256, T><<<1, 256>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y); cudaSafeCall(cudaThreadSynchronize()); T minval_, maxval_; @@ -921,12 +982,12 @@ namespace cv { namespace gpu { namespace mathfunc maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols; } - template void min_max_loc_caller_2steps(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep); - template void min_max_loc_caller_2steps(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep); - template void min_max_loc_caller_2steps(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep); - template void min_max_loc_caller_2steps(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep); - template void min_max_loc_caller_2steps(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep); - template void min_max_loc_caller_2steps(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep); + template void min_max_loc_multipass_caller(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep); + template void min_max_loc_multipass_caller(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep); + template void min_max_loc_multipass_caller(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep); + template void min_max_loc_multipass_caller(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep); + template void min_max_loc_multipass_caller(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep); + template void min_max_loc_multipass_caller(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep); } // namespace minmaxloc @@ -1070,7 +1131,7 @@ namespace cv { namespace gpu { namespace mathfunc template - __global__ void count_non_zero_kernel_2ndstep(unsigned int* count, int size) + __global__ void count_non_zero_pass2_kernel(unsigned int* count, int size) { __shared__ unsigned int scount[nthreads]; unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x; @@ -1087,7 +1148,7 @@ namespace cv { namespace gpu { namespace mathfunc template - int count_non_zero_caller_2steps(const DevMem2D src, PtrStep buf) + int count_non_zero_multipass_caller(const DevMem2D src, PtrStep buf) { dim3 threads, grid; estimate_thread_cfg(threads, grid); @@ -1096,7 +1157,7 @@ namespace cv { namespace gpu { namespace mathfunc unsigned int* count_buf = (unsigned int*)buf.ptr(0); count_non_zero_kernel<256, T><<>>(src, count_buf); - count_non_zero_kernel_2ndstep<256, T><<<1, 256>>>(count_buf, grid.x * grid.y); + count_non_zero_pass2_kernel<256, T><<<1, 256>>>(count_buf, grid.x * grid.y); cudaSafeCall(cudaThreadSynchronize()); unsigned int count; @@ -1105,12 +1166,12 @@ namespace cv { namespace gpu { namespace mathfunc return count; } - template int count_non_zero_caller_2steps(const DevMem2D, PtrStep); - template int count_non_zero_caller_2steps(const DevMem2D, PtrStep); - template int count_non_zero_caller_2steps(const DevMem2D, PtrStep); - template int count_non_zero_caller_2steps(const DevMem2D, PtrStep); - template int count_non_zero_caller_2steps(const DevMem2D, PtrStep); - template int count_non_zero_caller_2steps(const DevMem2D, PtrStep); + template int count_non_zero_multipass_caller(const DevMem2D, PtrStep); + template int count_non_zero_multipass_caller(const DevMem2D, PtrStep); + template int count_non_zero_multipass_caller(const DevMem2D, PtrStep); + template int count_non_zero_multipass_caller(const DevMem2D, PtrStep); + template int count_non_zero_multipass_caller(const DevMem2D, PtrStep); + template int count_non_zero_multipass_caller(const DevMem2D, PtrStep); } // namespace countnonzero diff --git a/tests/gpu/src/arithm.cpp b/tests/gpu/src/arithm.cpp index b9f0b1624c..bf212f645f 100644 --- a/tests/gpu/src/arithm.cpp +++ b/tests/gpu/src/arithm.cpp @@ -682,16 +682,16 @@ struct CV_GpuMinMaxTest: public CvTest { int depth_end; if (cv::gpu::hasNativeDoubleSupport(cv::gpu::getDevice())) depth_end = CV_64F; else depth_end = CV_32F; - for (int cn = 1; cn <= 4; ++cn) - for (int depth = CV_8U; depth <= depth_end; ++depth) + for (int depth = CV_8U; depth <= depth_end; ++depth) + { + for (int i = 0; i < 1; ++i) { - for (int i = 0; i < 1; ++i) - { - int rows = 1 + rand() % 1000; - int cols = 1 + rand() % 1000; - test(rows, cols, cn, depth); - } + int rows = 1 + rand() % 1000; + int cols = 1 + rand() % 1000; + test(rows, cols, 1, depth); + test_masked(rows, cols, 1, depth); } + } } void test(int rows, int cols, int cn, int depth) @@ -707,10 +707,59 @@ struct CV_GpuMinMaxTest: public CvTest double minVal, maxVal; cv::Point minLoc, maxLoc; + if (depth != CV_8S) + { + cv::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc); + } + else + { + minVal = std::numeric_limits::max(); + maxVal = std::numeric_limits::min(); + for (int i = 0; i < src.rows; ++i) + for (int j = 0; j < src.cols; ++j) + { + signed char val = src.at(i, j); + if (val < minVal) minVal = val; + if (val > maxVal) maxVal = val; + } + } + + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + cv::gpu::minMax(cv::gpu::GpuMat(src), &minVal_, &maxVal_, cv::gpu::GpuMat(), buf); + + if (abs(minVal - minVal_) > 1e-3f) + { + ts->printf(CvTS::CONSOLE, "\nfail: minVal=%f minVal_=%f rows=%d cols=%d depth=%d cn=%d\n", minVal, minVal_, rows, cols, depth, cn); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + } + if (abs(maxVal - maxVal_) > 1e-3f) + { + ts->printf(CvTS::CONSOLE, "\nfail: maxVal=%f maxVal_=%f rows=%d cols=%d depth=%d cn=%d\n", maxVal, maxVal_, rows, cols, depth, cn); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + } + } + + void test_masked(int rows, int cols, int cn, int depth) + { + cv::Mat src(rows, cols, CV_MAKE_TYPE(depth, cn)); + cv::RNG rng; + for (int i = 0; i < src.rows; ++i) + { + Mat row(1, src.cols * src.elemSize(), CV_8U, src.ptr(i)); + rng.fill(row, RNG::UNIFORM, Scalar(0), Scalar(255)); + } + + cv::Mat mask(src.size(), CV_8U); + rng.fill(mask, RNG::UNIFORM, Scalar(0), Scalar(2)); + + double minVal, maxVal; + cv::Point minLoc, maxLoc; + Mat src_ = src.reshape(1); if (depth != CV_8S) { - cv::minMaxLoc(src_, &minVal, &maxVal, &minLoc, &maxLoc); + cv::minMaxLoc(src_, &minVal, &maxVal, &minLoc, &maxLoc, mask); } else { @@ -721,14 +770,14 @@ struct CV_GpuMinMaxTest: public CvTest for (int j = 0; j < src_.cols; ++j) { char val = src_.at(i, j); - if (val < minVal) minVal = val; - if (val > maxVal) maxVal = val; + if (mask.at(i, j)) { if (val < minVal) minVal = val; } + if (mask.at(i, j)) { if (val > maxVal) maxVal = val; } } } double minVal_, maxVal_; cv::Point minLoc_, maxLoc_; - cv::gpu::minMax(cv::gpu::GpuMat(src), &minVal_, &maxVal_, buf); + cv::gpu::minMax(cv::gpu::GpuMat(src), &minVal_, &maxVal_, cv::gpu::GpuMat(mask), buf); if (abs(minVal - minVal_) > 1e-3f) {