diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu
index 0e45fa46c2..9e0e7f86c3 100644
--- a/modules/gpu/src/cuda/matrix_reductions.cu
+++ b/modules/gpu/src/cuda/matrix_reductions.cu
@@ -328,13 +328,13 @@ namespace cv { namespace gpu { namespace mathfunc
         __shared__ best_type smaxval[nthreads];
         
         uint tid = threadIdx.y * blockDim.x + threadIdx.x;
-        uint idx = min(tid, gridDim.x * gridDim.y - 1);
+        uint idx = min(tid, size - 1);
 
         sminval[tid] = minval[idx];
         smaxval[tid] = maxval[idx];
         __syncthreads();
 
-		findMinMaxInSmem<nthreads, best_type>(sminval, smaxval, tid);
+        findMinMaxInSmem<nthreads, best_type>(sminval, smaxval, tid);
 
         if (tid == 0) 
         {
@@ -428,7 +428,7 @@ namespace cv { namespace gpu { namespace mathfunc
 
     // Returns required buffer sizes
     void getBufSizeRequired(int cols, int rows, int elem_size, int& b1cols, 
-                               int& b1rows, int& b2cols, int& b2rows)
+                            int& b1rows, int& b2cols, int& b2rows)
     {
         dim3 threads, grid;
         estimateThreadCfg(cols, rows, threads, grid);
@@ -623,7 +623,7 @@ namespace cv { namespace gpu { namespace mathfunc
 
     template <typename T>
     void minMaxLocCaller(const DevMem2D src, double* minval, double* maxval, 
-                            int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf)
+                         int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf)
     {
         dim3 threads, grid;
         estimateThreadCfg(src.cols, src.rows, threads, grid);
@@ -671,7 +671,7 @@ namespace cv { namespace gpu { namespace mathfunc
         __shared__ uint smaxloc[nthreads];
 
         uint tid = threadIdx.y * blockDim.x + threadIdx.x;
-        uint idx = min(tid, gridDim.x * gridDim.y - 1);
+        uint idx = min(tid, size - 1);
 
         sminval[tid] = minval[idx];
         smaxval[tid] = maxval[idx];
@@ -679,7 +679,7 @@ namespace cv { namespace gpu { namespace mathfunc
         smaxloc[tid] = maxloc[idx];
         __syncthreads();
 
-		findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);
+        findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);
 
         if (tid == 0) 
         {
@@ -1150,7 +1150,7 @@ namespace cv { namespace gpu { namespace mathfunc
 
         const int tid = threadIdx.y * blockDim.x + threadIdx.x;
 
-        DstType res = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<DstType>::all(0);
+        DstType res = tid < size ? result[tid] : VecTraits<DstType>::all(0);
         smem[tid] = res.x;
         smem[tid + nthreads] = res.y;
         __syncthreads();
@@ -1262,7 +1262,7 @@ namespace cv { namespace gpu { namespace mathfunc
 
         const int tid = threadIdx.y * blockDim.x + threadIdx.x;
 
-        DstType res = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<DstType>::all(0);
+        DstType res = tid < size ? result[tid] : VecTraits<DstType>::all(0);
         smem[tid] = res.x;
         smem[tid + nthreads] = res.y;
         smem[tid + 2 * nthreads] = res.z;
@@ -1384,7 +1384,7 @@ namespace cv { namespace gpu { namespace mathfunc
 
         const int tid = threadIdx.y * blockDim.x + threadIdx.x;
 
-        DstType res = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<DstType>::all(0);
+        DstType res = tid < size ? result[tid] : VecTraits<DstType>::all(0);
         smem[tid] = res.x;
         smem[tid + nthreads] = res.y;
         smem[tid + 2 * nthreads] = res.z;
diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp
index bee31e5c13..0104bd02a2 100644
--- a/modules/gpu/src/initialization.cpp
+++ b/modules/gpu/src/initialization.cpp
@@ -41,7 +41,6 @@
 //M*/
 
 #include "precomp.hpp"
-#include <functional>
 
 using namespace cv;
 using namespace cv::gpu;
diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp
index 732d36b8d0..abfc89476f 100644
--- a/modules/gpu/src/matrix_reductions.cpp
+++ b/modules/gpu/src/matrix_reductions.cpp
@@ -276,11 +276,11 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
             minMaxMaskCaller<double> };
 
     CV_Assert(src.channels() == 1);
+
     CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
 
-    bool double_ok = hasGreaterOrEqualVersion(1, 3) && 
-                     hasNativeDoubleSupport(getDevice());
-    CV_Assert(src.type() != CV_64F || double_ok);
+    CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) && 
+                                       hasNativeDoubleSupport(getDevice())));
 
     double minVal_; if (!minVal) minVal = &minVal_;
     double maxVal_; if (!maxVal) maxVal = &maxVal_;
@@ -375,11 +375,11 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
             minMaxLocMaskCaller<double> };
 
     CV_Assert(src.channels() == 1);
+
     CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
 
-    bool double_ok = hasGreaterOrEqualVersion(1, 3) && 
-                     hasNativeDoubleSupport(getDevice());
-    CV_Assert(src.type() != CV_64F || double_ok);
+    CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) && 
+                                       hasNativeDoubleSupport(getDevice())));
 
     double minVal_; if (!minVal) minVal = &minVal_;
     double maxVal_; if (!maxVal) maxVal = &maxVal_;
@@ -388,7 +388,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
 
     Size valbuf_size, locbuf_size;
     getBufSizeRequired(src.cols, src.rows, src.elemSize(), valbuf_size.width, 
-                          valbuf_size.height, locbuf_size.width, locbuf_size.height);
+                       valbuf_size.height, locbuf_size.width, locbuf_size.height);
     ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);
     ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);
 
@@ -459,9 +459,8 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
 
     CV_Assert(src.channels() == 1);
 
-    bool double_ok = hasGreaterOrEqualVersion(1, 3) && 
-                     hasNativeDoubleSupport(getDevice());
-    CV_Assert(src.type() != CV_64F || double_ok);
+    CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) && 
+                                       hasNativeDoubleSupport(getDevice())));
 
     Size buf_size;
     getBufSizeRequired(src.cols, src.rows, buf_size.width, buf_size.height);
diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp
index 03acb4a2bd..e0f2e004e2 100644
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@@ -57,6 +57,7 @@
 #include <sstream>
 #include <exception>
 #include <iterator>
+#include <functional>
 
 #include "opencv2/gpu/gpu.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
diff --git a/tests/gpu/src/arithm.cpp b/tests/gpu/src/arithm.cpp
index 8fea5401bb..3bc0e7318e 100644
--- a/tests/gpu/src/arithm.cpp
+++ b/tests/gpu/src/arithm.cpp
@@ -49,7 +49,7 @@ using namespace std;
 using namespace gpu;
 
 #define CHECK(pred, err) if (!(pred)) { \
-    ts->printf(CvTS::LOG, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \
+    ts->printf(CvTS::CONSOLE, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \
     ts->set_failed_test_info(err); \
     return; }