From d6a7a6d5030a62c6b34b2dad8c89d3de1cc74baa Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Tue, 29 Oct 2013 18:14:41 +0400
Subject: [PATCH 01/28] VideoCapture: copy the captured frame, to avoid
 dangling Mats

Previously, VideoCapture::retrieve would return a Mat that referenced
the internal IplImage. Since the latter is rewritten every time a
frame is captured, it means that if the user captures two frames in a row,
the first frame would reference nothing. The same happens if a user captures
a frame, then destroys the VideoCapture instance.

Note that the other branch of the if isn't affected, since flip allocates
a new Mat.
---
 modules/highgui/src/cap.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/highgui/src/cap.cpp b/modules/highgui/src/cap.cpp
index c97db180fe..bbfcc85964 100644
--- a/modules/highgui/src/cap.cpp
+++ b/modules/highgui/src/cap.cpp
@@ -523,7 +523,7 @@ bool VideoCapture::retrieve(Mat& image, int channel)
         return false;
     }
     if(_img->origin == IPL_ORIGIN_TL)
-        image = Mat(_img);
+        Mat(_img).copyTo(image);
     else
     {
         Mat temp(_img);

From e1b2f593d6463674b9bb5be20f3da9570759993f Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@willowgarage.com>
Date: Fri, 1 Nov 2013 19:30:58 +0100
Subject: [PATCH 02/28] fix crash when path has spaces. The paths are defined
 properly with an escape "\ " but you cannot have an escape and quotes when
 redirecting (otherwise, escapes are understood as 2 characters). So just remove
 the quotes.

---
 modules/java/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/java/CMakeLists.txt b/modules/java/CMakeLists.txt
index a4d895a6b9..52193bea25 100644
--- a/modules/java/CMakeLists.txt
+++ b/modules/java/CMakeLists.txt
@@ -134,7 +134,7 @@ endforeach()
 set(step2_depends ${step1_depends} ${scripts_gen_javadoc} ${scripts_rst_parser} ${javadoc_rst_sources} ${generated_java_sources} ${handwrittren_java_sources})
 string(REPLACE ";" "," OPENCV_JAVA_MODULES_STR "${OPENCV_JAVA_MODULES}")
 add_custom_command(OUTPUT ${documented_java_files}
-                   COMMAND ${PYTHON_EXECUTABLE} "${scripts_gen_javadoc}" --modules ${OPENCV_JAVA_MODULES_STR} "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/java" "${CMAKE_CURRENT_BINARY_DIR}" 2>"${CMAKE_CURRENT_BINARY_DIR}/get_javadoc_errors.log"
+                   COMMAND ${PYTHON_EXECUTABLE} "${scripts_gen_javadoc}" --modules ${OPENCV_JAVA_MODULES_STR} "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/java" "${CMAKE_CURRENT_BINARY_DIR}" 2>${CMAKE_CURRENT_BINARY_DIR}/get_javadoc_errors.log
                    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
                    DEPENDS ${step2_depends}
                   )

From 8c1eb5bf0e09b0b29e935f9b43786e2d5a830477 Mon Sep 17 00:00:00 2001
From: peng xiao <hisenxpress@gmail.com>
Date: Mon, 4 Nov 2013 14:59:28 +0800
Subject: [PATCH 03/28] Overload detectMultiScale API for ocl::haar.

---
 modules/ocl/include/opencv2/ocl/ocl.hpp |  3 +++
 modules/ocl/src/haar.cpp                | 29 +++++++++++++++++++------
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp
index af24f0aca2..40746bf32c 100644
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -898,6 +898,9 @@ namespace cv
 
             CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
                                         int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
+            void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
+                double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
+                Size minSize = Size(), Size maxSize = Size());
         };
 
         class CV_EXPORTS OclCascadeClassifierBuf : public  cv::CascadeClassifier
diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp
index 31f6742811..b79ec0fdc7 100644
--- a/modules/ocl/src/haar.cpp
+++ b/modules/ocl/src/haar.cpp
@@ -1186,6 +1186,28 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
     return result_seq;
 }
 
+
+struct getRect
+{
+    Rect operator()(const CvAvgComp &e) const
+    {
+        return e.rect;
+    }
+};
+
+void cv::ocl::OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv::Rect>& faces,
+                                                        double scaleFactor, int minNeighbors, int flags,
+                                                        Size minSize, Size maxSize)
+{
+    CvSeq* _objects;
+    MemStorage storage(cvCreateMemStorage(0));
+    _objects = oclHaarDetectObjects(gimg, storage, scaleFactor, minNeighbors, flags, minSize, maxSize);
+    vector<CvAvgComp> vecAvgComp;
+    Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
+    faces.resize(vecAvgComp.size());
+    std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
+}
+
 struct OclBuffers
 {
     cl_mem stagebuffer;
@@ -1197,13 +1219,6 @@ struct OclBuffers
     cl_mem newnodebuffer;
 };
 
-struct getRect
-{
-    Rect operator()(const CvAvgComp &e) const
-    {
-        return e.rect;
-    }
-};
 
 void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv::Rect>& faces,
                                                         double scaleFactor, int minNeighbors, int flags,

From a8426e1c12b030356c75644439af4ac151c0932b Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 4 Nov 2013 15:09:58 +0400
Subject: [PATCH 04/28] fixed ocl::cornerHarris, ocl::cornerMinEigenVal and
 their accuracy tests

---
 modules/ocl/src/opencl/imgproc_calcHarris.cl  |  4 ++-
 .../ocl/src/opencl/imgproc_calcMinEigenVal.cl |  4 ++-
 modules/ocl/test/test_imgproc.cpp             | 30 +++++++++----------
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/modules/ocl/src/opencl/imgproc_calcHarris.cl b/modules/ocl/src/opencl/imgproc_calcHarris.cl
index 3f53ddf9a5..02811dd69a 100644
--- a/modules/ocl/src/opencl/imgproc_calcHarris.cl
+++ b/modules/ocl/src/opencl/imgproc_calcHarris.cl
@@ -125,10 +125,12 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
         int indexDx = (dx_startY+i)*(dx_step>>2)+(dx_startX+col);
         float dx_s = dx_con ? Dx[indexDx] : 0.0f;
         dx_data[i] = dx_s;
+
         bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
         int indexDy = (dy_startY+i)*(dy_step>>2)+(dy_startX+col);
-        float dy_s = dx_con ? Dy[indexDy] : 0.0f;
+        float dy_s = dy_con ? Dy[indexDy] : 0.0f;
         dy_data[i] = dy_s;
+
         data[0][i] = dx_data[i] * dx_data[i];
         data[1][i] = dx_data[i] * dy_data[i];
         data[2][i] = dy_data[i] * dy_data[i];
diff --git a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl b/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
index c598246aec..7cb4c8ff3f 100644
--- a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
+++ b/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
@@ -124,10 +124,12 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
         int indexDx = (dx_startY+i)*(dx_step>>2)+(dx_startX+col);
         float dx_s = dx_con ? Dx[indexDx] : 0.0f;
         dx_data[i] = dx_s;
+
         bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
         int indexDy = (dy_startY+i)*(dy_step>>2)+(dy_startX+col);
-        float dy_s = dx_con ? Dy[indexDy] : 0.0f;
+        float dy_s = dy_con ? Dy[indexDy] : 0.0f;
         dy_data[i] = dy_s;
+
         data[0][i] = dx_data[i] * dx_data[i];
         data[1][i] = dx_data[i] * dy_data[i];
         data[2][i] = dy_data[i] * dy_data[i];
diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp
index 7e4b14ecae..634633a2a3 100644
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -93,22 +93,14 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType,
         generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
     }
 
-    void Near(double threshold = 0.0, bool relative = false)
+    void Near(double threshold = 0.0)
     {
         Mat roi, whole;
         gdst_whole.download(whole);
         gdst_roi.download(roi);
 
-        if (relative)
-        {
-            EXPECT_MAT_NEAR_RELATIVE(dst_whole, whole, threshold);
-            EXPECT_MAT_NEAR_RELATIVE(dst_roi, roi, threshold);
-        }
-        else
-        {
-            EXPECT_MAT_NEAR(dst_whole, whole, threshold);
-            EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-        }
+        EXPECT_MAT_NEAR(dst_whole, whole, threshold);
+        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
     }
 };
 
@@ -207,11 +199,19 @@ struct CornerTestBase :
         Mat image = readImageType("gpu/stereobm/aloe-L.png", type);
         ASSERT_FALSE(image.empty());
 
+        bool isFP = CV_MAT_DEPTH(type) >= CV_32F;
+        float val = 255.0f;
+        if (isFP)
+        {
+            image.convertTo(image, -1, 1.0 / 255);
+            val /= 255.0f;
+        }
+
         Size roiSize = image.size();
         Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
 
         Size wholeSize = Size(roiSize.width + srcBorder.lef + srcBorder.rig, roiSize.height + srcBorder.top + srcBorder.bot);
-        src = randomMat(wholeSize, type, -255, 255, false);
+        src = randomMat(wholeSize, type, -val, val, false);
         src_roi = src(Rect(srcBorder.lef, srcBorder.top, roiSize.width, roiSize.height));
         image.copyTo(src_roi);
 
@@ -236,7 +236,7 @@ OCL_TEST_P(CornerMinEigenVal, Mat)
         cornerMinEigenVal(src_roi, dst_roi, blockSize, apertureSize, borderType);
         ocl::cornerMinEigenVal(gsrc_roi, gdst_roi, blockSize, apertureSize, borderType);
 
-        Near(1e-5, true);
+        Near(1e-6);
     }
 }
 
@@ -256,7 +256,7 @@ OCL_TEST_P(CornerHarris, Mat)
         cornerHarris(src_roi, dst_roi, blockSize, apertureSize, k, borderType);
         ocl::cornerHarris(gsrc_roi, gdst_roi, blockSize, apertureSize, k, borderType);
 
-        Near(1e-5, true);
+        Near(1e-6);
     }
 }
 
@@ -522,7 +522,7 @@ INSTANTIATE_TEST_CASE_P(Imgproc, CornerMinEigenVal, Combine(
                             Bool()));
 
 INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
-                            Values((MatType)CV_8UC1), // TODO does not work properly with CV_32FC1
+                            Values((MatType)CV_8UC1, CV_32FC1),
                             Values(3, 5),
                             Values( (int)BORDER_CONSTANT, (int)BORDER_REPLICATE, (int)BORDER_REFLECT, (int)BORDER_REFLECT_101),
                             Bool()));

From c89dfd333c8f93c5c40a12621d95ac300b9885d2 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 4 Nov 2013 15:30:00 +0400
Subject: [PATCH 05/28] fixed warnings in OpenCL kernels

---
 modules/ocl/src/opencl/bgfg_mog.cl            | 22 +++++++++----------
 modules/ocl/src/opencl/haarobjectdetect.cl    |  1 -
 .../src/opencl/haarobjectdetect_scaled2.cl    |  2 --
 modules/ocl/src/opencl/tvl1flow.cl            | 16 +++-----------
 4 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl
index 8621ff31b0..a13a30e900 100644
--- a/modules/ocl/src/opencl/bgfg_mog.cl
+++ b/modules/ocl/src/opencl/bgfg_mog.cl
@@ -48,22 +48,22 @@
 #define T_MEAN_VAR float
 #define CONVERT_TYPE convert_uchar_sat
 #define F_ZERO (0.0f)
-float cvt(uchar val)
+inline float cvt(uchar val)
 {
     return val;
 }
 
-float sqr(float val)
+inline float sqr(float val)
 {
     return val * val;
 }
 
-float sum(float val)
+inline float sum(float val)
 {
     return val;
 }
 
-float clamp1(float var, float learningRate, float diff, float minVar)
+static float clamp1(float var, float learningRate, float diff, float minVar)
 {
     return fmax(var + learningRate * (diff * diff - var), minVar);
 }
@@ -72,7 +72,7 @@ float clamp1(float var, float learningRate, float diff, float minVar)
 #define T_MEAN_VAR float4
 #define CONVERT_TYPE convert_uchar4_sat
 #define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
-float4 cvt(const uchar4 val)
+inline float4 cvt(const uchar4 val)
 {
     float4 result;
     result.x = val.x;
@@ -83,17 +83,17 @@ float4 cvt(const uchar4 val)
     return result;
 }
 
-float sqr(const float4 val)
+inline float sqr(const float4 val)
 {
     return val.x * val.x + val.y * val.y + val.z * val.z;
 }
 
-float sum(const float4 val)
+inline float sum(const float4 val)
 {
     return (val.x + val.y + val.z);
 }
 
-float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
+static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
 {
     float4 result;
     result.x = fmax(var.x + learningRate * (diff.x * diff.x - var.x), minVar);
@@ -116,14 +116,14 @@ typedef struct
     uchar c_shadowVal;
 }con_srtuct_t;
 
-void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
+static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
 {
     float val = ptr[(k * rows + y) * ptr_step + x];
     ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
     ptr[((k + 1) * rows + y) * ptr_step + x] = val;
 }
 
-void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
+static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
 {
     float4 val = ptr[(k * rows + y) * ptr_step + x];
     ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
@@ -412,7 +412,7 @@ __kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __glob
 
             if (_weight < -prune)
             {
-                _weight = 0.0;
+                _weight = 0.0f;
                 nmodes--;
             }
 
diff --git a/modules/ocl/src/opencl/haarobjectdetect.cl b/modules/ocl/src/opencl/haarobjectdetect.cl
index 58ebb4c014..a62b3af8cb 100644
--- a/modules/ocl/src/opencl/haarobjectdetect.cl
+++ b/modules/ocl/src/opencl/haarobjectdetect.cl
@@ -292,7 +292,6 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
     for(int scalei = 0; scalei <loopcount; scalei++)
     {
         int4 scaleinfo1= info[scalei];
-        int width = (scaleinfo1.x & 0xffff0000) >> 16;
         int height = scaleinfo1.x & 0xffff;
         int grpnumperline =(scaleinfo1.y & 0xffff0000) >> 16;
         int totalgrp = scaleinfo1.y & 0xffff;
diff --git a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
index 3ace4470aa..72b94038cd 100644
--- a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+++ b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
@@ -136,8 +136,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
     {
         int4 scaleinfo1;
         scaleinfo1 = info[scalei];
-        int width = (scaleinfo1.x & 0xffff0000) >> 16;
-        int height = scaleinfo1.x & 0xffff;
         int grpnumperline = (scaleinfo1.y & 0xffff0000) >> 16;
         int totalgrp = scaleinfo1.y & 0xffff;
         float factor = as_float(scaleinfo1.w);
diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl
index ca60fb70f6..2787f00dcd 100644
--- a/modules/ocl/src/opencl/tvl1flow.cl
+++ b/modules/ocl/src/opencl/tvl1flow.cl
@@ -69,23 +69,16 @@ __global float* dx, __global float* dy, int dx_step)
 
 }
 
-float bicubicCoeff(float x_)
+static float bicubicCoeff(float x_)
 {
 
     float x = fabs(x_);
     if (x <= 1.0f)
-    {
         return x * x * (1.5f * x - 2.5f) + 1.0f;
-    }
     else if (x < 2.0f)
-    {
         return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
-    }
     else
-    {
         return 0.0f;
-    }
-
 }
 
 __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,
@@ -170,12 +163,10 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
 
 }
 
-float readImage(__global const float *image,  const int x,  const int y,  const int rows,  const int cols, const int elemCntPerRow)
+static float readImage(__global const float *image,  const int x,  const int y,  const int rows,  const int cols, const int elemCntPerRow)
 {
     int i0 = clamp(x, 0, cols - 1);
     int j0 = clamp(y, 0, rows - 1);
-    int i1 = clamp(x + 1, 0, cols - 1);
-    int j1 = clamp(y + 1, 0, rows - 1);
 
     return image[j0 * elemCntPerRow + i0];
 }
@@ -303,7 +294,7 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,
 
 }
 
-float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
+static float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
 {
 
     if (x > 0 && y > 0)
@@ -407,5 +398,4 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
             error[y * I1wx_step + x] = n1 + n2;
         }
     }
-
 }

From 2a111f7a6c5033db275d82cc1a9129b89ae19678 Mon Sep 17 00:00:00 2001
From: peng xiao <hisenxpress@gmail.com>
Date: Tue, 5 Nov 2013 10:40:27 +0800
Subject: [PATCH 06/28] Let perf/accuracy tests of ocl haar use the
 detectMultiScale API. Fix the image used by the perf test.

---
 modules/ocl/perf/perf_haar.cpp      | 42 ++---------------------------
 modules/ocl/test/test_objdetect.cpp | 11 +++-----
 samples/ocl/facedetect.cpp          |  6 ++---
 3 files changed, 8 insertions(+), 51 deletions(-)

diff --git a/modules/ocl/perf/perf_haar.cpp b/modules/ocl/perf/perf_haar.cpp
index 9ccaf31563..1e6ba1b646 100644
--- a/modules/ocl/perf/perf_haar.cpp
+++ b/modules/ocl/perf/perf_haar.cpp
@@ -48,49 +48,11 @@
 using namespace perf;
 
 ///////////// Haar ////////////////////////
-namespace cv
-{
-namespace ocl
-{
-
-struct getRect
-{
-    Rect operator()(const CvAvgComp &e) const
-    {
-        return e.rect;
-    }
-};
-
-class CascadeClassifier_GPU : public OclCascadeClassifier
-{
-public:
-    void detectMultiScale(oclMat &image,
-                          CV_OUT std::vector<cv::Rect>& faces,
-                          double scaleFactor = 1.1,
-                          int minNeighbors = 3, int flags = 0,
-                          Size minSize = Size(),
-                          Size maxSize = Size())
-    {
-        (void)maxSize;
-        MemStorage storage(cvCreateMemStorage(0));
-        //CvMat img=image;
-        CvSeq *objs = oclHaarDetectObjects(image, storage, scaleFactor, minNeighbors, flags, minSize);
-        vector<CvAvgComp> vecAvgComp;
-        Seq<CvAvgComp>(objs).copyTo(vecAvgComp);
-        faces.resize(vecAvgComp.size());
-        std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
-    }
-
-};
-
-}
-}
-
 PERF_TEST(HaarFixture, Haar)
 {
     vector<Rect> faces;
 
-    Mat img = imread(getDataPath("gpu/haarcascade/basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE);
+    Mat img = imread(getDataPath("gpu/haarcascade/group_1_640x480_VGA.pgm"), CV_LOAD_IMAGE_GRAYSCALE);
     ASSERT_TRUE(!img.empty()) << "can't open basketball1.png";
     declare.in(img);
 
@@ -107,7 +69,7 @@ PERF_TEST(HaarFixture, Haar)
     }
     else if (RUN_OCL_IMPL)
     {
-        ocl::CascadeClassifier_GPU faceCascade;
+        ocl::OclCascadeClassifier faceCascade;
         ocl::oclMat oclImg(img);
 
         ASSERT_TRUE(faceCascade.load(getDataPath("gpu/haarcascade/haarcascade_frontalface_alt.xml")))
diff --git a/modules/ocl/test/test_objdetect.cpp b/modules/ocl/test/test_objdetect.cpp
index 6f47d7470b..89f45b07c9 100644
--- a/modules/ocl/test/test_objdetect.cpp
+++ b/modules/ocl/test/test_objdetect.cpp
@@ -218,14 +218,9 @@ PARAM_TEST_CASE(Haar, int, CascadeName)
 
 OCL_TEST_P(Haar, FaceDetect)
 {
-    MemStorage storage(cvCreateMemStorage(0));
-    CvSeq *_objects;
-    _objects = cascade.oclHaarDetectObjects(d_img, storage, 1.1, 3,
-                                            flags, Size(30, 30), Size(0, 0));
-    vector<CvAvgComp> vecAvgComp;
-    Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
-    oclfaces.resize(vecAvgComp.size());
-    std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
+    cascade.detectMultiScale(d_img, oclfaces,  1.1, 3,
+                                flags,
+                                Size(30, 30), Size(0, 0));
 
     cpucascade.detectMultiScale(img, faces,  1.1, 3,
                                 flags,
diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp
index d20c937852..10c6c4f4d2 100644
--- a/samples/ocl/facedetect.cpp
+++ b/samples/ocl/facedetect.cpp
@@ -41,7 +41,7 @@ static double getTime()
 
 
 static void detect( Mat& img, vector<Rect>& faces,
-             ocl::OclCascadeClassifierBuf& cascade,
+             ocl::OclCascadeClassifier& cascade,
              double scale, bool calTime);
 
 
@@ -87,7 +87,7 @@ int main( int argc, const char** argv )
     outputName = cmd.get<string>("o");
     string cascadeName = cmd.get<string>("t");
     double scale = cmd.get<double>("c");
-    ocl::OclCascadeClassifierBuf cascade;
+    ocl::OclCascadeClassifier cascade;
     CascadeClassifier  cpu_cascade;
 
     if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) )
@@ -180,7 +180,7 @@ int main( int argc, const char** argv )
 }
 
 void detect( Mat& img, vector<Rect>& faces,
-             ocl::OclCascadeClassifierBuf& cascade,
+             ocl::OclCascadeClassifier& cascade,
              double scale, bool calTime)
 {
     ocl::oclMat image(img);

From 2df53d97c5ec7250d6cc551db45dfce0b1837ec3 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 4 Nov 2013 23:59:56 +0400
Subject: [PATCH 07/28] added ocl::repeat

---
 modules/core/src/copy.cpp               |  1 +
 modules/ocl/include/opencv2/ocl/ocl.hpp |  3 ++
 modules/ocl/perf/perf_arithm.cpp        | 37 ++++++++++++++++++
 modules/ocl/src/arithm.cpp              | 18 +++++++++
 modules/ocl/test/test_arithm.cpp        | 51 +++++++++++++++++++++++--
 5 files changed, 106 insertions(+), 4 deletions(-)

diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp
index 8276ffddb6..cc26b3eb35 100644
--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@@ -485,6 +485,7 @@ void repeat(InputArray _src, int ny, int nx, OutputArray _dst)
 {
     Mat src = _src.getMat();
     CV_Assert( src.dims <= 2 );
+    CV_Assert( ny > 0 && nx > 0 );
 
     _dst.create(src.rows*ny, src.cols*nx, src.type());
     Mat dst = _dst.getMat();
diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp
index af24f0aca2..601885303c 100644
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -631,6 +631,9 @@ namespace cv
         //! initializes a scaled identity matrix
         CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1));
 
+        //! fills the output array with repeated copies of the input array
+        CV_EXPORTS void repeat(const oclMat & src, int ny, int nx, oclMat & dst);
+
         //////////////////////////////// Filter Engine ////////////////////////////////
 
         /*!
diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp
index d71901e89d..24eab3b915 100644
--- a/modules/ocl/perf/perf_arithm.cpp
+++ b/modules/ocl/perf/perf_arithm.cpp
@@ -1051,3 +1051,40 @@ PERF_TEST_P(AbsFixture, Abs,
     else
         OCL_PERF_ELSE
 }
+
+///////////// Repeat ////////////////////////
+
+typedef Size_MatType RepeatFixture;
+
+PERF_TEST_P(RepeatFixture, Repeat,
+            ::testing::Combine(::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000),
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4)))
+{
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+    const int nx = 3, ny = 2;
+    const Size dstSize(srcSize.width * nx, srcSize.height * ny);
+
+    Mat src(srcSize, type), dst(dstSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::repeat(oclSrc, ny, nx, oclDst);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::repeat(src, ny, nx, dst);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp
index 9b24b16b0b..f8a069082d 100644
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@@ -1706,3 +1706,21 @@ void cv::ocl::setIdentity(oclMat& src, const Scalar & scalar)
     openCLExecuteKernel(src.clCxt, &arithm_setidentity, "setIdentity", global_threads, local_threads,
                         args, -1, -1, buildOptions.c_str());
 }
+
+//////////////////////////////////////////////////////////////////////////////
+////////////////////////////////// Repeat ////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////
+
+void cv::ocl::repeat(const oclMat & src, int ny, int nx, oclMat & dst)
+{
+    CV_Assert(nx > 0 && ny > 0);
+    dst.create(src.rows * ny, src.cols * nx, src.type());
+
+    for (int y = 0; y < ny; ++y)
+        for (int x = 0; x < nx; ++x)
+        {
+            Rect roi(x * src.cols, y * src.rows, src.cols, src.rows);
+            oclMat hdr = dst(roi);
+            src.copyTo(hdr);
+        }
+}
diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp
index 11b945c5b2..17260580de 100644
--- a/modules/ocl/test/test_arithm.cpp
+++ b/modules/ocl/test/test_arithm.cpp
@@ -192,13 +192,13 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
         use_roi = GET_PARAM(2);
     }
 
-    void random_roi()
+    virtual void random_roi()
     {
         const int type = CV_MAKE_TYPE(depth, cn);
 
         Size roiSize = randomSize(1, MAX_VALUE);
-        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src1, src1_roi, roiSize, srcBorder, type, 2, 11);
+        Border src1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
+        randomSubMat(src1, src1_roi, roiSize, src1Border, type, 2, 11);
 
         Border src2Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
         randomSubMat(src2, src2_roi, roiSize, src2Border, type, -1540, 1740);
@@ -214,7 +214,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
         cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
 
 
-        generateOclMat(gsrc1_whole, gsrc1_roi, src1, roiSize, srcBorder);
+        generateOclMat(gsrc1_whole, gsrc1_roi, src1, roiSize, src1Border);
         generateOclMat(gsrc2_whole, gsrc2_roi, src2, roiSize, src2Border);
         generateOclMat(gdst1_whole, gdst1_roi, dst1, roiSize, dst1Border);
         generateOclMat(gdst2_whole, gdst2_roi, dst2, roiSize, dst2Border);
@@ -1522,6 +1522,48 @@ OCL_TEST_P(Norm, NORM_L2)
         }
 }
 
+//// Repeat
+
+struct RepeatTestCase :
+        public ArithmTestBase
+{
+    int nx, ny;
+
+    virtual void random_roi()
+    {
+        const int type = CV_MAKE_TYPE(depth, cn);
+
+        nx = randomInt(1, 4);
+        ny = randomInt(1, 4);
+
+        Size srcRoiSize = randomSize(1, MAX_VALUE);
+        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
+        randomSubMat(src1, src1_roi, srcRoiSize, srcBorder, type, 2, 11);
+
+        Size dstRoiSize(srcRoiSize.width * nx, srcRoiSize.height * ny);
+        Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
+        randomSubMat(dst1, dst1_roi, dstRoiSize, dst1Border, type, 5, 16);
+
+        generateOclMat(gsrc1_whole, gsrc1_roi, src1, srcRoiSize, srcBorder);
+        generateOclMat(gdst1_whole, gdst1_roi, dst1, dstRoiSize, dst1Border);
+    }
+};
+
+typedef RepeatTestCase Repeat;
+
+OCL_TEST_P(Repeat, Mat)
+{
+    for (int i = 0; i < LOOP_TIMES; ++i)
+    {
+        random_roi();
+
+        cv::repeat(src1_roi, ny, nx, dst1_roi);
+        cv::ocl::repeat(gsrc1_roi, ny, nx, gdst1_roi);
+
+        Near();
+    }
+}
+
 //////////////////////////////////////// Instantiation /////////////////////////////////////////
 
 INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool(), Bool()));
@@ -1557,5 +1599,6 @@ INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(Values(CV_8U, CV_8S, CV_16U
 INSTANTIATE_TEST_CASE_P(Arithm, SetIdentity, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
 INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
 INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
+INSTANTIATE_TEST_CASE_P(Arithm, Repeat, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
 
 #endif // HAVE_OPENCL

From ec77434190a15e19f11e1518d77cc047756a6b60 Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Tue, 5 Nov 2013 13:37:01 +0400
Subject: [PATCH 08/28] Update .gitignore.

* OpenCV4Tegra/ is no longer relevant.
* We should only ignore the particular refman.rst that we generate.
---
 .gitignore | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0bcffd7260..643cc6de8b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
 *.pyc
 .DS_Store
-refman.rst
-OpenCV4Tegra/
+/modules/refman.rst
 tegra/
 *.user
 .sw[a-z]

From 4203979c8792abf35049e033fd3ac19a8abed7b1 Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Tue, 5 Nov 2013 13:41:42 +0400
Subject: [PATCH 09/28] Sorted .gitignore.

---
 .gitignore | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.gitignore b/.gitignore
index 643cc6de8b..94537d60e3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,9 @@
-*.pyc
-.DS_Store
-/modules/refman.rst
-tegra/
-*.user
-.sw[a-z]
-.*.swp
-tags
 *.autosave
+*.pyc
+*.user
+.*.swp
+.DS_Store
+.sw[a-z]
+/modules/refman.rst
+tags
+tegra/

From e7e7e04dce59258843703ef5d44ad6b61d568aa6 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Tue, 5 Nov 2013 14:17:31 +0400
Subject: [PATCH 10/28] switched back to relative error comparison

---
 modules/ocl/test/test_imgproc.cpp | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp
index 634633a2a3..c7099a10cd 100644
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -93,14 +93,22 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType,
         generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
     }
 
-    void Near(double threshold = 0.0)
+    void Near(double threshold = 0.0, bool relative = false)
     {
         Mat roi, whole;
         gdst_whole.download(whole);
         gdst_roi.download(roi);
 
-        EXPECT_MAT_NEAR(dst_whole, whole, threshold);
-        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
+        if (relative)
+        {
+            EXPECT_MAT_NEAR_RELATIVE(dst_whole, whole, threshold);
+            EXPECT_MAT_NEAR_RELATIVE(dst_roi, roi, threshold);
+        }
+        else
+        {
+            EXPECT_MAT_NEAR(dst_whole, whole, threshold);
+            EXPECT_MAT_NEAR(dst_roi, roi, threshold);
+        }
     }
 };
 
@@ -236,7 +244,7 @@ OCL_TEST_P(CornerMinEigenVal, Mat)
         cornerMinEigenVal(src_roi, dst_roi, blockSize, apertureSize, borderType);
         ocl::cornerMinEigenVal(gsrc_roi, gdst_roi, blockSize, apertureSize, borderType);
 
-        Near(1e-6);
+        Near(1e-5, true);
     }
 }
 
@@ -256,7 +264,7 @@ OCL_TEST_P(CornerHarris, Mat)
         cornerHarris(src_roi, dst_roi, blockSize, apertureSize, k, borderType);
         ocl::cornerHarris(gsrc_roi, gdst_roi, blockSize, apertureSize, k, borderType);
 
-        Near(1e-6);
+        Near(1e-5, true);
     }
 }
 

From 3b0108e4c11c2b0db8acad8523060f7588b2ed95 Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@aldebaran-robotics.com>
Date: Tue, 5 Nov 2013 11:18:20 +0100
Subject: [PATCH 11/28] fix the crash as suggested by @SpecLad

---
 modules/java/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/java/CMakeLists.txt b/modules/java/CMakeLists.txt
index 52193bea25..8e8ed10590 100644
--- a/modules/java/CMakeLists.txt
+++ b/modules/java/CMakeLists.txt
@@ -134,7 +134,7 @@ endforeach()
 set(step2_depends ${step1_depends} ${scripts_gen_javadoc} ${scripts_rst_parser} ${javadoc_rst_sources} ${generated_java_sources} ${handwrittren_java_sources})
 string(REPLACE ";" "," OPENCV_JAVA_MODULES_STR "${OPENCV_JAVA_MODULES}")
 add_custom_command(OUTPUT ${documented_java_files}
-                   COMMAND ${PYTHON_EXECUTABLE} "${scripts_gen_javadoc}" --modules ${OPENCV_JAVA_MODULES_STR} "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/java" "${CMAKE_CURRENT_BINARY_DIR}" 2>${CMAKE_CURRENT_BINARY_DIR}/get_javadoc_errors.log
+                   COMMAND ${PYTHON_EXECUTABLE} "${scripts_gen_javadoc}" --modules ${OPENCV_JAVA_MODULES_STR} "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/java" "${CMAKE_CURRENT_BINARY_DIR}" 2> "${CMAKE_CURRENT_BINARY_DIR}/get_javadoc_errors.log"
                    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
                    DEPENDS ${step2_depends}
                   )

From 0b1df622156fbf4e071b631475fa3e400ba77a9a Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@aldebaran-robotics.com>
Date: Tue, 5 Nov 2013 12:02:22 +0100
Subject: [PATCH 12/28] add VERBATIM as advised by @SpecLad

---
 modules/java/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/java/CMakeLists.txt b/modules/java/CMakeLists.txt
index 8e8ed10590..936c160e86 100644
--- a/modules/java/CMakeLists.txt
+++ b/modules/java/CMakeLists.txt
@@ -137,6 +137,7 @@ add_custom_command(OUTPUT ${documented_java_files}
                    COMMAND ${PYTHON_EXECUTABLE} "${scripts_gen_javadoc}" --modules ${OPENCV_JAVA_MODULES_STR} "${CMAKE_CURRENT_SOURCE_DIR}/generator/src/java" "${CMAKE_CURRENT_BINARY_DIR}" 2> "${CMAKE_CURRENT_BINARY_DIR}/get_javadoc_errors.log"
                    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
                    DEPENDS ${step2_depends}
+                   VERBATIM
                   )
 
 # step 3: copy files to destination

From 9a63508f5029938ec7bc8051d2a939216360c415 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.alekhin@itseez.com>
Date: Tue, 5 Nov 2013 15:13:30 +0400
Subject: [PATCH 13/28] Revert "disable SVM when AMD BLAS is not available"

This reverts commit d63a38e9bfad4081ec8107e8d38e03c03f6548d2.

Conflicts:
	modules/ocl/test/test_ml.cpp
---
 modules/ocl/src/svm.cpp      | 6 ------
 modules/ocl/test/test_ml.cpp | 4 ----
 2 files changed, 10 deletions(-)

diff --git a/modules/ocl/src/svm.cpp b/modules/ocl/src/svm.cpp
index a71047c58a..40b3d981d3 100644
--- a/modules/ocl/src/svm.cpp
+++ b/modules/ocl/src/svm.cpp
@@ -686,9 +686,6 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const
     }
 
 #else
-    // TODO fix it
-    CV_Error(CV_StsNotImplemented, "This part of code contains mistakes. Install AMD BLAS in order to get a correct result or use CPU version of SVM");
-
     double degree1 = 0.0;
     if (params.kernel_type == CvSVM::POLY)
         degree1 = params.degree;
@@ -813,9 +810,6 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
     }
 
 #else
-    // TODO fix it
-    CV_Error(CV_StsNotImplemented, "This part of code contains mistakes. Install AMD BLAS in order to get a correct result or use CPU version of SVM");
-
     double degree1 = 0.0;
     if(params->kernel_type == CvSVM::POLY)
         degree1 = params->degree;
diff --git a/modules/ocl/test/test_ml.cpp b/modules/ocl/test/test_ml.cpp
index a064070890..00f9fa9410 100644
--- a/modules/ocl/test/test_ml.cpp
+++ b/modules/ocl/test/test_ml.cpp
@@ -126,8 +126,6 @@ OCL_TEST_P(KNN, Accuracy)
 INSTANTIATE_TEST_CASE_P(OCL_ML, KNN, Combine(Values(6, 5), Values(Size(200, 400), Size(300, 600)),
     Values(4, 3), Values(false, true)));
 
-#ifdef HAVE_CLAMDBLAS // TODO does not work non-blas version of SVM
-
 ////////////////////////////////SVM/////////////////////////////////////////////////
 
 PARAM_TEST_CASE(SVM_OCL, int, int, int)
@@ -308,6 +306,4 @@ INSTANTIATE_TEST_CASE_P(OCL_ML, SVM_OCL, testing::Combine(
                             Values(2, 3, 4)
                         ));
 
-#endif // HAVE_CLAMDBLAS
-
 #endif // HAVE_OPENCL

From 7704dbf866d19c3be64a574b3e8e4d08707a223a Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.alekhin@itseez.com>
Date: Tue, 5 Nov 2013 15:15:26 +0400
Subject: [PATCH 14/28] ocl: svm: restore non BLAS version

---
 modules/ocl/src/svm.cpp | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/modules/ocl/src/svm.cpp b/modules/ocl/src/svm.cpp
index 40b3d981d3..3e51d86913 100644
--- a/modules/ocl/src/svm.cpp
+++ b/modules/ocl/src/svm.cpp
@@ -994,13 +994,15 @@ void CvSVMKernel_ocl::calc( int vcount, const int row_idx, Qfloat* results, Mat&
     //int j;
     (this->*calc_func_ocl)( vcount, row_idx, results, src);
 
-// FIXIT #if defined HAVE_CLAMDBLAS
+#if !defined(HAVE_CLAMDBLAS)
+    // nothing
+#else
     const Qfloat max_val = (Qfloat)(FLT_MAX * 1e-3);
     int j;
     for( j = 0; j < vcount; j++ )
         if( results[j] > max_val )
             results[j] = max_val;
-// FIXIT #endif
+#endif
 }
 
 bool CvSVMKernel_ocl::create( const CvSVMParams* _params, Calc_ocl _calc_func, Calc _calc_func1 )
@@ -1072,12 +1074,13 @@ void CvSVMKernel_ocl::calc_poly( int vcount, const int row_idx, Qfloat* results,
 {
     calc_non_rbf_base( vcount, row_idx, results, src);
 
-//FIXIT #if defined HAVE_CLAMDBLAS
-
+#if !defined(HAVE_CLAMDBLAS)
+    // nothing
+#else
     CvMat R = cvMat( 1, vcount, QFLOAT_TYPE, results );
     if( vcount > 0 )
         cvPow( &R, &R, params->degree );
-//FIXIT #endif
+#endif
 }
 
 
@@ -1085,7 +1088,9 @@ void CvSVMKernel_ocl::calc_sigmoid( int vcount, const int row_idx, Qfloat* resul
 {
     calc_non_rbf_base( vcount, row_idx, results, src);
     // TODO: speedup this
-//FIXIT #if defined HAVE_CLAMDBLAS
+#if !defined(HAVE_CLAMDBLAS)
+    // nothing
+#else
     for(int j = 0; j < vcount; j++ )
     {
         Qfloat t = results[j];
@@ -1095,7 +1100,7 @@ void CvSVMKernel_ocl::calc_sigmoid( int vcount, const int row_idx, Qfloat* resul
         else
             results[j] = (Qfloat)((e - 1.) / (e + 1.));
     }
-//FIXIT #endif
+#endif
 }
 
 CvSVM_OCL::CvSVM_OCL()

From e544e34eed67a692338018b5e281f8466c04e8b1 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Fri, 1 Nov 2013 19:06:42 +0400
Subject: [PATCH 15/28] fixed ocl::copyMakeBorder accuracy test

---
 modules/ocl/test/test_imgproc.cpp | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp
index e981d437e8..ab25e508e1 100644
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -134,18 +134,23 @@ PARAM_TEST_CASE(CopyMakeBorder, MatDepth, // depth
 
     void random_roi()
     {
+        border = randomBorder(0, MAX_VALUE << 2);
+        val = randomScalar(-MAX_VALUE, MAX_VALUE);
+
         Size roiSize = randomSize(1, MAX_VALUE);
         Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
+        randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
 
         Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
-        randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, type, 5, 16);
+        dstBorder.top += border.top;
+        dstBorder.lef += border.lef;
+        dstBorder.rig += border.rig;
+        dstBorder.bot += border.bot;
+
+        randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
 
         generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
         generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
-
-        border = randomBorder(0, MAX_VALUE << 2);
-        val = randomScalar(-MAX_VALUE, MAX_VALUE);
     }
 
     void Near(double threshold = 0.0)
@@ -559,14 +564,11 @@ INSTANTIATE_TEST_CASE_P(Imgproc, ColumnSum, Combine(
                             Bool()));
 
 INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
-                            testing::Range((MatDepth)CV_8U, (MatDepth)CV_USRTYPE1),
-                            testing::Values((Channels)1, (Channels)4),
+                            testing::Values((MatDepth)CV_8U, (MatDepth)CV_16S, (MatDepth)CV_32S, (MatDepth)CV_32F),
+                            testing::Values(Channels(1), Channels(3), (Channels)4),
                             Bool(), // border isolated or not
-                            Values((Border)BORDER_CONSTANT,
-                                   (Border)BORDER_REPLICATE,
-                                   (Border)BORDER_REFLECT,
-                                   (Border)BORDER_WRAP,
-                                   (Border)BORDER_REFLECT_101),
+                            Values((Border)BORDER_REPLICATE, (Border)BORDER_REFLECT,
+                                   (Border)BORDER_WRAP, (Border)BORDER_REFLECT_101),
                             Bool()));
 
 #endif // HAVE_OPENCL

From 691d5f418796018bb012aeaf93152fac0c193c26 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.alekhin@itseez.com>
Date: Thu, 31 Oct 2013 23:00:43 +0400
Subject: [PATCH 16/28] ocl: memory cleanup workaround: clFinish() before
 clReleaseMemObject() + 64kb memory guard

---
 modules/ocl/src/cl_operations.cpp | 53 ++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 19 deletions(-)

diff --git a/modules/ocl/src/cl_operations.cpp b/modules/ocl/src/cl_operations.cpp
index d344689c4b..d96f3470ec 100644
--- a/modules/ocl/src/cl_operations.cpp
+++ b/modules/ocl/src/cl_operations.cpp
@@ -109,12 +109,15 @@ cl_mem openCLCreateBuffer(Context *ctx, size_t flag , size_t size)
     return buffer;
 }
 
+#define MEMORY_CORRUPTION_GUARD
+#ifdef MEMORY_CORRUPTION_GUARD
 //#define CHECK_MEMORY_CORRUPTION
-#ifdef CHECK_MEMORY_CORRUPTION
-//#define CHECK_MEMORY_CORRUPTION_PRINT_ERROR
+#define CHECK_MEMORY_CORRUPTION_PRINT_ERROR
 #define CHECK_MEMORY_CORRUPTION_RAISE_ERROR
-static const int __memory_corruption_check_bytes = 1024*1024;
+static const int __memory_corruption_guard_bytes = 64*1024;
+#ifdef CHECK_MEMORY_CORRUPTION
 static const int __memory_corruption_check_pattern = 0x14326547; // change pattern for sizeof(int)==8
+#endif
 struct CheckBuffers
 {
     cl_mem mainBuffer;
@@ -128,7 +131,7 @@ struct CheckBuffers
     CheckBuffers(cl_mem _mainBuffer, size_t _size, size_t _widthInBytes, size_t _height)
         : mainBuffer(_mainBuffer), size(_size), widthInBytes(_widthInBytes), height(_height)
     {
-        // notihng
+        // nothing
     }
 };
 static std::map<cl_mem, CheckBuffers> __check_buffers;
@@ -145,30 +148,33 @@ void openCLMallocPitchEx(Context *ctx, void **dev_ptr, size_t *pitch,
 {
     cl_int status;
     size_t size = widthInBytes * height;
-#ifndef CHECK_MEMORY_CORRUPTION
+#ifndef MEMORY_CORRUPTION_GUARD
     *dev_ptr = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
                               size, 0, &status);
     openCLVerifyCall(status);
 #else
-    size_t allocSize = size + __memory_corruption_check_bytes * 2;
+    size_t allocSize = size + __memory_corruption_guard_bytes * 2;
     cl_mem mainBuffer = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
             allocSize, 0, &status);
     openCLVerifyCall(status);
-    cl_buffer_region r = {__memory_corruption_check_bytes, size};
+    cl_buffer_region r = {__memory_corruption_guard_bytes, size};
     *dev_ptr =  clCreateSubBuffer(mainBuffer,
             gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
             CL_BUFFER_CREATE_TYPE_REGION, &r,
             &status);
     openCLVerifyCall(status);
-    std::vector<int> tmp(__memory_corruption_check_bytes / sizeof(int),
+#ifdef CHECK_MEMORY_CORRUPTION
+    std::vector<int> tmp(__memory_corruption_guard_bytes / sizeof(int),
             __memory_corruption_check_pattern);
-    CV_Assert(tmp.size() * sizeof(int) == __memory_corruption_check_bytes);
+    CV_Assert(tmp.size() * sizeof(int) == __memory_corruption_guard_bytes);
     openCLVerifyCall(clEnqueueWriteBuffer(getClCommandQueue(ctx),
-            mainBuffer, CL_TRUE, 0, __memory_corruption_check_bytes, &tmp[0],
+            mainBuffer, CL_FALSE, 0, __memory_corruption_guard_bytes, &tmp[0],
             0, NULL, NULL));
     openCLVerifyCall(clEnqueueWriteBuffer(getClCommandQueue(ctx),
-            mainBuffer, CL_TRUE, __memory_corruption_check_bytes + size, __memory_corruption_check_bytes, &tmp[0],
+            mainBuffer, CL_FALSE, __memory_corruption_guard_bytes + size, __memory_corruption_guard_bytes, &tmp[0],
             0, NULL, NULL));
+    clFinish(getClCommandQueue(ctx));
+#endif
     CheckBuffers data(mainBuffer, size, widthInBytes, height);
     __check_buffers.insert(std::pair<cl_mem, CheckBuffers>((cl_mem)*dev_ptr, data));
 #endif
@@ -224,40 +230,49 @@ void openCLCopyBuffer2D(Context *ctx, void *dst, size_t dpitch, int dst_offset,
 
 void openCLFree(void *devPtr)
 {
+#ifdef MEMORY_CORRUPTION_GUARD
 #ifdef CHECK_MEMORY_CORRUPTION
     bool failBefore = false, failAfter = false;
+#endif
     CheckBuffers data;
     std::map<cl_mem, CheckBuffers>::iterator i = __check_buffers.find((cl_mem)devPtr);
     if (i != __check_buffers.end())
     {
         data = i->second;
+#ifdef CHECK_MEMORY_CORRUPTION
         Context* ctx = Context::getContext();
-        std::vector<uchar> checkBefore(__memory_corruption_check_bytes);
-        std::vector<uchar> checkAfter(__memory_corruption_check_bytes);
+        std::vector<uchar> checkBefore(__memory_corruption_guard_bytes);
+        std::vector<uchar> checkAfter(__memory_corruption_guard_bytes);
         openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(ctx),
-                data.mainBuffer, CL_TRUE, 0, __memory_corruption_check_bytes, &checkBefore[0],
+                data.mainBuffer, CL_FALSE, 0, __memory_corruption_guard_bytes, &checkBefore[0],
                 0, NULL, NULL));
         openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(ctx),
-                data.mainBuffer, CL_TRUE, __memory_corruption_check_bytes + data.size, __memory_corruption_check_bytes, &checkAfter[0],
+                data.mainBuffer, CL_FALSE, __memory_corruption_guard_bytes + data.size, __memory_corruption_guard_bytes, &checkAfter[0],
                 0, NULL, NULL));
+        clFinish(getClCommandQueue(ctx));
 
-        std::vector<int> tmp(__memory_corruption_check_bytes / sizeof(int),
+        std::vector<int> tmp(__memory_corruption_guard_bytes / sizeof(int),
                 __memory_corruption_check_pattern);
 
-        if (memcmp(&checkBefore[0], &tmp[0], __memory_corruption_check_bytes) != 0)
+        if (memcmp(&checkBefore[0], &tmp[0], __memory_corruption_guard_bytes) != 0)
         {
             failBefore = true;
         }
-        if (memcmp(&checkAfter[0], &tmp[0], __memory_corruption_check_bytes) != 0)
+        if (memcmp(&checkAfter[0], &tmp[0], __memory_corruption_guard_bytes) != 0)
         {
             failAfter = true;
         }
+#else
+        // TODO FIXIT Attach clReleaseMemObject call to event completion callback
+        Context* ctx = Context::getContext();
+        clFinish(getClCommandQueue(ctx));
+#endif
         openCLSafeCall(clReleaseMemObject(data.mainBuffer));
         __check_buffers.erase(i);
     }
 #endif
     openCLSafeCall(clReleaseMemObject((cl_mem)devPtr));
-#ifdef CHECK_MEMORY_CORRUPTION
+#if defined(MEMORY_CORRUPTION_GUARD) && defined(CHECK_MEMORY_CORRUPTION)
     if (failBefore)
     {
 #ifdef CHECK_MEMORY_CORRUPTION_PRINT_ERROR

From 5a333bfff43506ff6ac03c66f3b91f7d65e56fbd Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.alekhin@itseez.com>
Date: Wed, 30 Oct 2013 18:22:18 +0400
Subject: [PATCH 17/28] ocl: update documentation

---
 ...mera_calibration_and_3D_reconstruction.rst |   6 -
 modules/ocl/doc/data_structures.rst           | 310 +++++++++---------
 modules/ocl/doc/image_processing.rst          |  12 +-
 modules/ocl/doc/introduction.rst              |  53 +--
 modules/ocl/doc/video_analysis.rst            |  56 ++--
 5 files changed, 221 insertions(+), 216 deletions(-)

diff --git a/modules/ocl/doc/camera_calibration_and_3D_reconstruction.rst b/modules/ocl/doc/camera_calibration_and_3D_reconstruction.rst
index 96ed6bbad3..824366927f 100644
--- a/modules/ocl/doc/camera_calibration_and_3D_reconstruction.rst
+++ b/modules/ocl/doc/camera_calibration_and_3D_reconstruction.rst
@@ -86,8 +86,6 @@ Enables the stereo correspondence operator that finds the disparity for the spec
 
     :param disparity: Output disparity map. It is a  ``CV_8UC1``  image with the same size as the input images.
 
-    :param stream: Stream for the asynchronous version.
-
 
 ocl::StereoBM_OCL::checkIfGpuCallReasonable
 -----------------------------------------------
@@ -218,8 +216,6 @@ Enables the stereo correspondence operator that finds the disparity for the spec
 
     :param disparity: Output disparity map. If  ``disparity``  is empty, the output type is  ``CV_16SC1`` . Otherwise, the type is retained.
 
-    :param stream: Stream for the asynchronous version.
-
 ocl::StereoConstantSpaceBP
 ------------------------------
 .. ocv:class:: ocl::StereoConstantSpaceBP
@@ -330,5 +326,3 @@ Enables the stereo correspondence operator that finds the disparity for the spec
     :param right: Right image with the same size and the same type as the left one.
 
     :param disparity: Output disparity map. If  ``disparity``  is empty, the output type is  ``CV_16SC1`` . Otherwise, the output type is  ``disparity.type()`` .
-
-    :param stream: Stream for the asynchronous version.
\ No newline at end of file
diff --git a/modules/ocl/doc/data_structures.rst b/modules/ocl/doc/data_structures.rst
index 556efa961e..01a16739ba 100644
--- a/modules/ocl/doc/data_structures.rst
+++ b/modules/ocl/doc/data_structures.rst
@@ -5,185 +5,193 @@ Data Structures
 
 OpenCV C++ 1-D or 2-D dense array class ::
 
-        class CV_EXPORTS oclMat
-        {
-        public:
-            //! default constructor
-            oclMat();
-            //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
-            oclMat(int rows, int cols, int type);
-            oclMat(Size size, int type);
-            //! constucts oclMatrix and fills it with the specified value _s.
-            oclMat(int rows, int cols, int type, const Scalar &s);
-            oclMat(Size size, int type, const Scalar &s);
-            //! copy constructor
-            oclMat(const oclMat &m);
+    class CV_EXPORTS oclMat
+    {
+    public:
+        //! default constructor
+        oclMat();
+        //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+        oclMat(int rows, int cols, int type);
+        oclMat(Size size, int type);
+        //! constructs oclMatrix and fills it with the specified value _s.
+        oclMat(int rows, int cols, int type, const Scalar &s);
+        oclMat(Size size, int type, const Scalar &s);
+        //! copy constructor
+        oclMat(const oclMat &m);
 
-            //! constructor for oclMatrix headers pointing to user-allocated data
-            oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
-            oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
+        //! constructor for oclMatrix headers pointing to user-allocated data
+        oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
+        oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
 
-            //! creates a matrix header for a part of the bigger matrix
-            oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
-            oclMat(const oclMat &m, const Rect &roi);
+        //! creates a matrix header for a part of the bigger matrix
+        oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
+        oclMat(const oclMat &m, const Rect &roi);
 
-            //! builds oclMat from Mat. Perfom blocking upload to device.
-            explicit oclMat (const Mat &m);
+        //! builds oclMat from Mat. Performs a blocking upload to device.
+        explicit oclMat (const Mat &m);
 
-            //! destructor - calls release()
-            ~oclMat();
+        //! destructor - calls release()
+        ~oclMat();
 
-            //! assignment operators
-            oclMat &operator = (const oclMat &m);
-            //! assignment operator. Perfom blocking upload to device.
-            oclMat &operator = (const Mat &m);
+        //! assignment operators
+        oclMat &operator = (const oclMat &m);
+        //! assignment operator. Performs a blocking upload to device.
+        oclMat &operator = (const Mat &m);
+        oclMat &operator = (const oclMatExpr& expr);
+
+        //! performs blocking upload of data to oclMat.
+        void upload(const cv::Mat &m);
 
 
-            //! pefroms blocking upload data to oclMat.
-            void upload(const cv::Mat &m);
+        //! downloads data from device to host memory. Blocking calls.
+        operator Mat() const;
+        void download(cv::Mat &m) const;
 
+        //! convert to _InputArray
+        operator _InputArray();
 
-            //! downloads data from device to host memory. Blocking calls.
-            operator Mat() const;
-            void download(cv::Mat &m) const;
+        //! convert to _OutputArray
+        operator _OutputArray();
 
+        //! returns a new oclMatrix header for the specified row
+        oclMat row(int y) const;
+        //! returns a new oclMatrix header for the specified column
+        oclMat col(int x) const;
+        //! ... for the specified row span
+        oclMat rowRange(int startrow, int endrow) const;
+        oclMat rowRange(const Range &r) const;
+        //! ... for the specified column span
+        oclMat colRange(int startcol, int endcol) const;
+        oclMat colRange(const Range &r) const;
 
-            //! returns a new oclMatrix header for the specified row
-            oclMat row(int y) const;
-            //! returns a new oclMatrix header for the specified column
-            oclMat col(int x) const;
-            //! ... for the specified row span
-            oclMat rowRange(int startrow, int endrow) const;
-            oclMat rowRange(const Range &r) const;
-            //! ... for the specified column span
-            oclMat colRange(int startcol, int endcol) const;
-            oclMat colRange(const Range &r) const;
+        //! returns deep copy of the oclMatrix, i.e. the data is copied
+        oclMat clone() const;
 
-            //! returns deep copy of the oclMatrix, i.e. the data is copied
-            oclMat clone() const;
-            //! copies the oclMatrix content to "m".
-            // It calls m.create(this->size(), this->type()).
-            // It supports any data type
-            void copyTo( oclMat &m ) const;
-            //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
-            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
-            void copyTo( oclMat &m, const oclMat &mask ) const;
-            //! converts oclMatrix to another datatype with optional scalng. See cvConvertScale.
-            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
-            void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
+        //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
+        // It calls m.create(this->size(), this->type()).
+        // It supports any data type
+        void copyTo( oclMat &m, const oclMat &mask = oclMat()) const;
 
-            void assignTo( oclMat &m, int type = -1 ) const;
+        //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
+        void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
 
-            //! sets every oclMatrix element to s
-            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
-            oclMat &operator = (const Scalar &s);
-            //! sets some of the oclMatrix elements to s, according to the mask
-            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
-            oclMat &setTo(const Scalar &s, const oclMat &mask = oclMat());
-            //! creates alternative oclMatrix header for the same data, with different
-            // number of channels and/or different number of rows. see cvReshape.
-            oclMat reshape(int cn, int rows = 0) const;
+        void assignTo( oclMat &m, int type = -1 ) const;
 
-            //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
-            // previous data is unreferenced if needed.
-            void create(int rows, int cols, int type);
-            void create(Size size, int type);
-            //! decreases reference counter;
-            // deallocate the data when reference counter reaches 0.
-            void release();
+        //! sets every oclMatrix element to s
+        oclMat& operator = (const Scalar &s);
+        //! sets some of the oclMatrix elements to s, according to the mask
+        oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
+        //! creates alternative oclMatrix header for the same data, with different
+        // number of channels and/or different number of rows. see cvReshape.
+        oclMat reshape(int cn, int rows = 0) const;
 
-            //! swaps with other smart pointer
-            void swap(oclMat &mat);
+        //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
+        // previous data is unreferenced if needed.
+        void create(int rows, int cols, int type);
+        void create(Size size, int type);
 
-            //! locates oclMatrix header within a parent oclMatrix. See below
-            void locateROI( Size &wholeSize, Point &ofs ) const;
-            //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
-            oclMat &adjustROI( int dtop, int dbottom, int dleft, int dright );
-            //! extracts a rectangular sub-oclMatrix
-            // (this is a generalized form of row, rowRange etc.)
-            oclMat operator()( Range rowRange, Range colRange ) const;
-            oclMat operator()( const Rect &roi ) const;
+        //! allocates new oclMatrix with specified device memory type.
+        void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
+        void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
 
-            //! returns true if the oclMatrix data is continuous
-            // (i.e. when there are no gaps between successive rows).
-            // similar to CV_IS_oclMat_CONT(cvoclMat->type)
-            bool isContinuous() const;
-            //! returns element size in bytes,
-            // similar to CV_ELEM_SIZE(cvMat->type)
-            size_t elemSize() const;
-            //! returns the size of element channel in bytes.
-            size_t elemSize1() const;
-            //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
-            int type() const;
-            //! returns element type, i.e. 8UC3 returns 8UC4 because in ocl
-            //! 3 channels element actually use 4 channel space
-            int ocltype() const;
-            //! returns element type, similar to CV_MAT_DEPTH(cvMat->type)
-            int depth() const;
-            //! returns element type, similar to CV_MAT_CN(cvMat->type)
-            int channels() const;
-            //! returns element type, return 4 for 3 channels element,
-            //!becuase 3 channels element actually use 4 channel space
-            int oclchannels() const;
-            //! returns step/elemSize1()
-            size_t step1() const;
-            //! returns oclMatrix size:
-            // width == number of columns, height == number of rows
-            Size size() const;
-            //! returns true if oclMatrix data is NULL
-            bool empty() const;
+        //! decreases reference counter;
+        // deallocate the data when reference counter reaches 0.
+        void release();
 
-            //! returns pointer to y-th row
-            uchar *ptr(int y = 0);
-            const uchar *ptr(int y = 0) const;
+        //! swaps with other smart pointer
+        void swap(oclMat &mat);
 
-            //! template version of the above method
-            template<typename _Tp> _Tp *ptr(int y = 0);
-            template<typename _Tp> const _Tp *ptr(int y = 0) const;
+        //! locates oclMatrix header within a parent oclMatrix. See below
+        void locateROI( Size &wholeSize, Point &ofs ) const;
+        //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
+        oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+        //! extracts a rectangular sub-oclMatrix
+        // (this is a generalized form of row, rowRange etc.)
+        oclMat operator()( Range rowRange, Range colRange ) const;
+        oclMat operator()( const Rect &roi ) const;
 
-            //! matrix transposition
-            oclMat t() const;
+        oclMat& operator+=( const oclMat& m );
+        oclMat& operator-=( const oclMat& m );
+        oclMat& operator*=( const oclMat& m );
+        oclMat& operator/=( const oclMat& m );
 
-            /*! includes several bit-fields:
-              - the magic signature
-              - continuity flag
-              - depth
-              - number of channels
-              */
-            int flags;
-            //! the number of rows and columns
-            int rows, cols;
-            //! a distance between successive rows in bytes; includes the gap if any
-            size_t step;
-            //! pointer to the data(OCL memory object)
-            uchar *data;
+        //! returns true if the oclMatrix data is continuous
+        // (i.e. when there are no gaps between successive rows).
+        // similar to CV_IS_oclMat_CONT(cvoclMat->type)
+        bool isContinuous() const;
+        //! returns element size in bytes,
+        // similar to CV_ELEM_SIZE(cvMat->type)
+        size_t elemSize() const;
+        //! returns the size of element channel in bytes.
+        size_t elemSize1() const;
+        //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
+        int type() const;
+        //! returns element type, i.e. 8UC3 returns 8UC4 because in ocl
+        //! 3 channels element actually use 4 channel space
+        int ocltype() const;
+        //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
+        int depth() const;
+        //! returns number of channels, similar to CV_MAT_CN(cvMat->type)
+        int channels() const;
+        //! returns number of channels as stored in OCL memory, i.e. 4 for a 3-channel element,
+        //! because a 3-channel element actually uses 4-channel space
+        int oclchannels() const;
+        //! returns step/elemSize1()
+        size_t step1() const;
+        //! returns oclMatrix size:
+        // width == number of columns, height == number of rows
+        Size size() const;
+        //! returns true if oclMatrix data is NULL
+        bool empty() const;
 
-            //! pointer to the reference counter;
-            // when oclMatrix points to user-allocated data, the pointer is NULL
-            int *refcount;
+        //! returns pointer to y-th row
+        uchar* ptr(int y = 0);
+        const uchar *ptr(int y = 0) const;
 
-            //! helper fields used in locateROI and adjustROI
-            //datastart and dataend are not used in current version
-            uchar *datastart;
-            uchar *dataend;
+        //! template version of the above method
+        template<typename _Tp> _Tp *ptr(int y = 0);
+        template<typename _Tp> const _Tp *ptr(int y = 0) const;
 
-            //! OpenCL context associated with the oclMat object.
-            Context *clCxt;
-            //add offset for handle ROI, calculated in byte
-            int offset;
-            //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
-            int wholerows;
-            int wholecols;
-        };
+        //! matrix transposition
+        oclMat t() const;
 
-Basically speaking, the oclMat is the mirror of Mat with the extension of ocl feature, the members have the same meaning and useage of Mat except following:
+        /*! includes several bit-fields:
+          - the magic signature
+          - continuity flag
+          - depth
+          - number of channels
+          */
+        int flags;
+        //! the number of rows and columns
+        int rows, cols;
+        //! a distance between successive rows in bytes; includes the gap if any
+        size_t step;
+        //! pointer to the data(OCL memory object)
+        uchar *data;
 
-datastart and dataend are replaced with wholerows and wholecols
+        //! pointer to the reference counter;
+        // when oclMatrix points to user-allocated data, the pointer is NULL
+        int *refcount;
 
-add clCxt for oclMat
+        //! helper fields used in locateROI and adjustROI
+        //datastart and dataend are not used in current version
+        uchar *datastart;
+        uchar *dataend;
 
-Only basic flags are supported in oclMat(i.e. depth number of channels)
+        //! OpenCL context associated with the oclMat object.
+        Context *clCxt;
+        //add offset for handle ROI, calculated in byte
+        int offset;
+        //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
+        int wholerows;
+        int wholecols;
+    };
 
-All the 3-channel matrix(i.e. RGB image) are represented by 4-channel matrix in oclMat. It means 3-channel image have 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and oclMat.
+Basically speaking, ``oclMat`` is the mirror of ``Mat`` with the extension of OCL features; its members have the same meaning and usage as those of ``Mat``, except for the following:
 
-For example: If a oclMat has 3 channels, channels() returns 3 and oclchannels() returns 4
+* ``datastart`` and ``dataend`` are replaced with ``wholerows`` and ``wholecols``
+
+* Only basic flags are supported in ``oclMat`` (i.e. depth and number of channels)
+
+* All 3-channel matrices (i.e. RGB images) are represented by 4-channel matrices in ``oclMat``. It means that a 3-channel image has 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV ``Mat`` and ``oclMat``.
+  For example: if an ``oclMat`` has 3 channels, ``channels()`` returns 3 and ``oclchannels()`` returns 4.
diff --git a/modules/ocl/doc/image_processing.rst b/modules/ocl/doc/image_processing.rst
index 247f355484..7dde475cc4 100644
--- a/modules/ocl/doc/image_processing.rst
+++ b/modules/ocl/doc/image_processing.rst
@@ -146,7 +146,7 @@ Returns void
 
 .. ocv:function:: void ocl::remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar())
 
-    :param src: Source image. Only CV_8UC1 and CV_32FC1 images are supported now.
+    :param src: Source image.
 
     :param dst: Destination image containing cornerness values. It has the same size as src and CV_32FC1 type.
 
@@ -156,11 +156,11 @@ Returns void
 
     :param interpolation: The interpolation method
 
-    :param bordertype: Pixel extrapolation method. Only BORDER_CONSTANT are supported now.
+    :param bordertype: Pixel extrapolation method.
 
     :param value: The border value if borderType==BORDER CONSTANT
 
-The function remap transforms the source image using the specified map: dst (x ,y) = src (map1(x , y) , map2(x , y)) where values of pixels with non-integer coordinates are computed using one of available interpolation methods. map1 and map2 can be encoded as separate floating-point maps in map1 and map2 respectively, or interleaved floating-point maps of (x,y) in map1. Supports CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1 , CV_32FC3 and CV_32FC4 data types.
+The function remap transforms the source image using the specified map: dst (x ,y) = src (map1(x , y) , map2(x , y)) where values of pixels with non-integer coordinates are computed using one of available interpolation methods. map1 and map2 can be encoded as separate floating-point maps in map1 and map2 respectively, or interleaved floating-point maps of (x,y) in map1.
 
 ocl::resize
 ------------------
@@ -222,7 +222,7 @@ ocl::cvtColor
 ------------------
 Returns void
 
-.. ocv:function:: void ocl::cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0)
+.. ocv:function:: void ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn = 0)
 
     :param src: Source image.
 
@@ -250,7 +250,7 @@ Returns Threshold value
 
     :param type: Thresholding type
 
-The function applies fixed-level thresholding to a single-channel array. The function is typically used to get a bi-level (binary) image out of a grayscale image or for removing a noise, i.e. filtering out pixels with too small or too large values. There are several types of thresholding that the function supports that are determined by thresholdType. Supports only CV_32FC1 and CV_8UC1 data type.
+The function applies fixed-level thresholding to a single-channel array. The function is typically used to get a bi-level (binary) image out of a grayscale image or for removing a noise, i.e. filtering out pixels with too small or too large values. There are several types of thresholding that the function supports that are determined by thresholdType.
 
 ocl::buildWarpPlaneMaps
 -----------------------
@@ -311,4 +311,4 @@ Builds transformation maps for affine transformation.
 
     :param ymap: Y values with  ``CV_32FC1`` type.
 
-.. seealso:: :ocv:func:`ocl::warpAffine` , :ocv:func:`ocl::remap`
\ No newline at end of file
+.. seealso:: :ocv:func:`ocl::warpAffine` , :ocv:func:`ocl::remap`
diff --git a/modules/ocl/doc/introduction.rst b/modules/ocl/doc/introduction.rst
index 7dda96396f..2c050cb275 100644
--- a/modules/ocl/doc/introduction.rst
+++ b/modules/ocl/doc/introduction.rst
@@ -6,53 +6,68 @@ OpenCL Module Introduction
 General Information
 -------------------
 
-The OpenCV OCL module contains  a set of classes and functions that implement and accelerate select openCV functionality on OpenCL compatible devices. OpenCL is a Khronos standard, implemented by a variety of devices (CPUs, GPUs, FPGAs, ARM), abstracting the exact hardware details, while enabling vendors to provide native implementation for maximal acceleration on their hardware. The standard enjoys wide industry support, and the end user of the module will enjoy the data parallelism benefits that the specific platform/hardware may be capable of, in a platform/hardware independent manner.
+The OpenCV OCL module contains a set of classes and functions that implement and accelerate OpenCV functionality on OpenCL compatible devices. OpenCL is a Khronos standard, implemented by a variety of devices (CPUs, GPUs, FPGAs, ARM), abstracting the exact hardware details, while enabling vendors to provide native implementation for maximal acceleration on their hardware. The standard enjoys wide industry support, and the end user of the module will enjoy the data parallelism benefits that the specific platform/hardware may be capable of, in a platform/hardware independent manner.
 
-While in the future we hope to validate (and enable) the OCL module in all OpenCL capable devices, we currently develop and test on GPU devices only. This includes both discrete GPUs (NVidia, AMD), as well as integrated chips(AMD APU and intel HD devices). Performance of any particular algorithm will depend on the particular platform characteristics and capabilities. However, currently (as of 2.4.4), accuracy and  mathematical correctness has been verified to be identical to that of the pure CPU implementation on all tested GPU devices and platforms (both windows and linux).
+While in the future we hope to validate (and enable) the OCL module on all OpenCL-capable devices, we currently develop and test on GPU devices only. This includes both discrete GPUs (NVidia, AMD), as well as integrated chips (AMD APU and Intel HD devices). Performance of any particular algorithm will depend on the particular platform characteristics and capabilities. However, currently, accuracy and mathematical correctness have been verified to be identical to that of the pure CPU implementation on all tested GPU devices and platforms (both Windows and Linux).
 
 
-The OpenCV OCL module includes utility functions, low-level vision primitives, and high-level algorithms. The utility functions and low-level primitives provide a powerful infrastructure for developing fast vision algorithms taking advangtage of OCL whereas the high-level functionality (samples)includes some state-of-the-art algorithms (including LK Optical flow, and Face detection) ready to be used by the application developers. The module is also accompanied by an extensive performance and accuracy test suite.
+The OpenCV OCL module includes utility functions, low-level vision primitives, and high-level algorithms. The utility functions and low-level primitives provide a powerful infrastructure for developing fast vision algorithms taking advantage of OCL, whereas the high-level functionality (samples) includes some state-of-the-art algorithms (including LK Optical flow, and Face detection) ready to be used by the application developers. The module is also accompanied by an extensive performance and accuracy test suite.
 
-The OpenCV OCL module is designed for ease of use and does not require any knowledge of OpenCL. At a minimuml level, it can be viewed as a set of accelerators, that can take advantage of the high compute throughput that GPU/APU devices can provide. However, it can also be viewed as a starting point to really integratethe  built-in functionality with your own custom OpenCL kernels, with or without modifying the source of OpenCV-OCL. Of course, knowledge of OpenCL will certainly help, however we hope that OpenCV-OCL module, and  the kernels it contains in source code, can be very useful as a means of actually learning openCL. Such a knowledge would be necessary to further fine-tune any of the existing OpenCL kernels, or for extending the framework with new kernels. As of OpenCV 2.4.4, we introduce interoperability with OpenCL, enabling easy use of custom OpenCL kernels within the OpenCV framework.
+The OpenCV OCL module is designed for ease of use and does not require any knowledge of OpenCL. At a minimum level, it can be viewed as a set of accelerators that can take advantage of the high compute throughput that GPU/APU devices can provide. However, it can also be viewed as a starting point to really integrate the built-in functionality with your own custom OpenCL kernels, with or without modifying the source of OpenCV-OCL. Of course, knowledge of OpenCL will certainly help; however, we hope that the OpenCV-OCL module, and the kernels it contains in source code, can be very useful as a means of actually learning OpenCL. Such knowledge would be necessary to further fine-tune any of the existing OpenCL kernels, or for extending the framework with new kernels. As of OpenCV 2.4.4, we introduce interoperability with OpenCL, enabling easy use of custom OpenCL kernels within the OpenCV framework.
 
-To use the OCL module, you need to make sure that you have the OpenCL SDK provided with your device vendor. To correctly run the OCL module, you need to have the OpenCL runtime provide by the device vendor, typically the device driver.
+To correctly run the OCL module, you need to have the OpenCL runtime provided by the device vendor, typically the device driver.
 
-To enable OCL support, configure OpenCV using CMake with WITH\_OPENCL=ON. When the flag is set and if OpenCL SDK is installed, the full-featured OpenCV OCL module is built. Otherwise, the module may be not built. If you have AMD'S FFT and BLAS library, you can select it with WITH\_OPENCLAMDFFT=ON, WITH\_OPENCLAMDBLAS=ON.
+To enable OCL support, configure OpenCV using CMake with ``WITH_OPENCL=ON``. When the flag is set and the OpenCL SDK is installed, the full-featured OpenCV OCL module is built. Otherwise, the module may not be built. If you have AMD's FFT and BLAS libraries, you can select them with ``WITH_OPENCLAMDFFT=ON`` and ``WITH_OPENCLAMDBLAS=ON``.
 
-The ocl module can be found under the "modules" directory. In "modules/ocl/src" you can find the source code for the cpp class that wrap around the direct kernel invocation. The kernels themselves can be found in "modules/ocl/src/kernels."  Samples can be found under "samples/ocl." Accuracy tests can be found in "modules/ocl/test," and performance tests under "module/ocl/perf."
+The ocl module can be found under the "modules" directory. In "modules/ocl/src" you can find the source code for the C++ classes that wrap the direct kernel invocation. The kernels themselves can be found in "modules/ocl/src/opencl". Samples can be found under "samples/ocl". Accuracy tests can be found in "modules/ocl/test", and performance tests under "modules/ocl/perf".
 
 
 
+Right now, the user can select the OpenCL device by specifying the environment variable ``OPENCV_OPENCL_DEVICE``. Variable format:
 
-Right now, the user should define the cv::ocl::Info class in the application and call cv::ocl::getDevice before any cv::ocl::func. This operation initialize OpenCL runtime and set the first found device as computing device. If there are more than one device and you want to use undefault device, you can call cv::ocl::setDevice then.
+.. code-block:: cpp
 
-In the current version, all the thread share the same context and device so the multi-devices are not supported. We will add this feature soon. If a function support 4-channel operator, it should support 3-channel operator as well, because All the 3-channel matrix(i.e. RGB image) are represented by 4-channel matrix in oclMat. It means 3-channel image have 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and oclMat.
+    <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<DeviceName or ID>
+
+**Note:** Device ID range is 0..9 (only one digit is allowed; e.g. ``10`` is treated as part of a device name).
+
+Samples:
+
+.. code-block:: cpp
+
+    '' = ':' = '::' = ':GPU|CPU:'
+    'AMD:GPU|CPU:'
+    'AMD::Tahiti'
+    ':GPU:1'
+    ':CPU:2'
+
+Alternatively, the user can call the ``cv::ocl::setDevice`` function (together with ``cv::ocl::getOpenCLPlatforms`` and ``cv::ocl::getOpenCLDevices``). This function initializes the OpenCL runtime and sets up the passed device as the computing device.
+
+In the current version, all threads share the same context and device, so multiple devices are not supported. We will add this feature soon. If a function supports 4-channel matrices, it should support 3-channel matrices as well, because all 3-channel matrices (i.e. RGB images) are represented by 4-channel matrices in ``oclMat``. It means that a 3-channel image has 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and ``oclMat``.
 
 Developer Notes
 -------------------
 
-In a heterogeneous device environment, there may be cost associated with data transfer. This would be the case, for example, when data needs to be moved from host memory (accessible to the CPU), to device memory (accessible to a discrete GPU). in the case of integrated graphics chips, there may be performance issues, relating to memory coherency between access from the GPU "part" of the integrated device, or the CPU "part." For best performance, in either case, it is recommended that you do not introduce dat transfers between CPU and the discrete GPU, except in the beginning and the end of the algorithmic pipeline.
+In a heterogeneous device environment, there may be cost associated with data transfer. This would be the case, for example, when data needs to be moved from host memory (accessible to the CPU), to device memory (accessible to a discrete GPU). In the case of integrated graphics chips, there may be performance issues, relating to memory coherency between access from the GPU "part" of the integrated device, or the CPU "part." For best performance, in either case, it is recommended that you do not introduce data transfers between CPU and the discrete GPU, except in the beginning and the end of the algorithmic pipeline.
 
 Some tidbits:
 
 1. OpenCL version should be larger than 1.1 with FULL PROFILE.
 
-2. Currently (2.4.4) the user call the cv::ocl::getDevice before any other function in the ocl module. This will initialize the OpenCL runtime and set the first found device as computing device. If there are more than one device and you want to use undefault device, you can call cv::ocl::setDevice thereafter.
-
 2. Currently there's only one OpenCL context and command queue. We hope to implement multi device and multi queue support in the future.
 
 3. Many kernels use 256 as its workgroup size if possible, so the max work group size of the device must larger than 256. All GPU devices we are aware of indeed support 256 workitems in a workgroup, however non GPU devices may not. This will be improved in the future.
 
-4. If the device does not support double arithetic, we revert to float.
+4. If the device does not support double arithmetic, then the function's implementation generates an error.
 
-5. The oclMat uses buffer object, not image object.
+5. The ``oclMat`` uses buffer object, not image object.
 
-6. All the 3-channel matrices(i.e. RGB image) are represented by 4-channel matrices in oclMat, with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and oclMat.
+6. All the 3-channel matrices (i.e. RGB image) are represented by 4-channel matrices in ``oclMat``, with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and ``oclMat``.
 
-7. All the matrix in oclMat is aligned in column(now the alignment factor is 32 byte). It means, if a matrix is n columns m rows with the element size x byte, we will assign ALIGNMENT(x*n) bytes for each column with the last ALIGNMENT(x*n) - x*n bytes unused, so there's small holes after each column if its size is not the multiply of ALIGN.
+7. All matrices in ``oclMat`` are aligned in column (currently the alignment factor for ``step`` is 32+ bytes). It means that ``m.cols * m.elemSize() <= m.step``.
 
-8. Data transfer between Mat and oclMat: If the CPU matrix is aligned in column, we will use faster API to transfer between Mat and oclMat, otherwise, we will use clEnqueueRead/WriteBufferRect to transfer data to guarantee the alignment. 3-channel matrix is an exception, it's directly transferred to a temp buffer and then padded to 4-channel matrix(also aligned) when uploading and do the reverse operation when downloading.
+8. Data transfer between Mat and ``oclMat``: If the CPU matrix is aligned in column, we use a faster API to transfer between Mat and ``oclMat``; otherwise, we use clEnqueueRead/WriteBufferRect to transfer data while guaranteeing the alignment. A 3-channel matrix is an exception: it is directly transferred to a temporary buffer and then padded to a 4-channel matrix (also aligned) when uploading, and the reverse operation is done when downloading.
 
-9. Data transfer between Mat and oclMat: ROI is a feature of OpenCV, which allow users process a sub rectangle of a matrix. When a CPU matrix which has ROI will be transfered to GPU, the whole matrix will be transfered and set ROI as CPU's. In a word, we always transfer the whole matrix despite whether it has ROI or not.
+9. Data transfer between Mat and ``oclMat``: ROI is a feature of OpenCV which allows users to process a sub-rectangle of a matrix. When a CPU matrix which has an ROI is transferred to the GPU, the whole matrix is transferred and the ROI is set to match the CPU's. In short, we always transfer the whole matrix regardless of whether it has an ROI or not.
 
-10. All the kernel file should locate in ocl/src/kernels/ with the extension ".cl". ALL the kernel files are transformed to pure characters at compilation time in kernels.cpp, and the file name without extension is the name of the characters.
+10. All kernel files should be located in "modules/ocl/src/opencl/" with the extension ".cl". All the kernel files are transformed to pure characters at compilation time in opencl_kernels.cpp, and the file name without extension is the name of the program sources.
diff --git a/modules/ocl/doc/video_analysis.rst b/modules/ocl/doc/video_analysis.rst
index 599c0f9b63..70ff66b0be 100644
--- a/modules/ocl/doc/video_analysis.rst
+++ b/modules/ocl/doc/video_analysis.rst
@@ -117,7 +117,6 @@ Computes a dense optical flow using the Gunnar Farneback's algorithm.
     :param frame1: Second 8-bit gray-scale input image
     :param flowx: Flow horizontal component
     :param flowy: Flow vertical component
-    :param s: Stream
 
 .. seealso:: :ocv:func:`calcOpticalFlowFarneback`
 
@@ -230,8 +229,6 @@ Interpolates frames (images) using provided optical flow (displacement field).
 
     :param buf: Temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat: occlusion masks for first frame, occlusion masks for second, interpolated forward horizontal flow, interpolated forward vertical flow, interpolated backward horizontal flow, interpolated backward vertical flow.
 
-    :param stream: Stream for the asynchronous version.
-
 ocl::KalmanFilter
 --------------------
 .. ocv:class:: ocl::KalmanFilter
@@ -418,8 +415,6 @@ Updates the background model and returns the foreground mask.
 
     :param fgmask: The output foreground mask as an 8-bit binary image.
 
-    :param stream: Stream for the asynchronous version.
-
 
 ocl::MOG::getBackgroundImage
 --------------------------------
@@ -429,8 +424,6 @@ Computes a background image.
 
     :param backgroundImage: The output background image.
 
-    :param stream: Stream for the asynchronous version.
-
 
 ocl::MOG::release
 ---------------------
@@ -443,7 +436,9 @@ ocl::MOG2
 -------------
 .. ocv:class:: ocl::MOG2 : public ocl::BackgroundSubtractor
 
-Gaussian Mixture-based Background/Foreground Segmentation Algorithm. ::
+  Gaussian Mixture-based Background/Foreground Segmentation Algorithm.
+
+  The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements the algorithm described in [MOG2004]_. ::
 
     class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
     {
@@ -485,45 +480,42 @@ Gaussian Mixture-based Background/Foreground Segmentation Algorithm. ::
         /* hidden */
     };
 
-  The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [MOG2004]_.
+  .. ocv:member:: float backgroundRatio
 
-  Here are important members of the class that control the algorithm, which you can set after constructing the class instance:
+      Threshold defining whether the component is significant enough to be included into the background model. ``cf=0.1 => TB=0.9`` is default. For ``alpha=0.001``, it means that the mode should exist for approximately 105 frames before it is considered foreground.
 
-    .. ocv:member:: float backgroundRatio
+  .. ocv:member:: float varThreshold
 
-        Threshold defining whether the component is significant enough to be included into the background model. ``cf=0.1 => TB=0.9`` is default. For ``alpha=0.001``, it means that the mode should exist for approximately 105 frames before it is considered foreground.
+      Threshold for the squared Mahalanobis distance that helps decide when a sample is close to the existing components (corresponds to ``Tg``). If it is not close to any component, a new component is generated. ``3 sigma => Tg=3*3=9`` is default. A smaller ``Tg`` value generates more components. A higher ``Tg`` value may result in a small number of components but they can grow too large.
 
-    .. ocv:member:: float varThreshold
+  .. ocv:member:: float fVarInit
 
-        Threshold for the squared Mahalanobis distance that helps decide when a sample is close to the existing components (corresponds to ``Tg``). If it is not close to any component, a new component is generated. ``3 sigma => Tg=3*3=9`` is default. A smaller ``Tg`` value generates more components. A higher ``Tg`` value may result in a small number of components but they can grow too large.
+      Initial variance for the newly generated components. It affects the speed of adaptation. The parameter value is based on your estimate of the typical standard deviation from the images. OpenCV uses 15 as a reasonable value.
 
-    .. ocv:member:: float fVarInit
+  .. ocv:member:: float fVarMin
 
-        Initial variance for the newly generated components. It affects the speed of adaptation. The parameter value is based on your estimate of the typical standard deviation from the images. OpenCV uses 15 as a reasonable value.
+      Parameter used to further control the variance.
 
-    .. ocv:member:: float fVarMin
+  .. ocv:member:: float fVarMax
 
-        Parameter used to further control the variance.
+      Parameter used to further control the variance.
 
-    .. ocv:member:: float fVarMax
+  .. ocv:member:: float fCT
 
-        Parameter used to further control the variance.
+      Complexity reduction parameter. This parameter defines the number of samples needed to accept to prove the component exists. ``CT=0.05`` is a default value for all the samples. By setting ``CT=0`` you get an algorithm very similar to the standard Stauffer&Grimson algorithm.
 
-    .. ocv:member:: float fCT
+  .. ocv:member:: uchar nShadowDetection
 
-        Complexity reduction parameter. This parameter defines the number of samples needed to accept to prove the component exists. ``CT=0.05`` is a default value for all the samples. By setting ``CT=0`` you get an algorithm very similar to the standard Stauffer&Grimson algorithm.
+      The value for marking shadow pixels in the output foreground mask. Default value is 127.
 
-    .. ocv:member:: uchar nShadowDetection
+  .. ocv:member:: float fTau
 
-        The value for marking shadow pixels in the output foreground mask. Default value is 127.
+      Shadow threshold. The shadow is detected if the pixel is a darker version of the background. ``Tau`` is a threshold defining how much darker the shadow can be. ``Tau= 0.5`` means that if a pixel is more than twice darker then it is not shadow. See [ShadowDetect2003]_.
 
-    .. ocv:member:: float fTau
+  .. ocv:member:: bool bShadowDetection
 
-        Shadow threshold. The shadow is detected if the pixel is a darker version of the background. ``Tau`` is a threshold defining how much darker the shadow can be. ``Tau= 0.5`` means that if a pixel is more than twice darker then it is not shadow. See [ShadowDetect2003]_.
+      Parameter defining whether shadow detection should be enabled.
 
-    .. ocv:member:: bool bShadowDetection
-
-        Parameter defining whether shadow detection should be enabled.
 
 .. seealso:: :ocv:class:`BackgroundSubtractorMOG2`
 
@@ -549,8 +541,6 @@ Updates the background model and returns the foreground mask.
 
     :param fgmask: The output foreground mask as an 8-bit binary image.
 
-    :param stream: Stream for the asynchronous version.
-
 
 ocl::MOG2::getBackgroundImage
 ---------------------------------
@@ -560,11 +550,9 @@ Computes a background image.
 
     :param backgroundImage: The output background image.
 
-    :param stream: Stream for the asynchronous version.
-
 
 ocl::MOG2::release
 ----------------------
 Releases all inner buffer's memory.
 
-.. ocv:function:: void ocl::MOG2::release()
\ No newline at end of file
+.. ocv:function:: void ocl::MOG2::release()

From 3952a0df44ea69906f17040d22e0dca903d095eb Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.alekhin@itseez.com>
Date: Thu, 31 Oct 2013 15:05:00 +0400
Subject: [PATCH 18/28] ocl: update comments in ocl.hpp

---
 modules/ocl/include/opencv2/ocl/ocl.hpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp
index af24f0aca2..5dd4d053c4 100644
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -308,16 +308,13 @@ namespace cv
             void copyTo( oclMat &m, const oclMat &mask = oclMat()) const;
 
             //! converts oclMatrix to another datatype with optional scalng. See cvConvertScale.
-            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
             void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
 
             void assignTo( oclMat &m, int type = -1 ) const;
 
             //! sets every oclMatrix element to s
-            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
             oclMat& operator = (const Scalar &s);
             //! sets some of the oclMatrix elements to s, according to the mask
-            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
             oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
             //! creates alternative oclMatrix header for the same data, with different
             // number of channels and/or different number of rows. see cvReshape.

From 53d1873776ad119b376b77d08bb48bfaca2d8e2c Mon Sep 17 00:00:00 2001
From: peng xiao <hisenxpress@gmail.com>
Date: Wed, 6 Nov 2013 11:19:26 +0800
Subject: [PATCH 19/28] Revert back test image.

---
 modules/ocl/perf/perf_haar.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ocl/perf/perf_haar.cpp b/modules/ocl/perf/perf_haar.cpp
index 1e6ba1b646..b7a1dd1a43 100644
--- a/modules/ocl/perf/perf_haar.cpp
+++ b/modules/ocl/perf/perf_haar.cpp
@@ -52,7 +52,7 @@ PERF_TEST(HaarFixture, Haar)
 {
     vector<Rect> faces;
 
-    Mat img = imread(getDataPath("gpu/haarcascade/group_1_640x480_VGA.pgm"), CV_LOAD_IMAGE_GRAYSCALE);
+    Mat img = imread(getDataPath("gpu/haarcascade/basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE);
     ASSERT_TRUE(!img.empty()) << "can't open basketball1.png";
     declare.in(img);
 

From 03646e7e01899ecc96f196b5dd92f8dfc41f69a1 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.alekhin@itseez.com>
Date: Wed, 6 Nov 2013 13:20:02 +0400
Subject: [PATCH 20/28] ocl: workaround for subbuffer memory leaks

---
 modules/ocl/src/cl_operations.cpp | 77 +++++++++++++++++++------------
 1 file changed, 47 insertions(+), 30 deletions(-)

diff --git a/modules/ocl/src/cl_operations.cpp b/modules/ocl/src/cl_operations.cpp
index d96f3470ec..7ed1a79c8d 100644
--- a/modules/ocl/src/cl_operations.cpp
+++ b/modules/ocl/src/cl_operations.cpp
@@ -148,35 +148,52 @@ void openCLMallocPitchEx(Context *ctx, void **dev_ptr, size_t *pitch,
 {
     cl_int status;
     size_t size = widthInBytes * height;
+    bool useSubBuffers =
 #ifndef MEMORY_CORRUPTION_GUARD
-    *dev_ptr = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
-                              size, 0, &status);
-    openCLVerifyCall(status);
+            false;
 #else
-    size_t allocSize = size + __memory_corruption_guard_bytes * 2;
-    cl_mem mainBuffer = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
-            allocSize, 0, &status);
-    openCLVerifyCall(status);
-    cl_buffer_region r = {__memory_corruption_guard_bytes, size};
-    *dev_ptr =  clCreateSubBuffer(mainBuffer,
-            gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
-            CL_BUFFER_CREATE_TYPE_REGION, &r,
-            &status);
-    openCLVerifyCall(status);
-#ifdef CHECK_MEMORY_CORRUPTION
-    std::vector<int> tmp(__memory_corruption_guard_bytes / sizeof(int),
-            __memory_corruption_check_pattern);
-    CV_Assert(tmp.size() * sizeof(int) == __memory_corruption_guard_bytes);
-    openCLVerifyCall(clEnqueueWriteBuffer(getClCommandQueue(ctx),
-            mainBuffer, CL_FALSE, 0, __memory_corruption_guard_bytes, &tmp[0],
-            0, NULL, NULL));
-    openCLVerifyCall(clEnqueueWriteBuffer(getClCommandQueue(ctx),
-            mainBuffer, CL_FALSE, __memory_corruption_guard_bytes + size, __memory_corruption_guard_bytes, &tmp[0],
-            0, NULL, NULL));
-    clFinish(getClCommandQueue(ctx));
+            true;
 #endif
-    CheckBuffers data(mainBuffer, size, widthInBytes, height);
-    __check_buffers.insert(std::pair<cl_mem, CheckBuffers>((cl_mem)*dev_ptr, data));
+    const DeviceInfo& devInfo = ctx->getDeviceInfo();
+    if (useSubBuffers && devInfo.isIntelDevice)
+    {
+        useSubBuffers = false; // TODO FIXIT We observe memory leaks when working with sub-buffers
+                               // on the CPU device of Intel OpenCL SDK (Linux). We will investigate this later.
+    }
+    if (!useSubBuffers)
+    {
+        *dev_ptr = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
+                                  size, 0, &status);
+        openCLVerifyCall(status);
+    }
+#ifdef MEMORY_CORRUPTION_GUARD
+    else
+    {
+        size_t allocSize = size + __memory_corruption_guard_bytes * 2;
+        cl_mem mainBuffer = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
+                allocSize, 0, &status);
+        openCLVerifyCall(status);
+        cl_buffer_region r = {__memory_corruption_guard_bytes, size};
+        *dev_ptr = clCreateSubBuffer(mainBuffer,
+                gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
+                CL_BUFFER_CREATE_TYPE_REGION, &r,
+                &status);
+        openCLVerifyCall(status);
+#ifdef CHECK_MEMORY_CORRUPTION
+        std::vector<int> tmp(__memory_corruption_guard_bytes / sizeof(int),
+                __memory_corruption_check_pattern);
+        CV_Assert(tmp.size() * sizeof(int) == __memory_corruption_guard_bytes);
+        openCLVerifyCall(clEnqueueWriteBuffer(getClCommandQueue(ctx),
+                mainBuffer, CL_FALSE, 0, __memory_corruption_guard_bytes, &tmp[0],
+                0, NULL, NULL));
+        openCLVerifyCall(clEnqueueWriteBuffer(getClCommandQueue(ctx),
+                mainBuffer, CL_FALSE, __memory_corruption_guard_bytes + size, __memory_corruption_guard_bytes, &tmp[0],
+                0, NULL, NULL));
+        clFinish(getClCommandQueue(ctx));
+#endif
+        CheckBuffers data(mainBuffer, size, widthInBytes, height);
+        __check_buffers.insert(std::pair<cl_mem, CheckBuffers>((cl_mem)*dev_ptr, data));
+    }
 #endif
     *pitch = widthInBytes;
 }
@@ -230,6 +247,7 @@ void openCLCopyBuffer2D(Context *ctx, void *dst, size_t dpitch, int dst_offset,
 
 void openCLFree(void *devPtr)
 {
+    openCLSafeCall(clReleaseMemObject((cl_mem)devPtr));
 #ifdef MEMORY_CORRUPTION_GUARD
 #ifdef CHECK_MEMORY_CORRUPTION
     bool failBefore = false, failAfter = false;
@@ -270,9 +288,7 @@ void openCLFree(void *devPtr)
         openCLSafeCall(clReleaseMemObject(data.mainBuffer));
         __check_buffers.erase(i);
     }
-#endif
-    openCLSafeCall(clReleaseMemObject((cl_mem)devPtr));
-#if defined(MEMORY_CORRUPTION_GUARD) && defined(CHECK_MEMORY_CORRUPTION)
+#if defined(CHECK_MEMORY_CORRUPTION)
     if (failBefore)
     {
 #ifdef CHECK_MEMORY_CORRUPTION_PRINT_ERROR
@@ -291,7 +307,8 @@ void openCLFree(void *devPtr)
         CV_Error(CV_StsInternal, "Memory corruption detected: after buffer");
 #endif
     }
-#endif
+#endif // CHECK_MEMORY_CORRUPTION
+#endif // MEMORY_CORRUPTION_GUARD
 }
 
 cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName)

From 24f369c4ac546b5ce6126de8b7a3f6ebf7d4f49b Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Wed, 6 Nov 2013 14:24:18 +0400
Subject: [PATCH 21/28] Android Manager Version++.

---
 .../android_binary_package/O4A_SDK.rst        |  4 +--
 .../service/engine/AndroidManifest.xml        |  4 +--
 platforms/android/service/readme.txt          | 28 +++++++++----------
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst b/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst
index df18e19c57..27dd815817 100644
--- a/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst
+++ b/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst
@@ -51,7 +51,7 @@ The structure of package contents looks as follows:
     OpenCV-2.4.7-android-sdk
     |_ apk
     |   |_ OpenCV_2.4.7_binary_pack_armv7a.apk
-    |   |_ OpenCV_2.4.7_Manager_2.13_XXX.apk
+    |   |_ OpenCV_2.4.7_Manager_2.14_XXX.apk
     |
     |_ doc
     |_ samples
@@ -295,7 +295,7 @@ Well, running samples from Eclipse is very simple:
   .. code-block:: sh
     :linenos:
 
-    <Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.7_Manager_2.13_armv7a-neon.apk
+    <Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.7_Manager_2.14_armv7a-neon.apk
 
   .. note:: ``armeabi``, ``armv7a-neon``, ``arm7a-neon-android8``, ``mips`` and ``x86`` stand for
             platform targets:
diff --git a/platforms/android/service/engine/AndroidManifest.xml b/platforms/android/service/engine/AndroidManifest.xml
index 8d7894797e..162d31eb02 100644
--- a/platforms/android/service/engine/AndroidManifest.xml
+++ b/platforms/android/service/engine/AndroidManifest.xml
@@ -1,8 +1,8 @@
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
     package="org.opencv.engine"
-    android:versionCode="213@ANDROID_PLATFORM_VERSION_CODE@"
-    android:versionName="2.13" >
+    android:versionCode="214@ANDROID_PLATFORM_VERSION_CODE@"
+    android:versionName="2.14" >
 
     <uses-sdk android:minSdkVersion="@ANDROID_NATIVE_API_LEVEL@" />
     <uses-feature android:name="android.hardware.touchscreen" android:required="false"/>
diff --git a/platforms/android/service/readme.txt b/platforms/android/service/readme.txt
index 6255df5e93..1e757a0e5f 100644
--- a/platforms/android/service/readme.txt
+++ b/platforms/android/service/readme.txt
@@ -14,20 +14,20 @@ manually using adb tool:
 
 .. code-block:: sh
 
-    adb install OpenCV-2.4.6-android-sdk/apk/OpenCV_2.4.6_Manager_2.9_<platform>.apk
+    adb install OpenCV-2.4.7-android-sdk/apk/OpenCV_2.4.7_Manager_2.14_<platform>.apk
 
 Use the table below to determine proper OpenCV Manager package for your device:
 
-+------------------------------+--------------+---------------------------------------------------+
-| Hardware Platform            | Android ver. | Package name                                      |
-+==============================+==============+===================================================+
-| armeabi-v7a (ARMv7-A + NEON) |    >= 2.3    | OpenCV_2.4.6_Manager_2.9_armv7a-neon.apk          |
-+------------------------------+--------------+---------------------------------------------------+
-| armeabi-v7a (ARMv7-A + NEON) |     = 2.2    | OpenCV_2.4.6_Manager_2.9_armv7a-neon-android8.apk |
-+------------------------------+--------------+---------------------------------------------------+
-| armeabi (ARMv5, ARMv6)       |    >= 2.3    | OpenCV_2.4.6_Manager_2.9_armeabi.apk              |
-+------------------------------+--------------+---------------------------------------------------+
-| Intel x86                    |    >= 2.3    | OpenCV_2.4.6_Manager_2.9_x86.apk                  |
-+------------------------------+--------------+---------------------------------------------------+
-| MIPS                         |    >= 2.3    | OpenCV_2.4.6_Manager_2.9_mips.apk                 |
-+------------------------------+--------------+---------------------------------------------------+
++------------------------------+--------------+----------------------------------------------------+
+| Hardware Platform            | Android ver. | Package name                                       |
++==============================+==============+====================================================+
+| armeabi-v7a (ARMv7-A + NEON) |    >= 2.3    | OpenCV_2.4.7_Manager_2.14_armv7a-neon.apk          |
++------------------------------+--------------+----------------------------------------------------+
+| armeabi-v7a (ARMv7-A + NEON) |     = 2.2    | OpenCV_2.4.7_Manager_2.14_armv7a-neon-android8.apk |
++------------------------------+--------------+----------------------------------------------------+
+| armeabi (ARMv5, ARMv6)       |    >= 2.3    | OpenCV_2.4.7_Manager_2.14_armeabi.apk              |
++------------------------------+--------------+----------------------------------------------------+
+| Intel x86                    |    >= 2.3    | OpenCV_2.4.7_Manager_2.14_x86.apk                  |
++------------------------------+--------------+----------------------------------------------------+
+| MIPS                         |    >= 2.3    | OpenCV_2.4.7_Manager_2.14_mips.apk                 |
++------------------------------+--------------+----------------------------------------------------+

From 5304e9f259268a1e85841ba7c162440b3780c5b3 Mon Sep 17 00:00:00 2001
From: Konstantin Matskevich <konstantin.matskevich@itseez.com>
Date: Wed, 6 Nov 2013 12:17:06 +0400
Subject: [PATCH 22/28] extending openCL info dump

---
 .../opencv2/ocl/private/opencl_dumpinfo.hpp   | 61 ++++++++++++++-----
 modules/ocl/perf/main.cpp                     | 12 ++++
 2 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp b/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp
index beb3d27525..942fdf4539 100644
--- a/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp
+++ b/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp
@@ -39,7 +39,7 @@
 //
 //M*/
 
-#if !defined(DUMP_INFO_STDOUT) && !defined(DUMP_INFO_XML)
+#if !defined(DUMP_INFO_STDOUT) && !defined(DUMP_INFO_XML) && !defined(DUMP_DEVICES_INFO_STDOUT) && !defined(DUMP_DEVICES_INFO_XML)
 #error Invalid usage
 #endif
 
@@ -51,6 +51,14 @@
 #define DUMP_INFO_XML(...)
 #endif
 
+#if !defined(DUMP_DEVICES_INFO_STDOUT)
+#define DUMP_DEVICES_INFO_STDOUT(...)
+#endif
+
+#if !defined(DUMP_DEVICES_INFO_XML)
+#define DUMP_DEVICES_INFO_XML(...)
+#endif
+
 #include <sstream>
 
 static std::string bytesToStringRepr(size_t value)
@@ -85,43 +93,64 @@ static void dumpOpenCLDevice()
     using namespace cv::ocl;
     try
     {
+        cv::ocl::PlatformsInfo platforms;
+        cv::ocl::getOpenCLPlatforms(platforms);
+        DUMP_INFO_STDOUT("OpenCL Platforms","");
+        DUMP_INFO_XML("OpenCL Platforms","");
+        const char* deviceTypeStr;
+        for(unsigned int i=0; i < platforms.size(); i++)
+        {
+            DUMP_INFO_STDOUT("    ", platforms.at(i)->platformName);
+            DUMP_INFO_XML("", platforms.at(i)->platformName);
+            cv::ocl::DevicesInfo devices;
+            cv::ocl::getOpenCLDevices(devices);
+            for(unsigned int j=0; j < devices.size(); j++)
+            {
+                deviceTypeStr = devices.at(j)->deviceType == CVCL_DEVICE_TYPE_CPU
+                            ? ("CPU") : (devices.at(j)->deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown");
+                DUMP_DEVICES_INFO_STDOUT(deviceTypeStr, j, devices.at(j)->deviceName, devices.at(j)->deviceVersion);
+                DUMP_DEVICES_INFO_XML(deviceTypeStr, j, devices.at(j)->deviceName, devices.at(j)->deviceVersion);
+            }
+        }
+        DUMP_INFO_STDOUT("Current OpenCL device","");
+        DUMP_INFO_XML("Current OpenCL device","");
+
         const cv::ocl::DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo();
 
-        const char* deviceTypeStr = deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU
-                        ? "CPU" :
-                        (deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown");
-        DUMP_INFO_STDOUT("Device type", deviceTypeStr);
-        DUMP_INFO_XML("cv_ocl_deviceType", deviceTypeStr);
-
-        DUMP_INFO_STDOUT("Platform name", deviceInfo.platform->platformName);
+        DUMP_INFO_STDOUT("    Platform", deviceInfo.platform->platformName);
         DUMP_INFO_XML("cv_ocl_platformName", deviceInfo.platform->platformName);
 
-        DUMP_INFO_STDOUT("Device name", deviceInfo.deviceName);
+        deviceTypeStr = deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU
+                        ? "CPU" : (deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown");
+        DUMP_INFO_STDOUT("    Type", deviceTypeStr);
+        DUMP_INFO_XML("cv_ocl_deviceType", deviceTypeStr);
+
+        DUMP_INFO_STDOUT("    Name", deviceInfo.deviceName);
         DUMP_INFO_XML("cv_ocl_deviceName", deviceInfo.deviceName);
 
-        DUMP_INFO_STDOUT("Device version", deviceInfo.deviceVersion);
+        DUMP_INFO_STDOUT("    Version", deviceInfo.deviceVersion);
         DUMP_INFO_XML("cv_ocl_deviceVersion", deviceInfo.deviceVersion);
 
-        DUMP_INFO_STDOUT("Compute units", deviceInfo.maxComputeUnits);
+        DUMP_INFO_STDOUT("    Compute units", deviceInfo.maxComputeUnits);
         DUMP_INFO_XML("cv_ocl_maxComputeUnits", deviceInfo.maxComputeUnits);
 
-        DUMP_INFO_STDOUT("Max work group size", deviceInfo.maxWorkGroupSize);
+        DUMP_INFO_STDOUT("    Max work group size", deviceInfo.maxWorkGroupSize);
         DUMP_INFO_XML("cv_ocl_maxWorkGroupSize", deviceInfo.maxWorkGroupSize);
 
         std::string localMemorySizeStr = bytesToStringRepr(deviceInfo.localMemorySize);
-        DUMP_INFO_STDOUT("Local memory size", localMemorySizeStr.c_str());
+        DUMP_INFO_STDOUT("    Local memory size", localMemorySizeStr.c_str());
         DUMP_INFO_XML("cv_ocl_localMemorySize", deviceInfo.localMemorySize);
 
         std::string maxMemAllocSizeStr = bytesToStringRepr(deviceInfo.maxMemAllocSize);
-        DUMP_INFO_STDOUT("Max memory allocation size", maxMemAllocSizeStr.c_str());
+        DUMP_INFO_STDOUT("    Max memory allocation size", maxMemAllocSizeStr.c_str());
         DUMP_INFO_XML("cv_ocl_maxMemAllocSize", deviceInfo.maxMemAllocSize);
 
         const char* doubleSupportStr = deviceInfo.haveDoubleSupport ? "Yes" : "No";
-        DUMP_INFO_STDOUT("Double support", doubleSupportStr);
+        DUMP_INFO_STDOUT("    Double support", doubleSupportStr);
         DUMP_INFO_XML("cv_ocl_haveDoubleSupport", deviceInfo.haveDoubleSupport);
 
         const char* isUnifiedMemoryStr = deviceInfo.isUnifiedMemory ? "Yes" : "No";
-        DUMP_INFO_STDOUT("Unified memory", isUnifiedMemoryStr);
+        DUMP_INFO_STDOUT("    Unified memory", isUnifiedMemoryStr);
         DUMP_INFO_XML("cv_ocl_isUnifiedMemory", deviceInfo.isUnifiedMemory);
     }
     catch (...)
diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp
index 836f8ee9bd..a5e2386835 100644
--- a/modules/ocl/perf/main.cpp
+++ b/modules/ocl/perf/main.cpp
@@ -53,6 +53,18 @@
         ::testing::Test::RecordProperty((propertyXMLName), ss.str()); \
     } while (false)
 
+#define DUMP_DEVICES_INFO_STDOUT(deviceType, deviceIndex, deviceName, deviceVersion) \
+    do { \
+        std::cout << "        " << (deviceType) << " " << (deviceIndex) << " : " << (deviceName) << " : " << deviceVersion << std::endl; \
+    } while (false)
+
+#define DUMP_DEVICES_INFO_XML(deviceType, deviceIndex, deviceName, deviceVersion) \
+    do { \
+        std::stringstream ss; \
+        ss << ":" << deviceIndex << ":" << deviceName << ":" << deviceVersion; \
+        ::testing::Test::RecordProperty((deviceType), ss.str()); \
+    } while (false)
+
 #include "opencv2/ocl/private/opencl_dumpinfo.hpp"
 
 static const char * impls[] =

From a1de91a4fd9362af7ce07386816a03206cfce51b Mon Sep 17 00:00:00 2001
From: Harris Gasparakis <harris.gasparakis@amd.com>
Date: Tue, 5 Nov 2013 07:04:04 -0500
Subject: [PATCH 23/28] Cleaned up adaptive bilateral filtering, added support
 for gaussian interpolation, updated sample and docs

---
 modules/imgproc/doc/filtering.rst             | 17 ++--
 .../include/opencv2/imgproc/imgproc.hpp       |  2 +-
 modules/imgproc/src/smooth.cpp                | 85 +++++++++++++++----
 modules/ocl/doc/image_filtering.rst           | 12 ++-
 modules/ocl/include/opencv2/ocl/ocl.hpp       |  9 +-
 modules/ocl/src/filtering.cpp                 | 30 +++++--
 .../opencl/filtering_adaptive_bilateral.cl    | 63 +++++++++-----
 modules/ocl/test/test_filters.cpp             |  4 +-
 samples/ocl/adaptive_bilateral_filter.cpp     | 31 ++++---
 9 files changed, 173 insertions(+), 80 deletions(-)

diff --git a/modules/imgproc/doc/filtering.rst b/modules/imgproc/doc/filtering.rst
index 1816c6a439..efab258d4a 100755
--- a/modules/imgproc/doc/filtering.rst
+++ b/modules/imgproc/doc/filtering.rst
@@ -416,24 +416,23 @@ adaptiveBilateralFilter
 -----------------------
 Applies the adaptive bilateral filter to an image.
 
-.. ocv:function:: void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize, double sigmaSpace, Point anchor=Point(-1, -1), int borderType=BORDER_DEFAULT )
+.. ocv:function:: void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize, double sigmaSpace, double maxSigmaColor = 20.0, Point anchor=Point(-1, -1), int borderType=BORDER_DEFAULT )
 
 .. ocv:pyfunction:: cv2.adaptiveBilateralFilter(src, ksize, sigmaSpace[, dst[, anchor[, borderType]]]) -> dst
 
-    :param src: Source 8-bit, 1-channel or 3-channel image.
+    :param src: The source image
 
-    :param dst: Destination image of the same size and type as  ``src`` .
+    :param dst: The destination image; will have the same size and the same type as src
 
-    :param ksize: filter kernel size.
+    :param ksize: The kernel size. This is the neighborhood where the local variance will be calculated, and where pixels will contribute (in a weighted manner).
 
-    :param sigmaSpace: Filter sigma in the coordinate space. It has similar meaning with ``sigmaSpace`` in ``bilateralFilter``.
+    :param sigmaSpace: Filter sigma in the coordinate space. Larger value of the parameter means that farther pixels will influence each other (as long as their colors are close enough; see sigmaColor). When d>0, it specifies the neighborhood size regardless of sigmaSpace, otherwise d is proportional to sigmaSpace.
 
-    :param anchor: anchor point; default value ``Point(-1,-1)`` means that the anchor is at the kernel center. Only default value is supported now.
+    :param maxSigmaColor: Maximum allowed sigma color (will clamp the value calculated in the ksize neighborhood). Larger value of the parameter means that more dissimilar pixels will influence each other (as long as their colors are close enough; see sigmaColor). When d>0, it specifies the neighborhood size regardless of sigmaSpace, otherwise d is proportional to sigmaSpace.
 
-    :param borderType: border mode used to extrapolate pixels outside of the image.
-
-The function applies adaptive bilateral filtering to the input image. This filter is similar to ``bilateralFilter``, in that dissimilarity from and distance to the center pixel is punished. Instead of using ``sigmaColor``, we employ the variance of pixel values in the neighbourhood.
+    :param borderType: Pixel extrapolation method.
 
+A main part of our strategy will be to load each raw pixel once, and reuse it to calculate all pixels in the output (filtered) image that need this pixel value. The math of the filter is that of the usual bilateral filter, except that the sigma color is calculated in the neighborhood, and clamped by the optional input value.
 
 
 blur
diff --git a/modules/imgproc/include/opencv2/imgproc/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc/imgproc.hpp
index 05af41ca99..2fcccfe30d 100644
--- a/modules/imgproc/include/opencv2/imgproc/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc/imgproc.hpp
@@ -400,7 +400,7 @@ CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d,
                                    int borderType=BORDER_DEFAULT );
 //! smooths the image using adaptive bilateral filter
 CV_EXPORTS_W void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize,
-                                           double sigmaSpace, Point anchor=Point(-1, -1),
+                                           double sigmaSpace, double maxSigmaColor = 20.0, Point anchor=Point(-1, -1),
                                            int borderType=BORDER_DEFAULT );
 //! smooths the image using the box filter. Each pixel is processed in O(1) time
 CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth,
diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp
index e7fe8d296f..bbce3deedf 100644
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@@ -2279,15 +2279,24 @@ void cv::bilateralFilter( InputArray _src, OutputArray _dst, int d,
 
 namespace cv
 {
-#define CALCVAR 1
-#define FIXED_WEIGHT 0
+#ifndef ABF_CALCVAR
+#define ABF_CALCVAR 1
+#endif
+
+#ifndef ABF_FIXED_WEIGHT
+#define ABF_FIXED_WEIGHT 0
+#endif
+
+#ifndef ABF_GAUSSIAN
+#define ABF_GAUSSIAN 1
+#endif
 
 class adaptiveBilateralFilter_8u_Invoker :
     public ParallelLoopBody
 {
 public:
-    adaptiveBilateralFilter_8u_Invoker(Mat& _dest, const Mat& _temp, Size _ksize, double _sigma_space, Point _anchor) :
-        temp(&_temp), dest(&_dest), ksize(_ksize), sigma_space(_sigma_space), anchor(_anchor)
+    adaptiveBilateralFilter_8u_Invoker(Mat& _dest, const Mat& _temp, Size _ksize, double _sigma_space, double _maxSigmaColor, Point _anchor) :
+        temp(&_temp), dest(&_dest), ksize(_ksize), sigma_space(_sigma_space), maxSigma_Color(_maxSigmaColor), anchor(_anchor)
     {
         if( sigma_space <= 0 )
             sigma_space = 1;
@@ -2300,7 +2309,11 @@ public:
         for(int y=-h; y<=h; y++)
             for(int x=-w; x<=w; x++)
         {
+#if ABF_GAUSSIAN
+            space_weight[idx++] = (float)exp ( -0.5*(x * x + y * y)/sigma2);
+#else
             space_weight[idx++] = (float)(sigma2 / (sigma2 + x * x + y * y));
+#endif
         }
     }
     virtual void operator()(const Range& range) const
@@ -2336,7 +2349,7 @@ public:
                     int startLMJ = 0;
                     int endLMJ  = ksize.width  - 1;
                     int howManyAll = (anX *2 +1)*(ksize.width );
-#if CALCVAR
+#if ABF_CALCVAR
                     for(int x = startLMJ; x< endLMJ; x++)
                     {
                         tptr = temp->ptr(startY + x) +j;
@@ -2348,8 +2361,14 @@ public:
                         }
                     }
                     var = ( (sumValSqr * howManyAll)- sumVal * sumVal )  /  ( (float)(howManyAll*howManyAll));
+
+                    if(var < 0.01)
+                        var = 0.01f;
+                    else if(var > (float)(maxSigma_Color*maxSigma_Color) )
+                        var =  (float)(maxSigma_Color*maxSigma_Color) ;
+
 #else
-                    var = 900.0;
+                    var = (float)(maxSigma_Color*maxSigma_Color); // ABF_CALCVAR==0: use the clamp limit; the member is maxSigma_Color
 #endif
                     startLMJ = 0;
                     endLMJ = ksize.width;
@@ -2360,13 +2379,18 @@ public:
                         tptr = temp->ptr(startY + x) +j;
                         for(int y=-anX; y<=anX; y++)
                         {
-#if FIXED_WEIGHT
+#if ABF_FIXED_WEIGHT
                             weight = 1.0;
 #else
                             currVal = tptr[cn*(y+anX)];
                             currWRTCenter = currVal - currValCenter;
 
-                            weight = var / ( var + (currWRTCenter * currWRTCenter) ) * space_weight[x*ksize.width+y+anX];;
+#if ABF_GAUSSIAN
+                            weight = exp ( -0.5f * currWRTCenter * currWRTCenter/var ) * space_weight[x*ksize.width+y+anX];
+#else
+                            weight = var / ( var + (currWRTCenter * currWRTCenter) ) * space_weight[x*ksize.width+y+anX];
+#endif
+
 #endif
                             tmpSum += ((float)tptr[cn*(y+anX)] * weight);
                             totalWeight += weight;
@@ -2401,7 +2425,8 @@ public:
                     int startLMJ = 0;
                     int endLMJ  = ksize.width - 1;
                     int howManyAll = (anX *2 +1)*(ksize.width);
-#if CALCVAR
+#if ABF_CALCVAR
+                    float max_var = (float)( maxSigma_Color*maxSigma_Color);
                     for(int x = startLMJ; x< endLMJ; x++)
                     {
                         tptr = temp->ptr(startY + x) +j;
@@ -2416,11 +2441,27 @@ public:
                             sumValSqr_r += (currVal_r *currVal_r);
                         }
                     }
-                    var_b = ( (sumValSqr_b * howManyAll)- sumVal_b * sumVal_b )  /  ( (float)(howManyAll*howManyAll));
-                    var_g = ( (sumValSqr_g * howManyAll)- sumVal_g * sumVal_g )  /  ( (float)(howManyAll*howManyAll));
-                    var_r = ( (sumValSqr_r * howManyAll)- sumVal_r * sumVal_r )  /  ( (float)(howManyAll*howManyAll));
+                    var_b =  ( (sumValSqr_b * howManyAll)- sumVal_b * sumVal_b )  /  ( (float)(howManyAll*howManyAll));
+                    var_g =  ( (sumValSqr_g * howManyAll)- sumVal_g * sumVal_g )  /  ( (float)(howManyAll*howManyAll));
+                    var_r =  ( (sumValSqr_r * howManyAll)- sumVal_r * sumVal_r )  /  ( (float)(howManyAll*howManyAll));
+
+                    if(var_b < 0.01)
+                        var_b = 0.01f;
+                    else if(var_b > max_var )
+                        var_b =  (float)(max_var) ;
+
+                    if(var_g < 0.01)
+                        var_g = 0.01f;
+                    else if(var_g > max_var )
+                        var_g =  (float)(max_var) ;
+
+                    if(var_r < 0.01)
+                        var_r = 0.01f;
+                    else if(var_r > max_var )
+                        var_r =  (float)(max_var) ;
+
 #else
-                    var_b = 900.0; var_g = 900.0;var_r = 900.0;
+                    var_b = maxSigma_Color*maxSigma_Color; var_g = maxSigma_Color*maxSigma_Color; var_r = maxSigma_Color*maxSigma_Color;
 #endif
                     startLMJ = 0;
                     endLMJ = ksize.width;
@@ -2431,7 +2472,7 @@ public:
                         tptr = temp->ptr(startY + x) +j;
                         for(int y=-anX; y<=anX; y++)
                         {
-#if FIXED_WEIGHT
+#if ABF_FIXED_WEIGHT
                             weight_b = 1.0;
                             weight_g = 1.0;
                             weight_r = 1.0;
@@ -2442,9 +2483,16 @@ public:
                             currWRTCenter_r = currVal_r - currValCenter_r;
 
                             float cur_spw = space_weight[x*ksize.width+y+anX];
+
+#if ABF_GAUSSIAN
+                            weight_b = exp( -0.5f * currWRTCenter_b * currWRTCenter_b/ var_b ) * cur_spw;
+                            weight_g = exp( -0.5f * currWRTCenter_g * currWRTCenter_g/ var_g ) * cur_spw;
+                            weight_r = exp( -0.5f * currWRTCenter_r * currWRTCenter_r/ var_r ) * cur_spw;
+#else
                             weight_b = var_b / ( var_b + (currWRTCenter_b * currWRTCenter_b) ) * cur_spw;
                             weight_g = var_g / ( var_g + (currWRTCenter_g * currWRTCenter_g) ) * cur_spw;
                             weight_r = var_r / ( var_r + (currWRTCenter_r * currWRTCenter_r) ) * cur_spw;
+#endif
 #endif
                             tmpSum_b += ((float)tptr[cn*(y+anX)]   * weight_b);
                             tmpSum_g += ((float)tptr[cn*(y+anX)+1] * weight_g);
@@ -2468,10 +2516,11 @@ private:
     Mat *dest;
     Size ksize;
     double sigma_space;
+    double maxSigma_Color;
     Point anchor;
     vector<float> space_weight;
 };
-static void adaptiveBilateralFilter_8u( const Mat& src, Mat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType )
+static void adaptiveBilateralFilter_8u( const Mat& src, Mat& dst, Size ksize, double sigmaSpace, double maxSigmaColor, Point anchor, int borderType )
 {
     Size size = src.size();
 
@@ -2481,12 +2530,12 @@ static void adaptiveBilateralFilter_8u( const Mat& src, Mat& dst, Size ksize, do
     Mat temp;
     copyMakeBorder(src, temp, anchor.x, anchor.y, anchor.x, anchor.y, borderType);
 
-    adaptiveBilateralFilter_8u_Invoker body(dst, temp, ksize, sigmaSpace, anchor);
+    adaptiveBilateralFilter_8u_Invoker body(dst, temp, ksize, sigmaSpace, maxSigmaColor, anchor);
     parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16));
 }
 }
 void cv::adaptiveBilateralFilter( InputArray _src, OutputArray _dst, Size ksize,
-                                  double sigmaSpace, Point anchor, int borderType )
+                                  double sigmaSpace, double maxSigmaColor, Point anchor, int borderType )
 {
     Mat src = _src.getMat();
     _dst.create(src.size(), src.type());
@@ -2496,7 +2545,7 @@ void cv::adaptiveBilateralFilter( InputArray _src, OutputArray _dst, Size ksize,
 
     anchor = normalizeAnchor(anchor,ksize);
     if( src.depth() == CV_8U )
-        adaptiveBilateralFilter_8u( src, dst, ksize, sigmaSpace, anchor, borderType );
+        adaptiveBilateralFilter_8u( src, dst, ksize, sigmaSpace, maxSigmaColor, anchor, borderType );
     else
         CV_Error( CV_StsUnsupportedFormat,
         "Adaptive Bilateral filtering is only implemented for 8u images" );
diff --git a/modules/ocl/doc/image_filtering.rst b/modules/ocl/doc/image_filtering.rst
index cbec29b114..92a6c575f4 100644
--- a/modules/ocl/doc/image_filtering.rst
+++ b/modules/ocl/doc/image_filtering.rst
@@ -497,23 +497,21 @@ ocl::adaptiveBilateralFilter
 --------------------------------
 Returns void
 
-.. ocv:function:: void ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT)
+.. ocv:function:: void ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor = 20.0, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT)
 
     :param src: The source image
 
     :param dst: The destination image; will have the same size and the same type as src
 
-    :param ksize: The kernel size
+    :param ksize: The kernel size. This is the neighborhood where the local variance will be calculated, and where pixels will contribute (in a weighted manner).
 
     :param sigmaSpace: Filter sigma in the coordinate space. Larger value of the parameter means that farther pixels will influence each other (as long as their colors are close enough; see sigmaColor). Then d>0, it specifies the neighborhood size regardless of sigmaSpace, otherwise d is proportional to sigmaSpace.
 
+    :param maxSigmaColor: Maximum allowed sigma color (will clamp the value calculated in the ksize neighborhood). Larger value of the parameter means that more dissimilar pixels will influence each other (as long as they are close enough spatially; see sigmaSpace).
+
     :param borderType: Pixel extrapolation method.
 
-A main part of our strategy will be to load each raw pixel once, and reuse it to calculate all pixels in the output (filtered) image that need this pixel value.
-
-.. math::
-
-    \emph{O}_i = \frac{1}{W_i}\sum\limits_{j\in{N(i)}}{\frac{1}{1+\frac{(V_i-V_j)^2}{\sigma_{N{'}(i)}^2}}*\frac{1}{1+\frac{d(i,j)^2}{\sum^2}}}V_j
+A main part of our strategy will be to load each raw pixel once, and reuse it to calculate all pixels in the output (filtered) image that need this pixel value. The math of the filter is that of the usual bilateral filter, except that the sigma color is calculated in the neighborhood, and clamped by the optional input value.
 
 Local memory organization
 
diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp
index 4070ed13d4..fa88251ee1 100644
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -553,11 +553,12 @@ namespace cv
         CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
 
         //! Applies an adaptive bilateral filter to the input image
-        //  This is not truly a bilateral filter. Instead of using user provided fixed parameters,
-        //  the function calculates a constant at each window based on local standard deviation,
-        //  and use this constant to do filtering.
+        //  Unlike the usual bilateral filter that uses a fixed value for sigmaColor,
+        //  the adaptive version calculates the local variance in the ksize neighborhood
+        //  and uses it as sigmaColor for the value filtering. However, the local standard deviation is
+        //  clamped to maxSigmaColor.
         //  supports 8UC1, 8UC3
-        CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
+        CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor=20.0, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
 
         //! computes exponent of each matrix element (dst = e**src)
         // supports only CV_32FC1, CV_64FC1 type
diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp
index 59146c1093..8a78e5a838 100644
--- a/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
@@ -20,6 +20,7 @@
 //    Zero Lin, Zero.Lin@amd.com
 //    Zhang Ying, zhangying913@gmail.com
 //    Yao Wang, bitwangyaoyao@gmail.com
+//    Harris Gasparakis, harris.gasparakis@amd.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -1407,7 +1408,7 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Adaptive Bilateral Filter
 
-void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType)
+void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor, Point anchor, int borderType)
 {
     CV_Assert((ksize.width & 1) && (ksize.height & 1));  // ksize must be odd
     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);  // source must be 8bit RGB image
@@ -1418,10 +1419,24 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
     int idx = 0;
     int w = ksize.width / 2;
     int h = ksize.height / 2;
-    for(int y=-h; y<=h; y++)
-        for(int x=-w; x<=w; x++)
+
+    int ABF_GAUSSIAN_ocl = 1;
+
+    if(ABF_GAUSSIAN_ocl)
     {
-        lut.at<float>(idx++) = sigma2 / (sigma2 + x * x + y * y);
+        for(int y=-h; y<=h; y++)
+            for(int x=-w; x<=w; x++)
+        {
+            lut.at<float>(idx++) = expf( (float)(-0.5 * (x * x + y * y)/sigma2));
+        }
+    }
+    else
+    {
+        for(int y=-h; y<=h; y++)
+            for(int x=-w; x<=w; x++)
+        {
+            lut.at<float>(idx++) = (float) (sigma2 / (sigma2 + x * x + y * y));
+        }
     }
 
     oclMat dlut(lut);
@@ -1429,7 +1444,7 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
     int cn = src.oclchannels();
 
     normalizeAnchor(anchor, ksize);
-    const static String kernelName = "edgeEnhancingFilter";
+    const static String kernelName = "adaptiveBilateralFilter";
 
     dst.create(src.size(), src.type());
 
@@ -1478,9 +1493,10 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
 
     //LDATATYPESIZE is sizeof local data store. This is to exemplify effect of LDS on kernel performance
     sprintf(build_options,
-        "-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d"
+        "-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d -D MAX_VAR_VAL=%f -D ABF_GAUSSIAN=%d"
         " -D THREADS=%d -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
-        static_cast<int>(EXTRA), static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
+        static_cast<int>(EXTRA), static_cast<float>(maxSigmaColor*maxSigmaColor), static_cast<int>(ABF_GAUSSIAN_ocl),
+        static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
 
     std::vector<pair<size_t , const void *> > args;
     args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
diff --git a/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl b/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
index 8ed7fac3d0..81b29617ce 100644
--- a/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
+++ b/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
@@ -85,7 +85,7 @@
 #endif
 
 __kernel void
-edgeEnhancingFilter_C4_D0(
+adaptiveBilateralFilter_C4_D0(
     __global const uchar4 * restrict src,
     __global uchar4 *dst,
     float alpha,
@@ -173,14 +173,14 @@ edgeEnhancingFilter_C4_D0(
         //find variance of all data
         int startLMj;
         int endLMj ;
-#if CALCVAR
         // Top row: don't sum the very last element
         for(int extraCnt = 0; extraCnt <=EXTRA; extraCnt++)
         {
+#if CALCVAR
             startLMj = extraCnt;
             endLMj =  ksY+extraCnt-1;
-            sumVal =0;
-            sumValSqr=0;
+            sumVal = (int4)0;
+            sumValSqr= (int4)0;
             for(int j = startLMj; j < endLMj; j++)
                 for(int i=-anX; i<=anX; i++)
                 {
@@ -190,9 +190,10 @@ edgeEnhancingFilter_C4_D0(
                     sumValSqr += mul24(currVal, currVal);
                 }
 
-            var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) ;
+            var[extraCnt] = clamp( convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ), (float4)(0.1f, 0.1f, 0.1f, 0.1f), (float4)(MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL)) ;
+
 #else
-        var[extraCnt] = (float4)(900.0, 900.0, 900.0, 0.0);
+            var[extraCnt] = (float4)(MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL, MAX_VAR_VAL);
 #endif
         }
 
@@ -221,32 +222,48 @@ edgeEnhancingFilter_C4_D0(
 #else
                     weight = 1.0f;
 #endif
-#else
+#else // !FIXED_WEIGHT
                     currVal = convert_int4(data[j][col+anX+i]);
                     currWRTCenter = currVal-currValCenter;
 
+#if ABF_GAUSSIAN
+
+#if VAR_PER_CHANNEL
+                    weight = exp( (float4)(-0.5f, -0.5f, -0.5f, -0.5f) * convert_float4(currWRTCenter * currWRTCenter) / var[extraCnt] )*
+                        (float4)(lut[lut_j*lut_step+anX+i]);
+#else
+                    weight = exp( -0.5f * (mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
+                        mul24(currWRTCenter.z, currWRTCenter.z) ) / (var[extraCnt].x+var[extraCnt].y+var[extraCnt].z) ) * lut[lut_j*lut_step+anX+i];
+#endif
+
+#else // !ABF_GAUSSIAN
+
 #if VAR_PER_CHANNEL
                     weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) *
                         (float4)(lut[lut_j*lut_step+anX+i]);
 #else
-                    weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
-                        mul24(currWRTCenter.z, currWRTCenter.z))/(var.x+var.y+var.z));
-#endif
+                    weight = ((float)lut[lut_j*lut_step+anX+i]) /(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +
+                        mul24(currWRTCenter.z, currWRTCenter.z))/(var[extraCnt].x+var[extraCnt].y+var[extraCnt].z));
 #endif
+
+#endif //ABF_GAUSSIAN
+
+
+
+#endif  // FIXED_WEIGHT
+
                     tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight;
                     totalWeight += weight;
                 }
             }
 
-            tmp_sum[extraCnt] /= totalWeight;
-
             if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
-                dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]);
+                dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4_rtz( (tmp_sum[extraCnt] / (float4)totalWeight) + (float4)0.5f);
 
 #if VAR_PER_CHANNEL
             totalWeight = (float4)(0,0,0,0);
 #else
-            totalWeight = 0;
+            totalWeight = 0.0f;
 #endif
         }
     }
@@ -254,7 +271,7 @@ edgeEnhancingFilter_C4_D0(
 
 
 __kernel void
-edgeEnhancingFilter_C1_D0(
+adaptiveBilateralFilter_C1_D0(
     __global const uchar * restrict src,
     __global uchar *dst,
     float alpha,
@@ -343,10 +360,11 @@ edgeEnhancingFilter_C1_D0(
         //find variance of all data
         int startLMj;
         int endLMj;
-#if CALCVAR
+
         // Top row: don't sum the very last element
         for(int extraCnt=0; extraCnt<=EXTRA; extraCnt++)
         {
+#if CALCVAR
             startLMj = extraCnt;
             endLMj =  ksY+extraCnt-1;
             sumVal = 0;
@@ -361,9 +379,9 @@ edgeEnhancingFilter_C1_D0(
                     sumValSqr += mul24(currVal, currVal);
                 }
             }
-            var[extraCnt] = (float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) ;
+            var[extraCnt] =  clamp((float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) , 0.1f, (float)(MAX_VAR_VAL) );
 #else
-        var[extraCnt] = (float)(900.0);
+            var[extraCnt] = (float)(MAX_VAR_VAL);
 #endif
         }
 
@@ -389,19 +407,20 @@ edgeEnhancingFilter_C1_D0(
                     currVal	= (int)(data[j][col+anX+i])	;
                     currWRTCenter = currVal-currValCenter;
 
+#if ABF_GAUSSIAN
+                    weight = exp( -0.5f * (float)mul24(currWRTCenter,currWRTCenter)/var[extraCnt]) * lut[lut_j*lut_step+anX+i] ;
+#else
                     weight = var[extraCnt] / (var[extraCnt] + (float)mul24(currWRTCenter,currWRTCenter)) * lut[lut_j*lut_step+anX+i] ;
+#endif
 #endif
                     tmp_sum[extraCnt] += (float)(data[j][col+anX+i] * weight);
                     totalWeight += weight;
                 }
             }
 
-            tmp_sum[extraCnt] /= totalWeight;
-
-
             if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
             {
-                dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = (uchar)(tmp_sum[extraCnt]);
+                dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = convert_uchar_rtz(tmp_sum[extraCnt]/totalWeight+0.5f);
             }
 
             totalWeight = 0;
diff --git a/modules/ocl/test/test_filters.cpp b/modules/ocl/test/test_filters.cpp
index d2edf6d219..04776bb704 100644
--- a/modules/ocl/test/test_filters.cpp
+++ b/modules/ocl/test/test_filters.cpp
@@ -338,8 +338,8 @@ OCL_TEST_P(AdaptiveBilateral, Mat)
     {
         random_roi();
 
-        adaptiveBilateralFilter(src_roi, dst_roi, kernelSize, 5, Point(-1, -1), borderType); // TODO anchor
-        ocl::adaptiveBilateralFilter(gsrc_roi, gdst_roi, kernelSize, 5, Point(-1, -1), borderType);
+        adaptiveBilateralFilter(src_roi, dst_roi, kernelSize, 5, 1, Point(-1, -1), borderType); // TODO anchor
+        ocl::adaptiveBilateralFilter(gsrc_roi, gdst_roi, kernelSize, 5, 1, Point(-1, -1), borderType);
 
         Near();
     }
diff --git a/samples/ocl/adaptive_bilateral_filter.cpp b/samples/ocl/adaptive_bilateral_filter.cpp
index b8ad3edfb0..42a2f772b0 100644
--- a/samples/ocl/adaptive_bilateral_filter.cpp
+++ b/samples/ocl/adaptive_bilateral_filter.cpp
@@ -12,7 +12,9 @@ int main( int argc, const char** argv )
 {
     const char* keys =
         "{ i | input   |          | specify input image }"
-        "{ k | ksize   |     5    | specify kernel size }"
+        "{ k | ksize   |     11   | specify kernel size }"
+        "{ s | sSpace  |     3    | specify sigma space }"
+        "{ c | sColor  |     30   | specify max color }"
         "{ h | help    | false    | print help message }";
 
     CommandLineParser cmd(argc, argv, keys);
@@ -26,27 +28,36 @@ int main( int argc, const char** argv )
 
     string src_path = cmd.get<string>("i");
     int ks = cmd.get<int>("k");
-    const char * winName[] = {"input", "adaptive bilateral CPU", "adaptive bilateral OpenCL", "bilateralFilter OpenCL"};
+    const char * winName[] = {"input", "ABF OpenCL", "BF OpenCL"};
 
-    Mat src = imread(src_path), abFilterCPU;
+    Mat src = imread(src_path);
     if (src.empty())
     {
         cout << "error read image: " << src_path << endl;
         return EXIT_FAILURE;
     }
 
+    double sigmaSpace = cmd.get<int>("s");
+
+    // sigma for checking pixel values. This is used as is in the "normal" bilateral filter,
+    // and it is used as an upper clamp on the adaptive case.
+    double sigmacolor = cmd.get<int>("c");
+
     ocl::oclMat dsrc(src), dABFilter, dBFilter;
-
     Size ksize(ks, ks);
-    adaptiveBilateralFilter(src,abFilterCPU, ksize, 10);
-    ocl::adaptiveBilateralFilter(dsrc, dABFilter, ksize, 10);
-    ocl::bilateralFilter(dsrc, dBFilter, ks, 30, 9);
 
+    // ksize is the total width/height of neighborhood used to calculate local variance.
+    // sigmaSpace is not a priori related to ksize/2.
+    ocl::adaptiveBilateralFilter(dsrc, dABFilter, ksize, sigmaSpace, sigmacolor);
+    ocl::bilateralFilter(dsrc, dBFilter, ks, sigmacolor, sigmaSpace);
     Mat abFilter = dABFilter, bFilter = dBFilter;
+
+    ocl::finish();
+
     imshow(winName[0], src);
-    imshow(winName[1], abFilterCPU);
-    imshow(winName[2], abFilter);
-    imshow(winName[3], bFilter);
+    imshow(winName[1], abFilter);
+    imshow(winName[2], bFilter);
+
     waitKey();
 
     return EXIT_SUCCESS;

From 56d943388d2bd1a273b16b4958ba2f6684fc33c1 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Tue, 5 Nov 2013 20:03:49 +0400
Subject: [PATCH 24/28] speeded up ocl::distanceToCenters

---
 modules/ocl/doc/ml_machine_learning.rst |  12 +--
 modules/ocl/include/opencv2/ocl/ocl.hpp |   2 +-
 modules/ocl/perf/perf_imgproc.cpp       |  41 +++++----
 modules/ocl/src/kmeans.cpp              |  94 ++++++++++-----------
 modules/ocl/src/opencl/kmeans_kernel.cl | 107 ++++++++++--------------
 modules/ocl/test/test_kmeans.cpp        |  67 +++++++--------
 6 files changed, 141 insertions(+), 182 deletions(-)

diff --git a/modules/ocl/doc/ml_machine_learning.rst b/modules/ocl/doc/ml_machine_learning.rst
index eb72cbeef4..ad0e30397f 100644
--- a/modules/ocl/doc/ml_machine_learning.rst
+++ b/modules/ocl/doc/ml_machine_learning.rst
@@ -91,11 +91,7 @@ ocl::distanceToCenters
 ----------------------
 For each samples in ``source``, find its closest neighour in ``centers``.
 
-.. ocv:function:: void ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers, int distType = NORM_L2SQR, const oclMat &indices = oclMat())
-
-    :param dists: The output distances calculated from each sample to the best matched center.
-
-    :param labels: The output index of best matched center for each row of sample.
+.. ocv:function:: void ocl::distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType = NORM_L2SQR)
 
     :param src: Floating-point matrix of input samples. One row per sample.
 
@@ -103,10 +99,8 @@ For each samples in ``source``, find its closest neighour in ``centers``.
 
     :param distType: Distance metric to calculate distances. Supports ``NORM_L1`` and ``NORM_L2SQR``.
 
-    :param indices: Optional source indices. If not empty:
+    :param dists: The output distances calculated from each sample to the best matched center.
 
-            * only the indexed source samples will be processed
-            * outputs, i.e., ``dists`` and ``labels``, have the same size of indices
-            * outputs are in the same order of indices instead of the order of src
+    :param labels: The output index of best matched center for each row of sample.
 
 The method is a utility function which maybe used for multiple clustering algorithms such as K-means.
diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp
index 4070ed13d4..3b4219acba 100644
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -878,7 +878,7 @@ namespace cv
         // supports NORM_L1 and NORM_L2 distType
         // if indices is provided, only the indexed rows will be calculated and their results are in the same
         // order of indices
-        CV_EXPORTS void distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers, int distType = NORM_L2SQR, const oclMat &indices = oclMat());
+        CV_EXPORTS void distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType = NORM_L2SQR);
 
         //!Does k-means procedure on GPU
         // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp
index c57950ff10..8c12fb456f 100644
--- a/modules/ocl/perf/perf_imgproc.cpp
+++ b/modules/ocl/perf/perf_imgproc.cpp
@@ -872,58 +872,57 @@ PERF_TEST_P(columnSumFixture, columnSum, OCL_TYPICAL_MAT_SIZES)
 
 //////////////////////////////distanceToCenters////////////////////////////////////////////////
 
-CV_ENUM(DistType, NORM_L1, NORM_L2SQR);
+CV_ENUM(DistType, NORM_L1, NORM_L2SQR)
+
 typedef tuple<Size, DistType> distanceToCentersParameters;
 typedef TestBaseWithParam<distanceToCentersParameters> distanceToCentersFixture;
 
 static void distanceToCentersPerfTest(Mat& src, Mat& centers, Mat& dists, Mat& labels, int distType)
 {
     Mat batch_dists;
-    cv::batchDistance(src,centers,batch_dists, CV_32FC1, noArray(), distType);
+    cv::batchDistance(src, centers, batch_dists, CV_32FC1, noArray(), distType);
+
     std::vector<float> dists_v;
     std::vector<int> labels_v;
-    for(int i = 0; i<batch_dists.rows; i++)
+
+    for (int i = 0; i < batch_dists.rows; i++)
     {
         Mat r = batch_dists.row(i);
         double mVal;
         Point mLoc;
+
         minMaxLoc(r, &mVal, NULL, &mLoc, NULL);
-        dists_v.push_back((float)mVal);
+        dists_v.push_back(static_cast<float>(mVal));
         labels_v.push_back(mLoc.x);
     }
-    Mat temp_dists(dists_v);
-    Mat temp_labels(labels_v);
-    temp_dists.reshape(1,1).copyTo(dists);
-    temp_labels.reshape(1,1).copyTo(labels);
+
+    Mat(dists_v).copyTo(dists);
+    Mat(labels_v).copyTo(labels);
 }
 
 PERF_TEST_P(distanceToCentersFixture, distanceToCenters, ::testing::Combine(::testing::Values(cv::Size(256,256), cv::Size(512,512)), DistType::all()) )
 {
     Size size = get<0>(GetParam());
     int distType = get<1>(GetParam());
-    Mat src(size, CV_32FC1);
-    Mat centers(size, CV_32FC1);
-    Mat dists(cv::Size(src.rows,1), CV_32FC1);
-    Mat labels(cv::Size(src.rows,1), CV_32SC1);
+
+    Mat src(size, CV_32FC1), centers(size, CV_32FC1);
+    Mat dists(src.rows, 1, CV_32FC1), labels(src.rows, 1, CV_32SC1);
+
     declare.in(src, centers, WARMUP_RNG).out(dists, labels);
+
     if (RUN_OCL_IMPL)
     {
-        ocl::oclMat ocl_src(src);
-        ocl::oclMat ocl_centers(centers);
-        ocl::oclMat ocl_dists(dists);
-        ocl::oclMat ocl_labels(labels);
+        ocl::oclMat ocl_src(src), ocl_centers(centers);
 
-        OCL_TEST_CYCLE() ocl::distanceToCenters(ocl_dists,ocl_labels,ocl_src, ocl_centers, distType);
-
-        ocl_dists.download(dists);
-        ocl_labels.download(labels);
+        OCL_TEST_CYCLE() ocl::distanceToCenters(ocl_src, ocl_centers, dists, labels, distType);
 
         SANITY_CHECK(dists, 1e-6, ERROR_RELATIVE);
         SANITY_CHECK(labels);
     }
     else if (RUN_PLAIN_IMPL)
     {
-        TEST_CYCLE() distanceToCentersPerfTest(src,centers,dists,labels,distType);
+        TEST_CYCLE() distanceToCentersPerfTest(src, centers, dists, labels, distType);
+
         SANITY_CHECK(dists, 1e-6, ERROR_RELATIVE);
         SANITY_CHECK(labels);
     }
diff --git a/modules/ocl/src/kmeans.cpp b/modules/ocl/src/kmeans.cpp
index 58a68a750d..31fb2503d7 100644
--- a/modules/ocl/src/kmeans.cpp
+++ b/modules/ocl/src/kmeans.cpp
@@ -160,63 +160,66 @@ static void generateCentersPP(const Mat& _data, Mat& _out_centers,
     }
 }
 
-void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers, int distType, const oclMat &indices)
+void cv::ocl::distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType)
 {
-    CV_Assert(src.cols*src.oclchannels() == centers.cols*centers.oclchannels());
+    CV_Assert(src.cols * src.channels() == centers.cols * centers.channels());
     CV_Assert(src.depth() == CV_32F && centers.depth() == CV_32F);
-    bool is_label_row_major = false;
-    ensureSizeIsEnough(1, src.rows, CV_32FC1, dists);
-    if(labels.empty() || (!labels.empty() && labels.rows == src.rows && labels.cols == 1))
-    {
-        ensureSizeIsEnough(src.rows, 1, CV_32SC1, labels);
-        is_label_row_major = true;
-    }
     CV_Assert(distType == NORM_L1 || distType == NORM_L2SQR);
 
+    dists.create(src.rows, 1, CV_32FC1);
+    labels.create(src.rows, 1, CV_32SC1);
+
     std::stringstream build_opt_ss;
-    build_opt_ss
-        << (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST")
-        << (indices.empty() ? "" : " -D USE_INDEX");
+    build_opt_ss << (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST");
 
-    String build_opt = build_opt_ss.str();
+    int src_step = src.step / src.elemSize1();
+    int centers_step = centers.step / centers.elemSize1();
+    int feature_width = centers.cols * centers.oclchannels();
+    int src_offset = src.offset / src.elemSize1();
+    int centers_offset = centers.offset / centers.elemSize1();
 
-    const int src_step = (int)(src.oclchannels() * src.step / src.elemSize());
-    const int centers_step = (int)(centers.oclchannels() * centers.step / centers.elemSize());
-
-    const int colsNumb = centers.cols*centers.oclchannels();
-
-    const int label_step   = is_label_row_major ? (int)(labels.step / labels.elemSize()) : 1;
-    String kernelname = "distanceToCenters";
-
-    const int number_of_input = indices.empty() ? src.rows : indices.size().area();
-
-    const int src_offset = (int)src.offset/src.elemSize();
-    const int centers_offset = (int)centers.offset/centers.elemSize();
-
-    size_t globalThreads[3] = {number_of_input, 1, 1};
+    int all_dist_count = src.rows * centers.rows;
+    oclMat all_dist(1, all_dist_count, CV_32FC1);
 
     vector<pair<size_t, const void *> > args;
     args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
     args.push_back(make_pair(sizeof(cl_mem), (void *)&centers.data));
-    if(!indices.empty())
-    {
-        args.push_back(make_pair(sizeof(cl_mem), (void *)&indices.data));
-    }
-    args.push_back(make_pair(sizeof(cl_mem), (void *)&labels.data));
-    args.push_back(make_pair(sizeof(cl_mem), (void *)&dists.data));
-    args.push_back(make_pair(sizeof(cl_int), (void *)&colsNumb));
+    args.push_back(make_pair(sizeof(cl_mem), (void *)&all_dist.data));
+
+    args.push_back(make_pair(sizeof(cl_int), (void *)&feature_width));
     args.push_back(make_pair(sizeof(cl_int), (void *)&src_step));
     args.push_back(make_pair(sizeof(cl_int), (void *)&centers_step));
-    args.push_back(make_pair(sizeof(cl_int), (void *)&label_step));
-    args.push_back(make_pair(sizeof(cl_int), (void *)&number_of_input));
+    args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
     args.push_back(make_pair(sizeof(cl_int), (void *)&centers.rows));
+
     args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset));
     args.push_back(make_pair(sizeof(cl_int), (void *)&centers_offset));
 
+    size_t globalThreads[3] = { all_dist_count, 1, 1 };
+
     openCLExecuteKernel(Context::getContext(), &kmeans_kernel,
-        kernelname, globalThreads, NULL, args, -1, -1, build_opt.c_str());
+                        "distanceToCenters", globalThreads, NULL, args, -1, -1, build_opt_ss.str().c_str());
+
+    Mat all_dist_cpu;
+    all_dist.download(all_dist_cpu);
+
+    for (int i = 0; i < src.rows; ++i)
+    {
+        Point p;
+        double minVal;
+
+        Rect roi(i * centers.rows, 0, centers.rows, 1);
+        Mat hdr(all_dist_cpu, roi);
+
+        cv::minMaxLoc(hdr, &minVal, NULL, &p);
+
+        dists.at<float>(i, 0) = static_cast<float>(minVal);
+        labels.at<int>(i, 0) = p.x;
+    }
 }
+
 ///////////////////////////////////k - means /////////////////////////////////////////////////////////
+
 double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
                        TermCriteria criteria, int attempts, int flags, oclMat &_centers)
 {
@@ -429,28 +432,19 @@ double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
                 break;
 
             // assign labels
-            oclMat _dists(1, N, CV_64F);
-
-            _bestLabels.upload(_labels);
+            Mat dists(1, N, CV_64F);
             _centers.upload(centers);
+            distanceToCenters(_src, _centers, dists, _labels);
+            _bestLabels.upload(_labels);
 
-            distanceToCenters(_dists, _bestLabels, _src, _centers);
-
-            Mat dists;
-            _dists.download(dists);
-            _bestLabels.download(_labels);
             float* dist = dists.ptr<float>(0);
             compactness = 0;
             for( i = 0; i < N; i++ )
-            {
-                    compactness += (double)dist[i];
-            }
+                compactness += (double)dist[i];
         }
 
         if( compactness < best_compactness )
-        {
             best_compactness = compactness;
-        }
     }
 
     return best_compactness;
diff --git a/modules/ocl/src/opencl/kmeans_kernel.cl b/modules/ocl/src/opencl/kmeans_kernel.cl
index f62a08f636..244d52ca3f 100644
--- a/modules/ocl/src/opencl/kmeans_kernel.cl
+++ b/modules/ocl/src/opencl/kmeans_kernel.cl
@@ -44,81 +44,64 @@
 //
 //M*/
 
-#ifdef L1_DIST
-#  define DISTANCE(A, B) fabs((A) - (B))
-#elif defined L2SQR_DIST
-#  define DISTANCE(A, B) ((A) - (B)) * ((A) - (B))
-#else
-#  define DISTANCE(A, B) ((A) - (B)) * ((A) - (B))
-#endif
-
-inline float dist(__global const float * center, __global const float * src, int feature_cols)
+static float distance_(__global const float * center, __global const float * src, int feature_length)
 {
     float res = 0;
-    float4 tmp4;
-    int i;
-    for(i = 0; i < feature_cols / 4; i += 4, center += 4, src += 4)
-    {
-        tmp4 = vload4(0, center) - vload4(0, src);
+    float4 v0, v1, v2;
+    int i = 0;
+
 #ifdef L1_DIST
-        tmp4 = fabs(tmp4);
-#else
-        tmp4 *= tmp4;
+    float4 sum = (float4)(0.0f);
+#endif
+
+    for ( ; i <= feature_length - 4; i += 4)
+    {
+        v0 = vload4(0, center + i);
+        v1 = vload4(0, src + i);
+        v2 = v1 - v0;
+#ifdef L1_DIST
+        v0 = fabs(v2);
+        sum += v0;
+#else
+        res += dot(v2, v2);
 #endif
-        res += tmp4.x + tmp4.y + tmp4.z + tmp4.w;
     }
 
-    for(; i < feature_cols; ++i, ++center, ++src)
+#ifdef L1_DIST
+    res = sum.x + sum.y + sum.z + sum.w;
+#endif
+
+    for ( ; i < feature_length; ++i)
     {
-        res += DISTANCE(*src, *center);
+        float t0 = src[i];
+        float t1 = center[i];
+#ifdef L1_DIST
+        res += fabs(t0 - t1);
+#else
+        float t2 = t0 - t1;
+        res += t2 * t2;
+#endif
     }
+
     return res;
 }
 
-// to be distinguished with distanceToCenters in kmeans_kernel.cl
-__kernel void distanceToCenters(
-    __global const float *src,
-    __global const float *centers,
-#ifdef USE_INDEX
-    __global const int   *indices,
-#endif
-    __global int   *labels,
-    __global float *dists,
-    int feature_cols,
-    int src_step,
-    int centers_step,
-    int label_step,
-    int input_size,
-    int K,
-    int offset_src,
-    int offset_centers
-)
+__kernel void distanceToCenters(__global const float * src, __global const float * centers,
+                                __global float * dists, int feature_length,
+                                int src_step, int centers_step,
+                                int features_count, int centers_count,
+                                int src_offset, int centers_offset)
 {
     int gid = get_global_id(0);
-    float euDist, minval;
-    int minCentroid;
-    if(gid >= input_size)
+
+    if (gid < (features_count * centers_count))
     {
-        return;
+        int feature_index = gid / centers_count;
+        int center_index = gid % centers_count;
+
+        int center_idx = mad24(center_index, centers_step, centers_offset);
+        int src_idx = mad24(feature_index, src_step, src_offset);
+
+        dists[gid] = distance_(centers + center_idx, src + src_idx, feature_length);
     }
-    src += offset_src;
-    centers += offset_centers;
-#ifdef USE_INDEX
-    src += indices[gid] * src_step;
-#else
-    src += gid * src_step;
-#endif
-    minval = dist(centers, src, feature_cols);
-    minCentroid = 0;
-    for(int i = 1 ; i < K; i++)
-    {
-        euDist = dist(centers + i * centers_step, src, feature_cols);
-        if(euDist < minval)
-        {
-            minval = euDist;
-            minCentroid = i;
-        }
-    }
-    labels[gid * label_step] = minCentroid;
-    dists[gid] = minval;
 }
diff --git a/modules/ocl/test/test_kmeans.cpp b/modules/ocl/test/test_kmeans.cpp
index 6539c51c40..e1d0a17494 100644
--- a/modules/ocl/test/test_kmeans.cpp
+++ b/modules/ocl/test/test_kmeans.cpp
@@ -61,7 +61,7 @@ PARAM_TEST_CASE(Kmeans, int, int, int)
     int type;
     int K;
     int flags;
-    cv::Mat src ;
+    Mat src ;
     ocl::oclMat d_src, d_dists;
 
     Mat labels, centers;
@@ -73,7 +73,7 @@ PARAM_TEST_CASE(Kmeans, int, int, int)
         flags = GET_PARAM(2);
 
         // MWIDTH=256, MHEIGHT=256. defined in utility.hpp
-        cv::Size size = cv::Size(MWIDTH, MHEIGHT);
+        Size size = Size(MWIDTH, MHEIGHT);
         src.create(size, type);
         int row_idx = 0;
         const int max_neighbour = MHEIGHT / K - 1;
@@ -159,15 +159,15 @@ INSTANTIATE_TEST_CASE_P(OCL_ML, Kmeans, Combine(
 
 /////////////////////////////// DistanceToCenters //////////////////////////////////////////
 
-CV_ENUM(DistType, NORM_L1, NORM_L2SQR);
+CV_ENUM(DistType, NORM_L1, NORM_L2SQR)
 
 PARAM_TEST_CASE(distanceToCenters, DistType, bool)
 {
-    cv::Size size;
     int distType;
     bool useRoi;
-    cv::Mat src, centers, src_roi, centers_roi;
-    cv::ocl::oclMat ocl_src, ocl_centers, ocl_src_roi, ocl_centers_roi;
+
+    Mat src, centers, src_roi, centers_roi;
+    ocl::oclMat ocl_src, ocl_centers, ocl_src_roi, ocl_centers_roi;
 
     virtual void SetUp()
     {
@@ -177,70 +177,59 @@ PARAM_TEST_CASE(distanceToCenters, DistType, bool)
 
     void random_roi()
     {
-        Size roiSize_src = randomSize(10,1000);
-        Size roiSize_centers = randomSize(10, 1000);
-        roiSize_src.width = roiSize_centers.width;
+        Size roiSizeSrc = randomSize(1, MAX_VALUE);
+        Size roiSizeCenters = randomSize(1, MAX_VALUE);
+        roiSizeSrc.width = roiSizeCenters.width;
 
-        Border srcBorder = randomBorder(0, useRoi ? 500 : 0);
-        randomSubMat(src, src_roi, roiSize_src, srcBorder, CV_32FC1, -SHRT_MAX, SHRT_MAX);
+        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
+        randomSubMat(src, src_roi, roiSizeSrc, srcBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);
 
         Border centersBorder = randomBorder(0, useRoi ? 500 : 0);
-        randomSubMat(centers, centers_roi, roiSize_centers, centersBorder, CV_32FC1, -SHRT_MAX, SHRT_MAX);
+        randomSubMat(centers, centers_roi, roiSizeCenters, centersBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);
 
-        for(int i = 0; i<centers.rows; i++)
-            centers.at<float>(i, randomInt(0,centers.cols-1)) = (float)randomDouble(SHRT_MAX, INT_MAX);
-
-        generateOclMat(ocl_src, ocl_src_roi, src, roiSize_src, srcBorder);
-        generateOclMat(ocl_centers, ocl_centers_roi, centers, roiSize_centers, centersBorder);
+        for (int i = 0; i < centers.rows; i++)
+            centers.at<float>(i, randomInt(0, centers.cols)) = (float)randomDouble(SHRT_MAX, INT_MAX);
 
+        generateOclMat(ocl_src, ocl_src_roi, src, roiSizeSrc, srcBorder);
+        generateOclMat(ocl_centers, ocl_centers_roi, centers, roiSizeCenters, centersBorder);
     }
-
 };
 
 OCL_TEST_P(distanceToCenters, Accuracy)
 {
-    for(int j = 0; j< LOOP_TIMES; j++)
+    for (int j = 0; j < LOOP_TIMES; j++)
     {
         random_roi();
 
-        cv::ocl::oclMat ocl_dists;
-        cv::ocl::oclMat ocl_labels;
-
-        cv::ocl::distanceToCenters(ocl_dists,ocl_labels,ocl_src_roi, ocl_centers_roi, distType);
-
         Mat labels, dists;
-        ocl_labels.download(labels);
-        ocl_dists.download(dists);
+        ocl::distanceToCenters(ocl_src_roi, ocl_centers_roi, dists, labels, distType);
 
-        ASSERT_EQ(ocl_dists.cols, ocl_labels.rows);
+        EXPECT_EQ(dists.size(), labels.size());
 
         Mat batch_dists;
-
         cv::batchDistance(src_roi, centers_roi, batch_dists, CV_32FC1, noArray(), distType);
 
-        std::vector<double> gold_dists_v;
+        std::vector<float> gold_dists_v;
+        gold_dists_v.reserve(batch_dists.rows);
 
-        for(int i = 0; i<batch_dists.rows; i++)
+        for (int i = 0; i < batch_dists.rows; i++)
         {
             Mat r = batch_dists.row(i);
             double mVal;
             Point mLoc;
             minMaxLoc(r, &mVal, NULL, &mLoc, NULL);
 
-            int ocl_label = *(int*)labels.row(i).col(0).data;
-            ASSERT_EQ(mLoc.x, ocl_label);
+            int ocl_label = labels.at<int>(i, 0);
+            EXPECT_EQ(mLoc.x, ocl_label);
 
-            gold_dists_v.push_back(mVal);
+            gold_dists_v.push_back(static_cast<float>(mVal));
         }
-        Mat gold_dists(gold_dists_v);
-        dists.convertTo(dists, CV_64FC1);
-        double relative_error = cv::norm(gold_dists.t(), dists, NORM_INF|NORM_RELATIVE);
+
+        double relative_error = cv::norm(Mat(gold_dists_v), dists, NORM_INF | NORM_RELATIVE);
         ASSERT_LE(relative_error, 1e-5);
     }
 }
 
-
-INSTANTIATE_TEST_CASE_P (OCL_ML, distanceToCenters, Combine(DistType::all(), Bool()) );
-
+INSTANTIATE_TEST_CASE_P (OCL_ML, distanceToCenters, Combine(DistType::all(), Bool()));
 
 #endif

From fa15769f39154ffdfbd5d5d8b82d376b37c9f5c2 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 4 Nov 2013 20:50:33 +0400
Subject: [PATCH 25/28] added CV_16SC2 && CV_16UC1 maps support to ocl::remap
 (nearest neighbour only)

---
 modules/ocl/src/imgproc.cpp                 | 20 ++++++----
 modules/ocl/src/opencl/imgproc_remap.cl     | 41 ++++++++++++++++++++-
 modules/ocl/src/opencl/imgproc_threshold.cl |  4 +-
 modules/ocl/test/test_warp.cpp              |  1 +
 4 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp
index 3539dfaf1a..193cb43a62 100644
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -195,9 +195,14 @@ namespace cv
                 return;
             }
 
+            if (map1.empty())
+                map1.swap(map2);
+
             CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST
-                      || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4);
-            CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) ||
+                      /*|| interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);
+            CV_Assert((map1.type() == CV_16SC2 && (map2.empty() || (interpolation == INTER_NEAREST &&
+                                                                    (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) )) ||
+                      (map1.type() == CV_32FC2 && !map2.data) ||
                       (map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
             CV_Assert(!map2.data || map2.size() == map1.size());
             CV_Assert(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || borderType == BORDER_WRAP
@@ -212,10 +217,14 @@ namespace cv
                                    "BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
 
             string kernelName = "remap";
-            if ( map1.type() == CV_32FC2 && !map2.data )
+            if (map1.type() == CV_32FC2 && map2.empty())
                 kernelName += "_32FC2";
-            else if (map1.type() == CV_16SC2 && !map2.data)
+            else if (map1.type() == CV_16SC2)
+            {
                 kernelName += "_16SC2";
+                if (!map2.empty())
+                    kernelName += "_16UC1";
+            }
             else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
                 kernelName += "_2_32FC1";
             else
@@ -232,9 +241,6 @@ namespace cv
             if (interpolation != INTER_NEAREST)
             {
                 int wdepth = std::max(CV_32F, dst.depth());
-                if (!supportsDouble)
-                    wdepth = std::min(CV_32F, wdepth);
-
                 buildOptions += format(" -D WT=%s%s -D convertToT=convert_%s%s%s -D convertToWT=convert_%s%s"
                                        " -D convertToWT2=convert_%s2 -D WT2=%s2",
                                        typeMap[wdepth], channelMap[ocn],
diff --git a/modules/ocl/src/opencl/imgproc_remap.cl b/modules/ocl/src/opencl/imgproc_remap.cl
index 53c053947f..b623091ed8 100644
--- a/modules/ocl/src/opencl/imgproc_remap.cl
+++ b/modules/ocl/src/opencl/imgproc_remap.cl
@@ -51,6 +51,13 @@
 #endif
 #endif
 
+enum
+{
+    INTER_BITS = 5,
+    INTER_TAB_SIZE = 1 << INTER_BITS,
+    INTER_TAB_SIZE2 = INTER_TAB_SIZE * INTER_TAB_SIZE
+};
+
 #ifdef INTER_NEAREST
 #define convertToWT
 #endif
@@ -204,6 +211,36 @@ __kernel void remap_16SC2(__global const T * restrict src, __global T * dst, __g
     }
 }
 
+__kernel void remap_16SC2_16UC1(__global const T * restrict src, __global T * dst, __global short2 * map1, __global ushort * map2,
+        int src_offset, int dst_offset, int map1_offset, int map2_offset,
+        int src_step, int dst_step, int map1_step, int map2_step,
+        int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (x < dst_cols && y < dst_rows)
+    {
+        int dstIdx = mad24(y, dst_step, x + dst_offset);
+        int map1Idx = mad24(y, map1_step, x + map1_offset);
+        int map2Idx = mad24(y, map2_step, x + map2_offset);
+
+        int map2Value = convert_int(map2[map2Idx]) & (INTER_TAB_SIZE2 - 1);
+        int dx = (map2Value & (INTER_TAB_SIZE - 1)) < (INTER_TAB_SIZE >> 1) ? 1 : 0;
+        int dy = (map2Value >> INTER_BITS) < (INTER_TAB_SIZE >> 1) ? 1 : 0;
+        int2 gxy = convert_int2(map1[map1Idx]) + (int2)(dx, dy);
+        int gx = gxy.x, gy = gxy.y;
+
+        if (NEED_EXTRAPOLATION(gx, gy))
+            EXTRAPOLATE(gxy, dst[dstIdx])
+        else
+        {
+            int srcIdx = mad24(gy, src_step, gx + src_offset);
+            dst[dstIdx] = src[srcIdx];
+        }
+    }
+}
+
 #elif INTER_LINEAR
 
 __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
@@ -229,7 +266,7 @@ __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
         int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1);
 
         float2 _u = map_data - convert_float2(map_dataA);
-        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32;
+        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
         WT scalar = convertToWT(nVal);
         WT a = scalar, b = scalar, c = scalar, d = scalar;
 
@@ -282,7 +319,7 @@ __kernel void remap_32FC2(__global T const * restrict  src, __global T * dst,
         int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
 
         float2 _u = map_data - convert_float2(map_dataA);
-        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32;
+        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
         WT scalar = convertToWT(nVal);
         WT a = scalar, b = scalar, c = scalar, d = scalar;
 
diff --git a/modules/ocl/src/opencl/imgproc_threshold.cl b/modules/ocl/src/opencl/imgproc_threshold.cl
index 400ac806cf..6f97c04519 100644
--- a/modules/ocl/src/opencl/imgproc_threshold.cl
+++ b/modules/ocl/src/opencl/imgproc_threshold.cl
@@ -93,8 +93,8 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
 #endif
         else
         {
-            T array[VECSIZE];
-            VSTOREN(vecValue, 0, array);
+            __attribute__(( aligned(sizeof(VT)) )) T array[VECSIZE];
+            *((VT*)array) = vecValue;
             #pragma unroll
             for (int i = 0; i < VECSIZE; ++i)
                 if (gx + i < max_index)
diff --git a/modules/ocl/test/test_warp.cpp b/modules/ocl/test/test_warp.cpp
index 05554ce3fa..b9231d1166 100644
--- a/modules/ocl/test/test_warp.cpp
+++ b/modules/ocl/test/test_warp.cpp
@@ -355,6 +355,7 @@ INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_NEAREST, Combine(
                             Values(1, 2, 3, 4),
                             Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
                                    pair<MatType, MatType>((MatType)CV_32FC2, noType),
+                                   pair<MatType, MatType>((MatType)CV_16SC2, (MatType)CV_16UC1),
                                    pair<MatType, MatType>((MatType)CV_16SC2, noType)),
                             Values((Border)BORDER_CONSTANT,
                                    (Border)BORDER_REPLICATE,

From e9edfd1c54335947297ed03a6eb21a72ff827986 Mon Sep 17 00:00:00 2001
From: Konstantin Matskevich <konstantin.matskevich@itseez.com>
Date: Wed, 6 Nov 2013 16:03:58 +0400
Subject: [PATCH 26/28] ocl: update device info dump (per-platform devices)

---
 .../opencv2/ocl/private/opencl_dumpinfo.hpp   | 88 ++++++++-----------
 modules/ocl/perf/main.cpp                     | 23 ++---
 modules/ocl/test/main.cpp                     | 12 +--
 3 files changed, 53 insertions(+), 70 deletions(-)

diff --git a/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp b/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp
index 942fdf4539..a1b5300cf6 100644
--- a/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp
+++ b/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp
@@ -39,24 +39,16 @@
 //
 //M*/
 
-#if !defined(DUMP_INFO_STDOUT) && !defined(DUMP_INFO_XML) && !defined(DUMP_DEVICES_INFO_STDOUT) && !defined(DUMP_DEVICES_INFO_XML)
+#if !defined(DUMP_MESSAGE_STDOUT) && !defined(DUMP_PROPERTY_XML)
 #error Invalid usage
 #endif
 
-#if !defined(DUMP_INFO_STDOUT)
-#define DUMP_INFO_STDOUT(...)
+#if !defined(DUMP_PROPERTY_XML)
+#define DUMP_PROPERTY_XML(...)
 #endif
 
-#if !defined(DUMP_INFO_XML)
-#define DUMP_INFO_XML(...)
-#endif
-
-#if !defined(DUMP_DEVICES_INFO_STDOUT)
-#define DUMP_DEVICES_INFO_STDOUT(...)
-#endif
-
-#if !defined(DUMP_DEVICES_INFO_XML)
-#define DUMP_DEVICES_INFO_XML(...)
+#if !defined(DUMP_MESSAGE_STDOUT)
+#define DUMP_MESSAGE_STDOUT(...)
 #endif
 
 #include <sstream>
@@ -95,70 +87,68 @@ static void dumpOpenCLDevice()
     {
         cv::ocl::PlatformsInfo platforms;
         cv::ocl::getOpenCLPlatforms(platforms);
-        DUMP_INFO_STDOUT("OpenCL Platforms","");
-        DUMP_INFO_XML("OpenCL Platforms","");
+        DUMP_MESSAGE_STDOUT("OpenCL Platforms: ");
         const char* deviceTypeStr;
         for(unsigned int i=0; i < platforms.size(); i++)
         {
-            DUMP_INFO_STDOUT("    ", platforms.at(i)->platformName);
-            DUMP_INFO_XML("", platforms.at(i)->platformName);
-            cv::ocl::DevicesInfo devices;
-            cv::ocl::getOpenCLDevices(devices);
+            DUMP_MESSAGE_STDOUT("    " << platforms.at(i)->platformName);
+            const cv::ocl::DevicesInfo& devices = platforms.at(i)->devices;
             for(unsigned int j=0; j < devices.size(); j++)
             {
-                deviceTypeStr = devices.at(j)->deviceType == CVCL_DEVICE_TYPE_CPU
-                            ? ("CPU") : (devices.at(j)->deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown");
-                DUMP_DEVICES_INFO_STDOUT(deviceTypeStr, j, devices.at(j)->deviceName, devices.at(j)->deviceVersion);
-                DUMP_DEVICES_INFO_XML(deviceTypeStr, j, devices.at(j)->deviceName, devices.at(j)->deviceVersion);
+                const cv::ocl::DeviceInfo& current_device = *devices.at(j);
+                deviceTypeStr = current_device.deviceType == CVCL_DEVICE_TYPE_CPU
+                            ? ("CPU") : (current_device.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown");
+                DUMP_MESSAGE_STDOUT( "        " << deviceTypeStr << " : " << current_device.deviceName << " : " << current_device.deviceVersion );
+                DUMP_PROPERTY_XML("cv_ocl_platform_"<< i<<"_device_"<<j, "(Platform=" << current_device.platform->platformName << ")(Type="
+                    << deviceTypeStr <<")(Name="<< current_device.deviceName << ")(Version="<< current_device.deviceVersion<<")");
             }
         }
-        DUMP_INFO_STDOUT("Current OpenCL device","");
-        DUMP_INFO_XML("Current OpenCL device","");
+        DUMP_MESSAGE_STDOUT("Current OpenCL device: ");
 
         const cv::ocl::DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo();
 
-        DUMP_INFO_STDOUT("    Platform", deviceInfo.platform->platformName);
-        DUMP_INFO_XML("cv_ocl_platformName", deviceInfo.platform->platformName);
+        DUMP_MESSAGE_STDOUT("    Platform = "<< deviceInfo.platform->platformName);
+        DUMP_PROPERTY_XML("cv_ocl_current_platformName", deviceInfo.platform->platformName);
 
         deviceTypeStr = deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU
                         ? "CPU" : (deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown");
-        DUMP_INFO_STDOUT("    Type", deviceTypeStr);
-        DUMP_INFO_XML("cv_ocl_deviceType", deviceTypeStr);
+        DUMP_MESSAGE_STDOUT("    Type = "<< deviceTypeStr);
+        DUMP_PROPERTY_XML("cv_ocl_current_deviceType", deviceTypeStr);
 
-        DUMP_INFO_STDOUT("    Name", deviceInfo.deviceName);
-        DUMP_INFO_XML("cv_ocl_deviceName", deviceInfo.deviceName);
+        DUMP_MESSAGE_STDOUT("    Name = "<< deviceInfo.deviceName);
+        DUMP_PROPERTY_XML("cv_ocl_current_deviceName", deviceInfo.deviceName);
 
-        DUMP_INFO_STDOUT("    Version", deviceInfo.deviceVersion);
-        DUMP_INFO_XML("cv_ocl_deviceVersion", deviceInfo.deviceVersion);
+        DUMP_MESSAGE_STDOUT("    Version = " << deviceInfo.deviceVersion);
+        DUMP_PROPERTY_XML("cv_ocl_current_deviceVersion", deviceInfo.deviceVersion);
 
-        DUMP_INFO_STDOUT("    Compute units", deviceInfo.maxComputeUnits);
-        DUMP_INFO_XML("cv_ocl_maxComputeUnits", deviceInfo.maxComputeUnits);
+        DUMP_MESSAGE_STDOUT("    Compute units = "<< deviceInfo.maxComputeUnits);
+        DUMP_PROPERTY_XML("cv_ocl_current_maxComputeUnits", deviceInfo.maxComputeUnits);
 
-        DUMP_INFO_STDOUT("    Max work group size", deviceInfo.maxWorkGroupSize);
-        DUMP_INFO_XML("cv_ocl_maxWorkGroupSize", deviceInfo.maxWorkGroupSize);
+        DUMP_MESSAGE_STDOUT("    Max work group size = "<< deviceInfo.maxWorkGroupSize);
+        DUMP_PROPERTY_XML("cv_ocl_current_maxWorkGroupSize", deviceInfo.maxWorkGroupSize);
 
         std::string localMemorySizeStr = bytesToStringRepr(deviceInfo.localMemorySize);
-        DUMP_INFO_STDOUT("    Local memory size", localMemorySizeStr.c_str());
-        DUMP_INFO_XML("cv_ocl_localMemorySize", deviceInfo.localMemorySize);
+        DUMP_MESSAGE_STDOUT("    Local memory size = "<< localMemorySizeStr.c_str());
+        DUMP_PROPERTY_XML("cv_ocl_current_localMemorySize", deviceInfo.localMemorySize);
 
         std::string maxMemAllocSizeStr = bytesToStringRepr(deviceInfo.maxMemAllocSize);
-        DUMP_INFO_STDOUT("    Max memory allocation size", maxMemAllocSizeStr.c_str());
-        DUMP_INFO_XML("cv_ocl_maxMemAllocSize", deviceInfo.maxMemAllocSize);
+        DUMP_MESSAGE_STDOUT("    Max memory allocation size = "<< maxMemAllocSizeStr.c_str());
+        DUMP_PROPERTY_XML("cv_ocl_current_maxMemAllocSize", deviceInfo.maxMemAllocSize);
 
         const char* doubleSupportStr = deviceInfo.haveDoubleSupport ? "Yes" : "No";
-        DUMP_INFO_STDOUT("    Double support", doubleSupportStr);
-        DUMP_INFO_XML("cv_ocl_haveDoubleSupport", deviceInfo.haveDoubleSupport);
+        DUMP_MESSAGE_STDOUT("    Double support = "<< doubleSupportStr);
+        DUMP_PROPERTY_XML("cv_ocl_current_haveDoubleSupport", deviceInfo.haveDoubleSupport);
 
         const char* isUnifiedMemoryStr = deviceInfo.isUnifiedMemory ? "Yes" : "No";
-        DUMP_INFO_STDOUT("    Unified memory", isUnifiedMemoryStr);
-        DUMP_INFO_XML("cv_ocl_isUnifiedMemory", deviceInfo.isUnifiedMemory);
+        DUMP_MESSAGE_STDOUT("    Unified memory = "<< isUnifiedMemoryStr);
+        DUMP_PROPERTY_XML("cv_ocl_current_isUnifiedMemory", deviceInfo.isUnifiedMemory);
     }
     catch (...)
     {
-        DUMP_INFO_STDOUT("OpenCL device", "not available");
-        DUMP_INFO_XML("cv_ocl", "not available");
+        DUMP_MESSAGE_STDOUT("OpenCL device not available");
+        DUMP_PROPERTY_XML("cv_ocl", "not available");
     }
 }
 
-#undef DUMP_INFO_STDOUT
-#undef DUMP_INFO_XML
+#undef DUMP_MESSAGE_STDOUT
+#undef DUMP_PROPERTY_XML
diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp
index a5e2386835..5a67d1cbfd 100644
--- a/modules/ocl/perf/main.cpp
+++ b/modules/ocl/perf/main.cpp
@@ -42,28 +42,19 @@
 
 #include "perf_precomp.hpp"
 
-#define DUMP_INFO_STDOUT(propertyDisplayName, propertyValue) \
+#define DUMP_PROPERTY_XML(propertyName, propertyValue) \
     do { \
-        std::cout << (propertyDisplayName) << ": " << (propertyValue) << std::endl; \
+        std::stringstream ssName, ssValue;\
+        ssName << propertyName;\
+        ssValue << propertyValue; \
+        ::testing::Test::RecordProperty(ssName.str(), ssValue.str()); \
     } while (false)
 
-#define DUMP_INFO_XML(propertyXMLName, propertyValue) \
+#define DUMP_MESSAGE_STDOUT(msg) \
     do { \
-        std::stringstream ss; ss << propertyValue; \
-        ::testing::Test::RecordProperty((propertyXMLName), ss.str()); \
+        std::cout << msg << std::endl; \
     } while (false)
 
-#define DUMP_DEVICES_INFO_STDOUT(deviceType, deviceIndex, deviceName, deviceVersion) \
-    do { \
-        std::cout << "        " << (deviceType) << " " << (deviceIndex) << " : " << (deviceName) << " : " << deviceVersion << std::endl; \
-    } while (false)
-
-#define DUMP_DEVICES_INFO_XML(deviceType, deviceIndex, deviceName, deviceVersion) \
-    do { \
-        std::stringstream ss; \
-        ss << ":" << deviceIndex << ":" << deviceName << ":" << deviceVersion; \
-        ::testing::Test::RecordProperty((deviceType), ss.str()); \
-    } while (false)
 
 #include "opencv2/ocl/private/opencl_dumpinfo.hpp"
 
diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp
index d76fa8483c..992aeb8bb1 100644
--- a/modules/ocl/test/main.cpp
+++ b/modules/ocl/test/main.cpp
@@ -42,15 +42,17 @@
 
 #include "test_precomp.hpp"
 
-#define DUMP_INFO_STDOUT(propertyDisplayName, propertyValue) \
+#define DUMP_PROPERTY_XML(propertyName, propertyValue) \
     do { \
-        std::cout << (propertyDisplayName) << ": " << (propertyValue) << std::endl; \
+        std::stringstream ssName, ssValue;\
+        ssName << propertyName;\
+        ssValue << propertyValue; \
+        ::testing::Test::RecordProperty(ssName.str(), ssValue.str()); \
     } while (false)
 
-#define DUMP_INFO_XML(propertyXMLName, propertyValue) \
+#define DUMP_MESSAGE_STDOUT(msg) \
     do { \
-        std::stringstream ss; ss << propertyValue; \
-        ::testing::Test::RecordProperty((propertyXMLName), ss.str()); \
+        std::cout << msg << std::endl; \
     } while (false)
 
 #include "opencv2/ocl/private/opencl_dumpinfo.hpp"

From 2c38be079e7a2adf8f322384a24b0d36097bf83c Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Thu, 7 Nov 2013 17:13:30 +0400
Subject: [PATCH 27/28] Enabled CV_Assert and such to print the function name
 with Visual C++.

Also, I made a separate macro for the current function name, which
helps simplify a lot of code that uses it.
---
 .../contrib/src/detection_based_tracker.cpp   | 23 +++---------------
 modules/core/include/opencv2/core/core.hpp    | 19 ++++++++-------
 .../core/include/opencv2/core/internal.hpp    |  6 +----
 modules/core/src/gpumat.cpp                   |  9 ++-----
 modules/core/src/opengl_interop.cpp           |  6 +----
 modules/core/src/persistence.cpp              |  7 +-----
 modules/gpu/src/cuda/safe_call.hpp            | 24 ++++---------------
 modules/ocl/src/safe_call.hpp                 |  9 ++-----
 8 files changed, 25 insertions(+), 78 deletions(-)

diff --git a/modules/contrib/src/detection_based_tracker.cpp b/modules/contrib/src/detection_based_tracker.cpp
index 0b3fcce65b..381bde0ccc 100644
--- a/modules/contrib/src/detection_based_tracker.cpp
+++ b/modules/contrib/src/detection_based_tracker.cpp
@@ -169,7 +169,6 @@ bool DetectionBasedTracker::SeparateDetectionWork::run()
     return true;
 }
 
-#ifdef __GNUC__
 #define CATCH_ALL_AND_LOG(_block)                                                           \
 do {                                                                                        \
     try {                                                                                   \
@@ -177,29 +176,13 @@ do {
         break;                                                                              \
     }                                                                                       \
     catch(cv::Exception& e) {                                                               \
-        LOGE0("\n %s: ERROR: OpenCV Exception caught: \n'%s'\n\n", __func__, e.what());     \
+        LOGE0("\n %s: ERROR: OpenCV Exception caught: \n'%s'\n\n", CV_Func, e.what());     \
     } catch(std::exception& e) {                                                            \
-        LOGE0("\n %s: ERROR: Exception caught: \n'%s'\n\n", __func__, e.what());            \
+        LOGE0("\n %s: ERROR: Exception caught: \n'%s'\n\n", CV_Func, e.what());            \
     } catch(...) {                                                                          \
-        LOGE0("\n %s: ERROR: UNKNOWN Exception caught\n\n", __func__);                      \
+        LOGE0("\n %s: ERROR: UNKNOWN Exception caught\n\n", CV_Func);                      \
     }                                                                                       \
 } while(0)
-#else
-#define CATCH_ALL_AND_LOG(_block)                                                           \
-do {                                                                                        \
-    try {                                                                                   \
-        _block;                                                                             \
-        break;                                                                              \
-    }                                                                                       \
-    catch(cv::Exception& e) {                                                               \
-        LOGE0("\n ERROR: OpenCV Exception caught: \n'%s'\n\n", e.what());                   \
-    } catch(std::exception& e) {                                                            \
-        LOGE0("\n ERROR: Exception caught: \n'%s'\n\n", e.what());                          \
-    } catch(...) {                                                                          \
-        LOGE0("\n ERROR: UNKNOWN Exception caught\n\n");                                    \
-    }                                                                                       \
-} while(0)
-#endif
 
 void* workcycleObjectDetectorFunction(void* p)
 {
diff --git a/modules/core/include/opencv2/core/core.hpp b/modules/core/include/opencv2/core/core.hpp
index af3a50c430..b6426d898c 100644
--- a/modules/core/include/opencv2/core/core.hpp
+++ b/modules/core/include/opencv2/core/core.hpp
@@ -164,7 +164,7 @@ public:
 
     int code; ///< error code @see CVStatus
     string err; ///< error description
-    string func; ///< function name. Available only when the compiler supports __func__ macro
+    string func; ///< function name. Available only when the compiler supports getting it
     string file; ///< source file name where the error has occured
     int line; ///< line number in the source file where the error has occured
 };
@@ -209,16 +209,19 @@ typedef int (CV_CDECL *ErrorCallback)( int status, const char* func_name,
 CV_EXPORTS ErrorCallback redirectError( ErrorCallback errCallback,
                                         void* userdata=0, void** prevUserdata=0);
 
-#ifdef __GNUC__
-#define CV_Error( code, msg ) cv::error( cv::Exception(code, msg, __func__, __FILE__, __LINE__) )
-#define CV_Error_( code, args ) cv::error( cv::Exception(code, cv::format args, __func__, __FILE__, __LINE__) )
-#define CV_Assert( expr ) if(!!(expr)) ; else cv::error( cv::Exception(CV_StsAssert, #expr, __func__, __FILE__, __LINE__) )
+
+#if defined __GNUC__
+#define CV_Func __func__
+#elif defined _MSC_VER
+#define CV_Func __FUNCTION__
 #else
-#define CV_Error( code, msg ) cv::error( cv::Exception(code, msg, "", __FILE__, __LINE__) )
-#define CV_Error_( code, args ) cv::error( cv::Exception(code, cv::format args, "", __FILE__, __LINE__) )
-#define CV_Assert( expr ) if(!!(expr)) ; else cv::error( cv::Exception(CV_StsAssert, #expr, "", __FILE__, __LINE__) )
+#define CV_Func ""
 #endif
 
+#define CV_Error( code, msg ) cv::error( cv::Exception(code, msg, CV_Func, __FILE__, __LINE__) )
+#define CV_Error_( code, args ) cv::error( cv::Exception(code, cv::format args, CV_Func, __FILE__, __LINE__) )
+#define CV_Assert( expr ) if(!!(expr)) ; else cv::error( cv::Exception(CV_StsAssert, #expr, CV_Func, __FILE__, __LINE__) )
+
 #ifdef _DEBUG
 #define CV_DbgAssert(expr) CV_Assert(expr)
 #else
diff --git a/modules/core/include/opencv2/core/internal.hpp b/modules/core/include/opencv2/core/internal.hpp
index 2f26e7cb61..3cd2f90f65 100644
--- a/modules/core/include/opencv2/core/internal.hpp
+++ b/modules/core/include/opencv2/core/internal.hpp
@@ -774,11 +774,7 @@ namespace cv { namespace ogl {
 CV_EXPORTS bool checkError(const char* file, const int line, const char* func = "");
 }}
 
-#if defined(__GNUC__)
-    #define CV_CheckGlError() CV_DbgAssert( (cv::ogl::checkError(__FILE__, __LINE__, __func__)) )
-#else
-    #define CV_CheckGlError() CV_DbgAssert( (cv::ogl::checkError(__FILE__, __LINE__)) )
-#endif
+#define CV_CheckGlError() CV_DbgAssert( (cv::ogl::checkError(__FILE__, __LINE__, CV_Func)) )
 
 #endif //__cplusplus
 
diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp
index 53e118ad71..ff459f9a61 100644
--- a/modules/core/src/gpumat.cpp
+++ b/modules/core/src/gpumat.cpp
@@ -72,13 +72,8 @@ using namespace cv::gpu;
 
 namespace
 {
-#if defined(__GNUC__)
-    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
-    #define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
-#else /* defined(__CUDACC__) || defined(__MSVC__) */
-    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__)
-    #define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__)
-#endif
+#define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, CV_Func)
+#define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func)
 
     inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
     {
diff --git a/modules/core/src/opengl_interop.cpp b/modules/core/src/opengl_interop.cpp
index 0cb9d3d0f5..72d5ffebdf 100644
--- a/modules/core/src/opengl_interop.cpp
+++ b/modules/core/src/opengl_interop.cpp
@@ -69,11 +69,7 @@ namespace
         #else
             void throw_nocuda() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); }
 
-            #if defined(__GNUC__)
-                #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
-            #else /* defined(__CUDACC__) || defined(__MSVC__) */
-                #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__)
-            #endif
+            #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, CV_Func)
 
             void ___cudaSafeCall(cudaError_t err, const char* file, const int line, const char* func = "")
             {
diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp
index bf6a64c978..7759f708b6 100644
--- a/modules/core/src/persistence.cpp
+++ b/modules/core/src/persistence.cpp
@@ -415,13 +415,8 @@ cvCreateMap( int flags, int header_size, int elem_size,
     return map;
 }
 
-#ifdef __GNUC__
 #define CV_PARSE_ERROR( errmsg )                                    \
-    icvParseError( fs, __func__, (errmsg), __FILE__, __LINE__ )
-#else
-#define CV_PARSE_ERROR( errmsg )                                    \
-    icvParseError( fs, "", (errmsg), __FILE__, __LINE__ )
-#endif
+    icvParseError( fs, CV_Func, (errmsg), __FILE__, __LINE__ )
 
 static void
 icvParseError( CvFileStorage* fs, const char* func_name,
diff --git a/modules/gpu/src/cuda/safe_call.hpp b/modules/gpu/src/cuda/safe_call.hpp
index 53d691f7cc..2eeaf85727 100644
--- a/modules/gpu/src/cuda/safe_call.hpp
+++ b/modules/gpu/src/cuda/safe_call.hpp
@@ -81,11 +81,7 @@ static inline void ___nppSafeCall(int err, const char *file, const int line, con
         cv::gpu::nppError(err, file, line, func);
 }
 
-#if defined(__GNUC__)
-    #define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
-#else
-    #define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__)
-#endif
+#define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func)
 
 // ncvSafeCall
 
@@ -95,11 +91,7 @@ static inline void ___ncvSafeCall(int err, const char *file, const int line, con
         cv::gpu::ncvError(err, file, line, func);
 }
 
-#if defined(__GNUC__)
-    #define ncvSafeCall(expr)  ___ncvSafeCall(expr, __FILE__, __LINE__, __func__)
-#else
-    #define ncvSafeCall(expr)  ___ncvSafeCall(expr, __FILE__, __LINE__)
-#endif
+#define ncvSafeCall(expr)  ___ncvSafeCall(expr, __FILE__, __LINE__, CV_Func)
 
 // cufftSafeCall
 
@@ -110,11 +102,7 @@ static inline void ___ncvSafeCall(int err, const char *file, const int line, con
             cv::gpu::cufftError(err, file, line, func);
     }
 
-    #if defined(__GNUC__)
-        #define cufftSafeCall(expr)  ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
-    #else
-        #define cufftSafeCall(expr)  ___cufftSafeCall(expr, __FILE__, __LINE__)
-    #endif
+#define cufftSafeCall(expr)  ___cufftSafeCall(expr, __FILE__, __LINE__, CV_Func)
 #endif
 
 // cublasSafeCall
@@ -126,11 +114,7 @@ static inline void ___ncvSafeCall(int err, const char *file, const int line, con
             cv::gpu::cublasError(err, file, line, func);
     }
 
-    #if defined(__GNUC__)
-        #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__, __func__)
-    #else
-        #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__)
-    #endif
+#define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__, CV_Func)
 #endif
 
 #endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
diff --git a/modules/ocl/src/safe_call.hpp b/modules/ocl/src/safe_call.hpp
index f772e1bb5d..679c89d372 100644
--- a/modules/ocl/src/safe_call.hpp
+++ b/modules/ocl/src/safe_call.hpp
@@ -48,13 +48,8 @@
 
 #include "opencv2/ocl/cl_runtime/cl_runtime.hpp"
 
-#if defined(__GNUC__)
-#define openCLSafeCall(expr)  ___openCLSafeCall(expr, __FILE__, __LINE__, __func__)
-#define openCLVerifyCall(res) ___openCLSafeCall(res, __FILE__, __LINE__, __func__)
-#else /* defined(__OPENCLCC__) || defined(__MSVC__) */
-#define openCLSafeCall(expr)  ___openCLSafeCall(expr, __FILE__, __LINE__)
-#define openCLVerifyCall(res) ___openCLSafeCall(res, __FILE__, __LINE__)
-#endif
+#define openCLSafeCall(expr)  ___openCLSafeCall(expr, __FILE__, __LINE__, CV_Func)
+#define openCLVerifyCall(res) ___openCLSafeCall(res, __FILE__, __LINE__, CV_Func)
 
 
 namespace cv

From 5852a913a8b27b91098d8fad714ce9457a535259 Mon Sep 17 00:00:00 2001
From: Kiran Pradeep <kiran.happy@gmail.com>
Date: Fri, 8 Nov 2013 14:09:34 +0530
Subject: [PATCH 28/28] CMake's get_filename_component with the NAME_WE
 parameter interprets the first period as the start of the extension. E.g. the
 file name without extension of 'this.is.a.text.file.txt' will be taken as
 'this'. Hence use NAME with a regex replacement to get 'this.is.a.text.file'.

---
 modules/world/CMakeLists.txt | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/modules/world/CMakeLists.txt b/modules/world/CMakeLists.txt
index f18635e620..f65447dc02 100644
--- a/modules/world/CMakeLists.txt
+++ b/modules/world/CMakeLists.txt
@@ -66,7 +66,9 @@ foreach(m ${OPENCV_MODULE_${the_module}_DEPS})
           endif()
         endif()
         string(REPLACE ".." "__" srcname "${srcname}")
-        get_filename_component(srcname_we ${srcname} NAME_WE)
+        #NAME_WE intentionally not used since it interprets first period as start of extension (http://cmake.org/Bug/view.php?id=12282)
+        get_filename_component(srcname_we "${srcname}" NAME)
+        string(REGEX REPLACE "\\.[^.]+$" "" srcname_we "${srcname_we}")
         string(REGEX REPLACE <SRC_NAME_WE> "${srcname_we}" objpath2 "${objpath1}")
         string(REGEX REPLACE <RELATIVE_SRC_NAME> "${srcname}" objpath3 "${objpath2}")
       endif()
@@ -92,7 +94,9 @@ macro(ios_include_3party_libs)
       endif()
 
       string(REPLACE ".." "__" srcname "${srcname}")
-      get_filename_component(srcname_we ${srcname} NAME_WE)
+      #NAME_WE intentionally not used since it interprets first period as start of extension (http://cmake.org/Bug/view.php?id=12282)
+      get_filename_component(srcname_we "${srcname}" NAME)
+      string(REGEX REPLACE "\\.[^.]+$" "" srcname_we "${srcname_we}")
       string(REGEX REPLACE <SRC_NAME_WE> "${srcname_we}" objpath2 "${objpath1}")
       string(REGEX REPLACE <RELATIVE_SRC_NAME> "${srcname}" objpath3 "${objpath2}")