diff --git a/.gitignore b/.gitignore index e5ff0430c0..dbf89bb41f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.pyc .DS_Store refman.rst +OpenCV4Tegra/ diff --git a/3rdparty/tbb/.gitignore b/3rdparty/tbb/.gitignore new file mode 100644 index 0000000000..601e1b265e --- /dev/null +++ b/3rdparty/tbb/.gitignore @@ -0,0 +1 @@ +tbb*.tgz \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 62709b8050..6cce10bacd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -140,6 +140,9 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_CLP "Include Clp support (EPL)" OFF) OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" OFF IF (NOT ANDROID AND NOT IOS) ) +OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" OFF IF (NOT ANDROID AND NOT IOS) ) +OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" OFF IF (NOT ANDROID AND NOT IOS) ) + # OpenCV build components # =================================================== @@ -282,25 +285,37 @@ ocv_include_directories(${OPENCV_CONFIG_FILE_INCLUDE_DIR}) # ---------------------------------------------------------------------------- -# Autodetect if we are in a SVN repository +# Autodetect if we are in a GIT repository # ---------------------------------------------------------------------------- -find_host_program(SVNVERSION_PATH svnversion) -mark_as_advanced(force SVNVERSION_PATH) -if(SVNVERSION_PATH) - message(STATUS "Extracting svn version, please wait...") - execute_process(COMMAND ${SVNVERSION_PATH} -n ${OpenCV_SOURCE_DIR} OUTPUT_VARIABLE SVNVERSION_RESULT) - if(SVNVERSION_RESULT MATCHES "exported") - # This is NOT a svn repository: - set(OPENCV_SVNVERSION "") - message(STATUS "SVNVERSION: exported") - else() - set(OPENCV_SVNVERSION " svn:${SVNVERSION_RESULT}") - message(STATUS "SVNVERSION: ${OPENCV_SVNVERSION}") - endif() +# don't use FindGit because it requires CMake 2.8.2 +set(git_names git eg) # eg = easy git +# Prefer .cmd variants on Windows unless running in a Makefile in the MSYS shell +if(WIN32) + if(NOT CMAKE_GENERATOR MATCHES "MSYS") + set(git_names git.cmd git eg.cmd eg) + endif() +endif() + +find_host_program(GIT_EXECUTABLE NAMES ${git_names} PATH_SUFFIXES Git/cmd Git/bin DOC "git command line client") +mark_as_advanced(GIT_EXECUTABLE) + +if(GIT_EXECUTABLE) + execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD + WORKING_DIRECTORY "${OpenCV_SOURCE_DIR}" + OUTPUT_VARIABLE OPENCV_GIT_HASH_SORT + RESULT_VARIABLE GIT_RESULT + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(GIT_RESULT EQUAL 0) + set(OPENCV_VCSVERSION "commit:${OPENCV_GIT_HASH_SORT}") + else() + set(OPENCV_VCSVERSION "exported") + endif() else() - # We don't have svnversion: - set(OPENCV_SVNVERSION "") + # We don't have git: + set(OPENCV_VCSVERSION "") endif() @@ -396,6 +411,12 @@ if(WITH_OPENCL) if(OPENCL_FOUND) set(HAVE_OPENCL 1) endif() + if(WITH_OPENCLAMDFFT) + set(HAVE_CLAMDFFT 1) + endif() + if(WITH_OPENCLAMDBLAS) + set(HAVE_CLAMDBLAS 1) + endif() endif() # ---------------------------------------------------------------------------- @@ -465,8 +486,8 @@ include(cmake/OpenCVGenConfig.cmake REQUIRED) # ---------------------------------------------------------------------------- status("") status("General configuration for OpenCV ${OPENCV_VERSION} =====================================") -if(OPENCV_SVNVERSION) - status("Version control:" ${OPENCV_SVNVERSION}) 
+if(OPENCV_VCSVERSION) + status(" Version control:" ${OPENCV_VCSVERSION}) endif() # ========================== build platform ========================== diff --git a/android/android.toolchain.cmake b/android/android.toolchain.cmake index 324074c8f2..3c89806ac0 100644 --- a/android/android.toolchain.cmake +++ b/android/android.toolchain.cmake @@ -4,7 +4,7 @@ # See home page: http://code.google.com/p/android-cmake/ # # The file is mantained by the OpenCV project. And also can be found at -# http://code.opencv.org/svn/opencv/trunk/opencv/android/android.toolchain.cmake +# http://code.opencv.org/projects/opencv/repository/revisions/master/changes/android/android.toolchain.cmake # # Usage Linux: # $ export ANDROID_NDK=/absolute/path/to/the/android-ndk @@ -182,6 +182,7 @@ # [+] added mips architecture support # - modified August 2012 # [+] updated for NDK r8b +# [~] all intermediate files generated by toolchain are moved into CMakeFiles # ------------------------------------------------------------------------------ cmake_minimum_required( VERSION 2.6.3 ) @@ -854,45 +855,48 @@ elseif( X86 ) endif() #linker flags -list( APPEND ANDROID_SYSTEM_LIB_DIRS "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}" "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" ) +if( NOT DEFINED __ndklibspath ) + set( __ndklibspath "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/ndklibs/${ANDROID_NDK_ABI_NAME}" ) +endif() +list( APPEND ANDROID_SYSTEM_LIB_DIRS "${__ndklibspath}" "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" ) set( ANDROID_LINKER_FLAGS "" ) #STL if( ANDROID_USE_STLPORT ) if( EXISTS "${__stlLibPath}/libstlport_static.a" ) - __COPY_IF_DIFFERENT( "${__stlLibPath}/libstlport_static.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstlport_static.a" ) + __COPY_IF_DIFFERENT( "${__stlLibPath}/libstlport_static.a" "${__ndklibspath}/libstlport_static.a" ) endif() - if( EXISTS "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstlport_static.a" ) + if( EXISTS "${__ndklibspath}/libstlport_static.a" ) set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--start-group -lstlport_static" ) endif() else( ANDROID_USE_STLPORT ) if( EXISTS "${__stlLibPath}/libgnustl_static.a" ) - __COPY_IF_DIFFERENT( "${__stlLibPath}/libgnustl_static.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" ) + __COPY_IF_DIFFERENT( "${__stlLibPath}/libgnustl_static.a" "${__ndklibspath}/libstdc++.a" ) elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${__stlLibPath}/${CMAKE_SYSTEM_PROCESSOR}/thumb/libstdc++.a" ) - __COPY_IF_DIFFERENT( "${__stlLibPath}/${CMAKE_SYSTEM_PROCESSOR}/thumb/libstdc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" ) + __COPY_IF_DIFFERENT( "${__stlLibPath}/${CMAKE_SYSTEM_PROCESSOR}/thumb/libstdc++.a" "${__ndklibspath}/libstdc++.a" ) elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${__stlLibPath}/${CMAKE_SYSTEM_PROCESSOR}/libstdc++.a" ) - __COPY_IF_DIFFERENT( "${__stlLibPath}/${CMAKE_SYSTEM_PROCESSOR}/libstdc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" ) + __COPY_IF_DIFFERENT( "${__stlLibPath}/${CMAKE_SYSTEM_PROCESSOR}/libstdc++.a" "${__ndklibspath}/libstdc++.a" ) elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${__stlLibPath}/thumb/libstdc++.a" ) - __COPY_IF_DIFFERENT( "${__stlLibPath}/thumb/libstdc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" ) + __COPY_IF_DIFFERENT( "${__stlLibPath}/thumb/libstdc++.a" "${__ndklibspath}/libstdc++.a" ) elseif( EXISTS 
"${__stlLibPath}/libstdc++.a" ) - __COPY_IF_DIFFERENT( "${__stlLibPath}/libstdc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" ) + __COPY_IF_DIFFERENT( "${__stlLibPath}/libstdc++.a" "${__ndklibspath}/libstdc++.a" ) endif() - if( EXISTS "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" ) + if( EXISTS "${__ndklibspath}/libstdc++.a" ) set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -lstdc++" ) endif() #gcc exception & rtti support if( EXISTS "${__stlLibPath}/libsupc++.a" ) - __COPY_IF_DIFFERENT( "${__stlLibPath}/libsupc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) + __COPY_IF_DIFFERENT( "${__stlLibPath}/libsupc++.a" "${__ndklibspath}/libsupc++.a" ) elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libsupc++.a" ) - __COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libsupc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) + __COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libsupc++.a" "${__ndklibspath}/libsupc++.a" ) elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" ) - __COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) + __COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" "${__ndklibspath}/libsupc++.a" ) elseif( ANDROID_ARCH_NAME STREQUAL "arm" AND EXISTS "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libsupc++.a" ) - __COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libsupc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) + __COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libsupc++.a" "${__ndklibspath}/libsupc++.a" ) elseif( EXISTS "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" ) - __COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) + __COPY_IF_DIFFERENT( "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" "${__ndklibspath}/libsupc++.a" ) endif() - if( EXISTS "${CMAKE_BINARY_DIR}/systemlibs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) + if( EXISTS "${__ndklibspath}/libsupc++.a" ) set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -lsupc++" ) endif() endif( ANDROID_USE_STLPORT ) @@ -1038,13 +1042,14 @@ endmacro() # export toolchain settings for the try_compile() command if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" ) set( __toolchain_config "") - foreach( __var ANDROID_ABI ANDROID_FORCE_ARM_BUILD ANDROID_NATIVE_API_LEVEL ANDROID_NO_UNDEFINED ANDROID_SO_UNDEFINED ANDROID_SET_OBSOLETE_VARIABLES LIBRARY_OUTPUT_PATH_ROOT ANDROID_USE_STLPORT ANDROID_FORBID_SYGWIN ANDROID_NDK ANDROID_STANDALONE_TOOLCHAIN ANDROID_FUNCTION_LEVEL_LINKING ) + foreach( __var ANDROID_ABI ANDROID_FORCE_ARM_BUILD ANDROID_NATIVE_API_LEVEL ANDROID_NO_UNDEFINED ANDROID_SO_UNDEFINED ANDROID_SET_OBSOLETE_VARIABLES LIBRARY_OUTPUT_PATH_ROOT ANDROID_USE_STLPORT 
ANDROID_FORBID_SYGWIN ANDROID_NDK ANDROID_STANDALONE_TOOLCHAIN ANDROID_FUNCTION_LEVEL_LINKING __ndklibspath ) if( DEFINED ${__var} ) set( __toolchain_config "${__toolchain_config}set( ${__var} \"${${__var}}\" )\n" ) endif() endforeach() - file( WRITE "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/android.toolchain.config.cmake" "${__toolchain_config}" ) + file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/android.toolchain.config.cmake" "${__toolchain_config}" ) unset( __toolchain_config ) + unset( __ndklibspath ) endif() @@ -1073,6 +1078,7 @@ endif() # Can be set only at the first run: # ANDROID_NDK # ANDROID_STANDALONE_TOOLCHAIN +# ANDROID_TOOLCHAIN_NAME : "arm-linux-androideabi-4.4.3" or "arm-linux-androideabi-4.6" or "mipsel-linux-android-4.4.3" or "mipsel-linux-android-4.6" or "x86-4.4.3" or "x86-4.6" # Obsolete: # ANDROID_API_LEVEL : superseded by ANDROID_NATIVE_API_LEVEL # ARM_TARGET : superseded by ANDROID_ABI @@ -1105,7 +1111,6 @@ endif() # ANDROID_COMPILER_VERSION : GCC version used # ANDROID_CXX_FLAGS : C/C++ compiler flags required by Android platform # ANDROID_SUPPORTED_ABIS : list of currently allowed values for ANDROID_ABI -# ANDROID_TOOLCHAIN_NAME : "standalone", "arm-linux-androideabi-4.4.3" or "x86-4.4.3" or something similar. # ANDROID_TOOLCHAIN_MACHINE_NAME : "arm-linux-androideabi", "arm-eabi" or "i686-android-linux" # ANDROID_TOOLCHAIN_ROOT : path to the top level of toolchain (standalone or placed inside NDK) # ANDROID_SUPPORTED_NATIVE_API_LEVELS : list of native API levels found inside NDK diff --git a/android/scripts/package.sh b/android/scripts/package.sh deleted file mode 100644 index 41a9dad0be..0000000000 --- a/android/scripts/package.sh +++ /dev/null @@ -1,125 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. - -ANDROID_DIR=`pwd` - -rm -rf package -mkdir -p package -cd package - -PRG_DIR=`pwd` -mkdir opencv - -# neon-enabled build -#cd $PRG_DIR -#mkdir build-neon -#cd build-neon - -#cmake -DANDROID_ABI="armeabi-v7a with NEON" -DBUILD_DOCS=OFF -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_ANDROID_EXAMPLES=OFF -DCMAKE_TOOLCHAIN_FILE="$ANDROID_DIR/android.toolchain.cmake" -DCMAKE_INSTALL_PREFIX="$PRG_DIR/opencv" "$ANDROID_DIR/.." || exit 1 -#make -j8 install/strip || exit 1 - -#cd "$PRG_DIR/opencv" -#rm -rf doc include src .classpath .project AndroidManifest.xml default.properties share/OpenCV/haarcascades share/OpenCV/lbpcascades share/OpenCV/*.cmake share/OpenCV/OpenCV.mk -#mv libs/armeabi-v7a libs/armeabi-v7a-neon -#mv share/OpenCV/3rdparty/libs/armeabi-v7a share/OpenCV/3rdparty/libs/armeabi-v7a-neon - - -# armeabi-v7a build -cd "$PRG_DIR" -mkdir build -cd build - -cmake -DANDROID_ABI="armeabi-v7a" -DBUILD_DOCS=OFF -DBUILD_TESTS=ON -DBUILD_EXAMPLES=OFF -DBUILD_ANDROID_EXAMPLES=ON -DCMAKE_TOOLCHAIN_FILE="$ANDROID_DIR/android.toolchain.cmake" -DCMAKE_INSTALL_PREFIX="$PRG_DIR/opencv" "$ANDROID_DIR/.." || exit 1 -make -j8 install/strip || exit 1 - -cd "$PRG_DIR/opencv" -rm -rf doc include src .classpath .project AndroidManifest.xml default.properties project.properties share/OpenCV/haarcascades share/OpenCV/lbpcascades share/OpenCV/*.cmake share/OpenCV/OpenCV.mk - - -# armeabi build -cd "$PRG_DIR/build" -rm -rf CMakeCache.txt - -cmake -DANDROID_ABI="armeabi" -DBUILD_DOCS=ON -DBUILD_TESTS=ON -DBUILD_EXAMPLES=OFF -DBUILD_ANDROID_EXAMPLES=ON -DINSTALL_ANDROID_EXAMPLES=ON -DCMAKE_TOOLCHAIN_FILE="$ANDROID_DIR/android.toolchain.cmake" -DCMAKE_INSTALL_PREFIX="$PRG_DIR/opencv" "$ANDROID_DIR/.." 
|| exit 1 -make -j8 install/strip docs || exit 1 - -find doc -name "*.pdf" -exec cp {} $PRG_DIR/opencv/doc \; - -cd $PRG_DIR -rm -rf opencv/doc/CMakeLists.txt -cp "$ANDROID_DIR/README.android" opencv/ -cp "$ANDROID_DIR/../README" opencv/ - - -# get opencv version -CV_VERSION=`grep -o "[0-9]\+\.[0-9]\+\.[0-9]\+" opencv/share/OpenCV/OpenCVConfig-version.cmake` -OPENCV_NAME=OpenCV-$CV_VERSION -mv opencv $OPENCV_NAME - -#samples -cp -r "$ANDROID_DIR/../samples/android" "$PRG_DIR/samples" -cd "$PRG_DIR/samples" - -#enable for loops over items with spaces in their name -IFS=" -" -for dir in `ls -1` -do - if [ -f "$dir/default.properties" ] - then - HAS_REFERENCE=`cat "$dir/project.properties" | grep -c android.library.reference.1` - if [ $HAS_REFERENCE = 1 ] - then - echo -n > "$dir/project.properties" - android update project --name "$dir" --target "android-8" --library "../../$OPENCV_NAME" --path "$dir" - #echo 'android update project --name "$dir" --target "android-8" --library "../opencv$CV_VERSION" --path "$dir"' - fi - else - if [ -f "$dir/default.properties" ] - then - HAS_REFERENCE=`cat "$dir/default.properties" | grep -c android.library.reference.1` - if [ $HAS_REFERENCE = 1 ] - then - echo -n > "$dir/default.properties" - android update project --name "$dir" --target "android-8" --library "../../$OPENCV_NAME" --path "$dir" - #echo 'android update project --name "$dir" --target "android-8" --library "../opencv$CV_VERSION" --path "$dir"' - fi - else - rm -rf "$dir" - fi - fi -done - -echo "OPENCV_MK_PATH:=../../$OPENCV_NAME/share/OpenCV/OpenCV.mk" > includeOpenCV.mk - - -#clean samples -cd "$PRG_DIR/samples" -#remove ignored files/folders -svn status --no-ignore | grep ^I | cut -c9- | xargs -d \\n rm -rf -#remove unversioned files/folders -svn status | grep ^\? | cut -c9- | xargs -d \\n rm -rf - - -#generate "gen" folders to eliminate eclipse warnings -cd "$PRG_DIR/samples" -for dir in `ls -1` -do - if [ -d "$dir" ] - then - mkdir "$dir/gen" - fi -done - - -#generate folders "gen" and "res" for opencv (dummy eclipse stiff) -cd $PRG_DIR -mkdir "$OPENCV_NAME/gen" -mkdir "$OPENCV_NAME/res" - -# pack all files -cd $PRG_DIR -PRG_NAME=OpenCV-$CV_VERSION-tp-android-bin.tar.bz2 -tar cjpf $PRG_NAME --exclude-vcs $OPENCV_NAME samples || exit -1 -echo -echo "Package $PRG_NAME is successfully created" diff --git a/android/service/doc/LoaderCallbackInterface.rst b/android/service/doc/LoaderCallbackInterface.rst index 7b1aabd812..5a88e599fa 100644 --- a/android/service/doc/LoaderCallbackInterface.rst +++ b/android/service/doc/LoaderCallbackInterface.rst @@ -8,7 +8,7 @@ Loader Callback Interface Interface for callback object in case of asynchronous initialization of OpenCV void onManagerConnected() ------------------------- +------------------------- .. 
method:: void onManagerConnected(int status) diff --git a/cmake/OpenCVDetectOpenCL.cmake b/cmake/OpenCVDetectOpenCL.cmake index 903b55b652..96473a5003 100644 --- a/cmake/OpenCVDetectOpenCL.cmake +++ b/cmake/OpenCVDetectOpenCL.cmake @@ -2,8 +2,19 @@ if(APPLE) set(OPENCL_FOUND YES) set(OPENCL_LIBRARIES "-framework OpenCL") else() - find_package(OpenCL QUIET) - + #find_package(OpenCL QUIET) + if(WITH_OPENCLAMDFFT) + find_path(CLAMDFFT_INCLUDE_DIR + NAMES clAmdFft.h) + find_library(CLAMDFFT_LIBRARIES + NAMES clAmdFft.Runtime) + endif() + if(WITH_OPENCLAMDBLAS) + find_path(CLAMDBLAS_INCLUDE_DIR + NAMES clAmdBlas.h) + find_library(CLAMDBLAS_LIBRARIES + NAMES clAmdBlas) + endif() # Try AMD/ATI Stream SDK if (NOT OPENCL_FOUND) set(ENV_AMDSTREAMSDKROOT $ENV{AMDAPPSDKROOT}) diff --git a/cmake/OpenCVExtraTargets.cmake b/cmake/OpenCVExtraTargets.cmake index 3799ad5def..08d5f406a5 100644 --- a/cmake/OpenCVExtraTargets.cmake +++ b/cmake/OpenCVExtraTargets.cmake @@ -10,7 +10,7 @@ ADD_CUSTOM_TARGET(uninstall "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/c if(ENABLE_SOLUTION_FOLDERS) set_target_properties(uninstall PROPERTIES FOLDER "CMakeTargets") endif() - + # ---------------------------------------------------------------------------- # Source package, for "make package_source" @@ -26,11 +26,11 @@ if(BUILD_PACKAGE) set(TAR_TRANSFORM "\"s,^,${TARBALL_NAME}/,\"") add_custom_target(package_source #TODO: maybe we should not remove dll's - COMMAND ${TAR_CMD} --transform ${TAR_TRANSFORM} -cjpf ${CMAKE_CURRENT_BINARY_DIR}/${TARBALL_NAME}.tar.bz2 --exclude=".svn" --exclude="*.pyc" --exclude="*.vcproj" --exclude="*/lib/*" --exclude="*.dll" ./ + COMMAND ${TAR_CMD} --transform ${TAR_TRANSFORM} -cjpf ${CMAKE_CURRENT_BINARY_DIR}/${TARBALL_NAME}.tar.bz2 --exclude=".svn" --exclude=".git" --exclude="*.pyc" --exclude="*.vcproj" --exclude="*/lib/*" --exclude="*.dll" ./ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) else() add_custom_target(package_source - COMMAND zip -9 -r ${CMAKE_CURRENT_BINARY_DIR}/${TARBALL_NAME}.zip . -x '*/.svn/*' '*.vcproj' '*.pyc' + COMMAND zip -9 -r ${CMAKE_CURRENT_BINARY_DIR}/${TARBALL_NAME}.zip . -x '*/.svn/*' '*/.git/*' '*.vcproj' '*.pyc' WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() if(ENABLE_SOLUTION_FOLDERS) diff --git a/cmake/templates/cvconfig.h.cmake b/cmake/templates/cvconfig.h.cmake index 1012008059..fb779c887d 100644 --- a/cmake/templates/cvconfig.h.cmake +++ b/cmake/templates/cvconfig.h.cmake @@ -175,6 +175,12 @@ /* OpenCL Support */ #cmakedefine HAVE_OPENCL +/* AMD's OpenCL Fast Fourier Transform Library*/ +#cmakedefine HAVE_CLAMDFFT + +/* AMD's Basic Linear Algebra Subprograms Library*/ +#cmakedefine HAVE_CLAMDBLAS + /* NVidia Cuda Fast Fourier Transform (FFT) API*/ #cmakedefine HAVE_CUFFT diff --git a/doc/license.txt b/doc/license.txt index af0330790c..8824228d03 100644 --- a/doc/license.txt +++ b/doc/license.txt @@ -1,4 +1,4 @@ -IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. By downloading, copying, installing or using the software you agree to this license. 
If you do not agree to this license, do not download, install, diff --git a/doc/opencv_cheatsheet.tex b/doc/opencv_cheatsheet.tex index cbbd6cf43a..30fdd8320a 100644 --- a/doc/opencv_cheatsheet.tex +++ b/doc/opencv_cheatsheet.tex @@ -7,7 +7,7 @@ % % creating matrices % from scratch -% from previously allocated data: plain arrays, vectors +% from previously allocated data: plain arrays, vectors % converting to/from old-style structures % % element access, iteration through matrix elements @@ -30,7 +30,7 @@ % color space transformations % histograms & back projections % contours -% +% % i/o: % displaying images % saving/loading to/from file (XML/YAML & image file formats) @@ -40,19 +40,19 @@ % findcontours, bounding box, convex hull, min area rect, % transformations, to/from homogeneous coordinates % matching point sets: homography, fundamental matrix, rigid transforms -% +% % 3d: % camera calibration, pose estimation. % uncalibrated case % stereo: rectification, running stereo correspondence, obtaining the depth. -% +% % feature detection: % features2d toolbox -% +% % object detection: % using a classifier running on a sliding window: cascadeclassifier + hog. % using salient point features: features2d -> matching -% +% % statistical data processing: % clustering (k-means), % classification + regression (SVM, boosting, k-nearest), @@ -148,22 +148,22 @@ %\texttt{\href{http://www.ros.org/wiki/Stack Manifest}{stack manifest}} & Description of a ROS stack. %\end{tabular} -\emph{The OpenCV C++ reference manual is here: \url{http://opencv.itseez.com}. Use \textbf{Quick Search} to find descriptions of the particular functions and classes} +\emph{The OpenCV C++ reference manual is here: \url{http://docs.opencv.org}. Use \textbf{Quick Search} to find descriptions of the particular functions and classes} \section{Key OpenCV Classes} \begin{tabular}{@{}p{\the\MyLen}% @{}p{\linewidth-\the\MyLen}@{}} -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#Point_}{Point\_}} & Template 2D point class \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#Point3_}{Point3\_}} & Template 3D point class \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#Size_}{Size\_}} & Template size (width, height) class \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#Vec}{Vec}} & Template short vector class \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#Matx}{Matx}} & Template small matrix class \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#Scalar_}{Scalar}} & 4-element vector \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#Rect_}{Rect}} & Rectangle \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#Range}{Range}} & Integer value range \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#Mat}{Mat}} & 2D or multi-dimensional dense array (can be used to store matrices, images, histograms, feature descriptors, voxel volumes etc.)\\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#sparsemat}{SparseMat}} & Multi-dimensional sparse array \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#Ptr}{Ptr}} & Template smart pointer class +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Point_}{Point\_}} & Template 2D point class \\ 
+\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Point3_}{Point3\_}} & Template 3D point class \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Size_}{Size\_}} & Template size (width, height) class \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Vec}{Vec}} & Template short vector class \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Matx}{Matx}} & Template small matrix class \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Scalar_}{Scalar}} & 4-element vector \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Rect_}{Rect}} & Rectangle \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Range}{Range}} & Integer value range \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Mat}{Mat}} & 2D or multi-dimensional dense array (can be used to store matrices, images, histograms, feature descriptors, voxel volumes etc.)\\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#sparsemat}{SparseMat}} & Multi-dimensional sparse array \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Ptr}{Ptr}} & Template smart pointer class \end{tabular} \section{Matrix Basics} @@ -173,7 +173,7 @@ \> \texttt{Mat image(240, 320, CV\_8UC3);} \\ \textbf{[Re]allocate a pre-declared matrix}\\ -\> \texttt{image.\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#mat-create}{create}(480, 640, CV\_8UC3);}\\ +\> \texttt{image.\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-create}{create}(480, 640, CV\_8UC3);}\\ \textbf{Create a matrix initialized with a constant}\\ \> \texttt{Mat A33(3, 3, CV\_32F, Scalar(5));} \\ @@ -189,8 +189,8 @@ \> \texttt{Mat B22 = Mat(2, 2, CV\_32F, B22data).clone();}\\ \textbf{Initialize a random matrix}\\ -\> \texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#randu}{randu}(image, Scalar(0), Scalar(256)); }\textit{// uniform dist}\\ -\> \texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#randn}{randn}(image, Scalar(128), Scalar(10)); }\textit{// Gaussian dist}\\ +\> \texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#randu}{randu}(image, Scalar(0), Scalar(256)); }\textit{// uniform dist}\\ +\> \texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#randn}{randn}(image, Scalar(128), Scalar(10)); }\textit{// Gaussian dist}\\ \textbf{Convert matrix to/from other structures}\\ \>\textbf{(without copying the data)}\\ @@ -230,32 +230,32 @@ \section{Matrix Manipulations: Copying, Shuffling, Part Access} \begin{tabular}{@{}p{\the\MyLen}% @{}p{\linewidth-\the\MyLen}@{}} -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#mat-copyto}{src.copyTo(dst)}} & Copy matrix to another one \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#mat-convertto}{src.convertTo(dst,type,scale,shift)}} & \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Scale and convert to another datatype \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#mat-clone}{m.clone()}} & Make deep copy of a matrix \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#mat-reshape}{m.reshape(nch,nrows)}} & Change matrix dimensions and/or number of channels without copying data \\ 
+\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-copyto}{src.copyTo(dst)}} & Copy matrix to another one \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-convertto}{src.convertTo(dst,type,scale,shift)}} & \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Scale and convert to another datatype \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-clone}{m.clone()}} & Make deep copy of a matrix \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-reshape}{m.reshape(nch,nrows)}} & Change matrix dimensions and/or number of channels without copying data \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#mat-row}{m.row(i)}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#mat-col}{m.col(i)}} & Take a matrix row/column \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-row}{m.row(i)}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-col}{m.col(i)}} & Take a matrix row/column \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#mat-rowrange}{m.rowRange(Range(i1,i2))}} -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#mat-colrange}{m.colRange(Range(j1,j2))}} & \ \ \ \ \ \ \ Take a matrix row/column span \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-rowrange}{m.rowRange(Range(i1,i2))}} +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-colrange}{m.colRange(Range(j1,j2))}} & \ \ \ \ \ \ \ Take a matrix row/column span \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#mat-diag}{m.diag(i)}} & Take a matrix diagonal \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-diag}{m.diag(i)}} & Take a matrix diagonal \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#Mat}{m(Range(i1,i2),Range(j1,j2)), m(roi)}} & \ \ \ \ \ \ \ \ \ \ \ \ \ Take a submatrix \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Mat}{m(Range(i1,i2),Range(j1,j2)), m(roi)}} & \ \ \ \ \ \ \ \ \ \ \ \ \ Take a submatrix \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#repeat}{m.repeat(ny,nx)}} & Make a bigger matrix from a smaller one \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#repeat}{m.repeat(ny,nx)}} & Make a bigger matrix from a smaller one \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#flip}{flip(src,dst,dir)}} & Reverse the order of matrix rows and/or columns \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#flip}{flip(src,dst,dir)}} & Reverse the order of matrix rows and/or columns \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#split}{split(...)}} & Split multi-channel matrix into separate channels \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#split}{split(...)}} & Split multi-channel matrix into separate channels \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#merge}{merge(...)}} & Make a multi-channel matrix out of the separate channels \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#merge}{merge(...)}} & Make a multi-channel 
matrix out of the separate channels \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#mixchannels}{mixChannels(...)}} & Generalized form of split() and merge() \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#mixchannels}{mixChannels(...)}} & Generalized form of split() and merge() \\ -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#randshuffle}{randShuffle(...)}} & Randomly shuffle matrix elements \\ +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#randshuffle}{randShuffle(...)}} & Randomly shuffle matrix elements \\ \end{tabular} @@ -278,17 +278,17 @@ other matrix operations, such as \begin{itemize} \item -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#add}{add()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#subtract}{subtract()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#multiply}{multiply()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#divide}{divide()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#absdiff}{absdiff()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#bitwise-and}{bitwise\_and()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#bitwise-or}{bitwise\_or()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#bitwise-xor}{bitwise\_xor()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#max}{max()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#min}{min()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#compare}{compare()}} +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#add}{add()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#subtract}{subtract()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#multiply}{multiply()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#divide}{divide()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#absdiff}{absdiff()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#bitwise-and}{bitwise\_and()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#bitwise-or}{bitwise\_or()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#bitwise-xor}{bitwise\_xor()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#max}{max()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#min}{min()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#compare}{compare()}} -- correspondingly, addition, subtraction, element-wise multiplication ... comparison of two matrices or a matrix and a scalar. @@ -314,49 +314,49 @@ Exa\=mple. 
\href{http://en.wikipedia.org/wiki/Alpha_compositing}{Alpha compositi \item -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#sum}{sum()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#mean}{mean()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#meanstddev}{meanStdDev()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#norm}{norm()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#countnonzero}{countNonZero()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#minmaxloc}{minMaxLoc()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#sum}{sum()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#mean}{mean()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#meanstddev}{meanStdDev()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#norm}{norm()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#countnonzero}{countNonZero()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#minmaxloc}{minMaxLoc()}}, -- various statistics of matrix elements. \item -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#exp}{exp()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#log}{log()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#pow}{pow()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#sqrt}{sqrt()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#carttopolar}{cartToPolar()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#polartocart}{polarToCart()}} +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#exp}{exp()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#log}{log()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#pow}{pow()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#sqrt}{sqrt()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#carttopolar}{cartToPolar()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#polartocart}{polarToCart()}} -- the classical math functions. 
\item -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#scaleadd}{scaleAdd()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#transpose}{transpose()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#gemm}{gemm()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#invert}{invert()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#solve}{solve()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#determinant}{determinant()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#trace}{trace()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#eigen}{eigen()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#SVD}{SVD}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#scaleadd}{scaleAdd()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#transpose}{transpose()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#gemm}{gemm()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#invert}{invert()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#solve}{solve()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#determinant}{determinant()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#trace}{trace()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#eigen}{eigen()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#SVD}{SVD}}, -- the algebraic functions + SVD class. \item -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#dft}{dft()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#idft}{idft()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#dct}{dct()}}, -\texttt{\href{http://opencv.itseez.com/modules/core/doc/operations_on_arrays.html\#idct}{idct()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#dft}{dft()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#idft}{idft()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#dct}{dct()}}, +\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#idct}{idct()}}, --- discrete Fourier and cosine transformations +-- discrete Fourier and cosine transformations \end{itemize} -For some operations a more convenient \href{http://opencv.itseez.com/modules/core/doc/basic_structures.html\#matrix-expressions}{algebraic notation} can be used, for example: +For some operations a more convenient \href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#matrix-expressions}{algebraic notation} can be used, for example: \begin{tabbing} \texttt{Mat}\={} \texttt{delta = (J.t()*J + lambda*}\\ \>\texttt{Mat::eye(J.cols, J.cols, J.type()))}\\ @@ -370,20 +370,20 @@ implements the core of Levenberg-Marquardt optimization algorithm. 
\begin{tabular}{@{}p{\the\MyLen}% @{}p{\linewidth-\the\MyLen}@{}} -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/filtering.html\#filter2d}{filter2D()}} & Non-separable linear filter \\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#filter2d}{filter2D()}} & Non-separable linear filter \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/filtering.html\#sepfilter2d}{sepFilter2D()}} & Separable linear filter \\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#sepfilter2d}{sepFilter2D()}} & Separable linear filter \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/filtering.html\#blur}{boxFilter()}}, \texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/filtering.html\#gaussianblur}{GaussianBlur()}}, -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/filtering.html\#medianblur}{medianBlur()}}, -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/filtering.html\#bilateralfilter}{bilateralFilter()}} +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#blur}{boxFilter()}}, \texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#gaussianblur}{GaussianBlur()}}, +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#medianblur}{medianBlur()}}, +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#bilateralfilter}{bilateralFilter()}} & Smooth the image with one of the linear or non-linear filters \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/filtering.html\#sobel}{Sobel()}}, \texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/filtering.html\#scharr}{Scharr()}} +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#sobel}{Sobel()}}, \texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#scharr}{Scharr()}} & Compute the spatial image derivatives \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/filtering.html\#laplacian}{Laplacian()}} & compute Laplacian: $\Delta I = \frac{\partial ^ 2 I}{\partial x^2} + \frac{\partial ^ 2 I}{\partial y^2}$ \\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#laplacian}{Laplacian()}} & compute Laplacian: $\Delta I = \frac{\partial ^ 2 I}{\partial x^2} + \frac{\partial ^ 2 I}{\partial y^2}$ \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/filtering.html\#erode}{erode()}}, \texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/filtering.html\#dilate}{dilate()}} & Morphological operations \\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#erode}{erode()}}, \texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#dilate}{dilate()}} & Morphological operations \\ \end{tabular} @@ -398,17 +398,17 @@ Exa\=mple. 
Filter image in-place with a 3x3 high-pass kernel\\ \begin{tabular}{@{}p{\the\MyLen}% @{}p{\linewidth-\the\MyLen}@{}} -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/geometric_transformations.html\#resize}{resize()}} & Resize image \\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#resize}{resize()}} & Resize image \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/geometric_transformations.html\#getrectsubpix}{getRectSubPix()}} & Extract an image patch \\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#getrectsubpix}{getRectSubPix()}} & Extract an image patch \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/geometric_transformations.html\#warpaffine}{warpAffine()}} & Warp image affinely\\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#warpaffine}{warpAffine()}} & Warp image affinely\\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/geometric_transformations.html\#warpperspective}{warpPerspective()}} & Warp image perspectively\\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#warpperspective}{warpPerspective()}} & Warp image perspectively\\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/geometric_transformations.html\#remap}{remap()}} & Generic image warping\\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#remap}{remap()}} & Generic image warping\\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/geometric_transformations.html\#convertmaps}{convertMaps()}} & Optimize maps for a faster remap() execution\\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#convertmaps}{convertMaps()}} & Optimize maps for a faster remap() execution\\ \end{tabular} @@ -422,21 +422,21 @@ Example. 
Decimate image by factor of $\sqrt{2}$:\\ \begin{tabular}{@{}p{\the\MyLen}% @{}p{\linewidth-\the\MyLen}@{}} -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/miscellaneous_transformations.html\#cvtcolor}{cvtColor()}} & Convert image from one color space to another \\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#cvtcolor}{cvtColor()}} & Convert image from one color space to another \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/miscellaneous_transformations.html\#threshold}{threshold()}}, \texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/miscellaneous_transformations.html\#adaptivethreshold}{adaptivethreshold()}} & Convert grayscale image to binary image using a fixed or a variable threshold \\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#threshold}{threshold()}}, \texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#adaptivethreshold}{adaptivethreshold()}} & Convert grayscale image to binary image using a fixed or a variable threshold \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/miscellaneous_transformations.html\#floodfill}{floodFill()}} & Find a connected component using region growing algorithm\\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#floodfill}{floodFill()}} & Find a connected component using region growing algorithm\\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/miscellaneous_transformations.html\#integral}{integral()}} & Compute integral image \\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#integral}{integral()}} & Compute integral image \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/miscellaneous_transformations.html\#distancetransform}{distanceTransform()}} +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#distancetransform}{distanceTransform()}} & build distance map or discrete Voronoi diagram for a binary image. \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/miscellaneous_transformations.html\#watershed}{watershed()}}, -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/miscellaneous_transformations.html\#grabcut}{grabCut()}} +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#watershed}{watershed()}}, +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#grabcut}{grabCut()}} & marker-based image segmentation algorithms. - See the samples \texttt{\href{http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/watershed.cpp}{watershed.cpp}} and \texttt{\href{http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/grabcut.cpp}{grabcut.cpp}}. + See the samples \texttt{\href{http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/watershed.cpp}{watershed.cpp}} and \texttt{\href{http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/grabcut.cpp}{grabcut.cpp}}. \end{tabular} @@ -445,13 +445,13 @@ Example. 
Decimate image by factor of $\sqrt{2}$:\\ \begin{tabular}{@{}p{\the\MyLen}% @{}p{\linewidth-\the\MyLen}@{}} -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/histograms.html\#calchist}{calcHist()}} & Compute image(s) histogram \\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/histograms.html\#calchist}{calcHist()}} & Compute image(s) histogram \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/histograms.html\#calcbackproject}{calcBackProject()}} & Back-project the histogram \\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/histograms.html\#calcbackproject}{calcBackProject()}} & Back-project the histogram \\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/histograms.html\#equalizehist}{equalizeHist()}} & Normalize image brightness and contrast\\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/histograms.html\#equalizehist}{equalizeHist()}} & Normalize image brightness and contrast\\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/histograms.html\#comparehist}{compareHist()}} & Compare two histograms\\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/histograms.html\#comparehist}{compareHist()}} & Compare two histograms\\ \end{tabular} @@ -464,12 +464,12 @@ Example. Compute Hue-Saturation histogram of an image:\\ \end{tabbing} \subsection{Contours} -See \texttt{\href{http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/contours2.cpp}{contours2.cpp}} and \texttt{\href{http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/squares.cpp}{squares.cpp}} +See \texttt{\href{http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/contours2.cpp}{contours2.cpp}} and \texttt{\href{http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/squares.cpp}{squares.cpp}} samples on what are the contours and how to use them. \section{Data I/O} -\href{http://opencv.itseez.com/modules/core/doc/xml_yaml_persistence.html\#xml-yaml-file-storages-writing-to-a-file-storage}{XML/YAML storages} are collections (possibly nested) of scalar values, structures and heterogeneous lists. +\href{http://docs.opencv.org/modules/core/doc/xml_yaml_persistence.html\#xml-yaml-file-storages-writing-to-a-file-storage}{XML/YAML storages} are collections (possibly nested) of scalar values, structures and heterogeneous lists. \begin{tabbing} \textbf{Wr}\=\textbf{iting data to YAML (or XML)}\\ @@ -509,7 +509,7 @@ samples on what are the contours and how to use them. \texttt{Rect r; r.x = (int)tm["x"], r.y = (int)tm["y"];}\\ \texttt{r.width = (int)tm["width"], r.height = (int)tm["height"];}\\ - + \texttt{int lbp\_val = 0;}\\ \texttt{FileNodeIterator it = tm["lbp"].begin();}\\ @@ -521,9 +521,9 @@ samples on what are the contours and how to use them. 
\begin{tabbing} \textbf{Wr}\=\textbf{iting and reading raster images}\\ -\texttt{\href{http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html\#imwrite}{imwrite}("myimage.jpg", image);}\\ -\texttt{Mat image\_color\_copy = \href{http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html\#imread}{imread}("myimage.jpg", 1);}\\ -\texttt{Mat image\_grayscale\_copy = \href{http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html\#imread}{imread}("myimage.jpg", 0);}\\ +\texttt{\href{http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html\#imwrite}{imwrite}("myimage.jpg", image);}\\ +\texttt{Mat image\_color\_copy = \href{http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html\#imread}{imread}("myimage.jpg", 1);}\\ +\texttt{Mat image\_grayscale\_copy = \href{http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html\#imread}{imread}("myimage.jpg", 0);}\\ \end{tabbing} \emph{The functions can read/write images in the following formats: \textbf{BMP (.bmp), JPEG (.jpg, .jpeg), TIFF (.tif, .tiff), PNG (.png), PBM/PGM/PPM (.p?m), Sun Raster (.sr), JPEG 2000 (.jp2)}. Every format supports 8-bit, 1- or 3-channel images. Some formats (PNG, JPEG 2000) support 16 bits per channel.} @@ -544,72 +544,72 @@ samples on what are the contours and how to use them. \begin{tabular}{@{}p{\the\MyLen}% @{}p{\linewidth-\the\MyLen}@{}} -\texttt{\href{http://opencv.itseez.com/modules/highgui/doc/user_interface.html\#namedwindow}{namedWindow(winname,flags)}} & \ \ \ \ \ \ \ \ \ \ Create named highgui window \\ +\texttt{\href{http://docs.opencv.org/modules/highgui/doc/user_interface.html\#namedwindow}{namedWindow(winname,flags)}} & \ \ \ \ \ \ \ \ \ \ Create named highgui window \\ -\texttt{\href{http://opencv.itseez.com/modules/highgui/doc/user_interface.html\#destroywindow}{destroyWindow(winname)}} & \ \ \ Destroy the specified window \\ +\texttt{\href{http://docs.opencv.org/modules/highgui/doc/user_interface.html\#destroywindow}{destroyWindow(winname)}} & \ \ \ Destroy the specified window \\ -\texttt{\href{http://opencv.itseez.com/modules/highgui/doc/user_interface.html\#imshow}{imshow(winname, mtx)}} & Show image in the window \\ +\texttt{\href{http://docs.opencv.org/modules/highgui/doc/user_interface.html\#imshow}{imshow(winname, mtx)}} & Show image in the window \\ -\texttt{\href{http://opencv.itseez.com/modules/highgui/doc/user_interface.html\#waitkey}{waitKey(delay)}} & Wait for a key press during the specified time interval (or forever). Process events while waiting. \emph{Do not forget to call this function several times a second in your code.} \\ +\texttt{\href{http://docs.opencv.org/modules/highgui/doc/user_interface.html\#waitkey}{waitKey(delay)}} & Wait for a key press during the specified time interval (or forever). Process events while waiting. 
\emph{Do not forget to call this function several times a second in your code.} \\ -\texttt{\href{http://opencv.itseez.com/modules/highgui/doc/user_interface.html\#createtrackbar}{createTrackbar(...)}} & Add trackbar (slider) to the specified window \\ +\texttt{\href{http://docs.opencv.org/modules/highgui/doc/user_interface.html\#createtrackbar}{createTrackbar(...)}} & Add trackbar (slider) to the specified window \\ -\texttt{\href{http://opencv.itseez.com/modules/highgui/doc/user_interface.html\#setmousecallback}{setMouseCallback(...)}} & \ \ Set the callback on mouse clicks and movements in the specified window \\ +\texttt{\href{http://docs.opencv.org/modules/highgui/doc/user_interface.html\#setmousecallback}{setMouseCallback(...)}} & \ \ Set the callback on mouse clicks and movements in the specified window \\ \end{tabular} -See \texttt{\href{http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/camshiftdemo.cpp}{camshiftdemo.cpp}} and other \href{http://code.opencv.org/svn/opencv/trunk/opencv/samples/}{OpenCV samples} on how to use the GUI functions. +See \texttt{\href{http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/camshiftdemo.cpp}{camshiftdemo.cpp}} and other \href{http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/}{OpenCV samples} on how to use the GUI functions. \section{Camera Calibration, Pose Estimation and Depth Estimation} \begin{tabular}{@{}p{\the\MyLen}% @{}p{\linewidth-\the\MyLen}@{}} -\texttt{\href{http://opencv.itseez.com/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#calibratecamera}{calibrateCamera()}} & Calibrate camera from several views of a calibration pattern. \\ +\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#calibratecamera}{calibrateCamera()}} & Calibrate camera from several views of a calibration pattern. \\ -\texttt{\href{http://opencv.itseez.com/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#findchessboardcorners}{findChessboardCorners()}} & \ \ \ \ \ \ Find feature points on the checkerboard calibration pattern. \\ +\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#findchessboardcorners}{findChessboardCorners()}} & \ \ \ \ \ \ Find feature points on the checkerboard calibration pattern. \\ -\texttt{\href{http://opencv.itseez.com/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#solvepnp}{solvePnP()}} & Find the object pose from the known projections of its feature points. \\ +\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#solvepnp}{solvePnP()}} & Find the object pose from the known projections of its feature points. \\ -\texttt{\href{http://opencv.itseez.com/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#stereocalibrate}{stereoCalibrate()}} & Calibrate stereo camera. \\ +\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#stereocalibrate}{stereoCalibrate()}} & Calibrate stereo camera. 
\\ -\texttt{\href{http://opencv.itseez.com/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#stereorectify}{stereoRectify()}} & Compute the rectification transforms for a calibrated stereo camera.\\ +\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#stereorectify}{stereoRectify()}} & Compute the rectification transforms for a calibrated stereo camera.\\ -\texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/geometric_transformations.html\#initundistortrectifymap}{initUndistortRectifyMap()}} & \ \ \ \ \ \ Compute rectification map (for \texttt{remap()}) for each stereo camera head.\\ +\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#initundistortrectifymap}{initUndistortRectifyMap()}} & \ \ \ \ \ \ Compute rectification map (for \texttt{remap()}) for each stereo camera head.\\ -\texttt{\href{http://opencv.itseez.com/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#StereoBM}{StereoBM}}, \texttt{\href{http://opencv.itseez.com/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#StereoSGBM}{StereoSGBM}} & The stereo correspondence engines to be run on rectified stereo pairs.\\ +\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#StereoBM}{StereoBM}}, \texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#StereoSGBM}{StereoSGBM}} & The stereo correspondence engines to be run on rectified stereo pairs.\\ -\texttt{\href{http://opencv.itseez.com/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#reprojectimageto3d}{reprojectImageTo3D()}} & Convert disparity map to 3D point cloud.\\ +\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#reprojectimageto3d}{reprojectImageTo3D()}} & Convert disparity map to 3D point cloud.\\ -\texttt{\href{http://opencv.itseez.com/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#findhomography}{findHomography()}} & Find best-fit perspective transformation between two 2D point sets. \\ +\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#findhomography}{findHomography()}} & Find best-fit perspective transformation between two 2D point sets. \\ \end{tabular} -To calibrate a camera, you can use \texttt{\href{http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/calibration.cpp}{calibration.cpp}} or -\texttt{\href{http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/stereo\_calib.cpp}{stereo\_calib.cpp}} samples. +To calibrate a camera, you can use \texttt{\href{http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/calibration.cpp}{calibration.cpp}} or +\texttt{\href{http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/stereo\_calib.cpp}{stereo\_calib.cpp}} samples. To get the disparity maps and the point clouds, use -\texttt{\href{http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/stereo\_match.cpp}{stereo\_match.cpp}} sample. +\texttt{\href{http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/stereo\_match.cpp}{stereo\_match.cpp}} sample. 
\section{Object Detection} \begin{tabular}{@{}p{\the\MyLen}% @{}p{\linewidth-\the\MyLen}@{}} - \texttt{\href{http://opencv.itseez.com/modules/imgproc/doc/object_detection.html\#matchtemplate}{matchTemplate}} & Compute proximity map for given template.\\ + \texttt{\href{http://docs.opencv.org/modules/imgproc/doc/object_detection.html\#matchtemplate}{matchTemplate}} & Compute proximity map for given template.\\ -\texttt{\href{http://opencv.itseez.com/modules/objdetect/doc/cascade_classification.html\#cascadeclassifier}{CascadeClassifier}} & Viola's Cascade of Boosted classifiers using Haar or LBP features. Suits for detecting faces, facial features and some other objects without diverse textures. See \texttt{\href{http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/facedetect.cpp}{facedetect.cpp}}\\ +\texttt{\href{http://docs.opencv.org/modules/objdetect/doc/cascade_classification.html\#cascadeclassifier}{CascadeClassifier}} & Viola's Cascade of Boosted classifiers using Haar or LBP features. Suits for detecting faces, facial features and some other objects without diverse textures. See \texttt{\href{http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/c/facedetect.cpp}{facedetect.cpp}}\\ -\texttt{{HOGDescriptor}} & N. Dalal's object detector using Histogram-of-Oriented-Gradients (HOG) features. Suits for detecting people, cars and other objects with well-defined silhouettes. See \texttt{\href{http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/peopledetect.cpp}{peopledetect.cpp}}\\ +\texttt{{HOGDescriptor}} & N. Dalal's object detector using Histogram-of-Oriented-Gradients (HOG) features. Suits for detecting people, cars and other objects with well-defined silhouettes. See \texttt{\href{http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/peopledetect.cpp}{peopledetect.cpp}}\\ \end{tabular} -% +% % feature detection: % features2d toolbox -% +% % object detection: % using a classifier running on a sliding window: cascadeclassifier + hog. % using salient point features: features2d -> matching -% +% % statistical data processing: % clustering (k-means), % classification + regression (SVM, boosting, k-nearest), diff --git a/doc/tutorials/core/basic_geometric_drawing/basic_geometric_drawing.rst b/doc/tutorials/core/basic_geometric_drawing/basic_geometric_drawing.rst index 4ff36a125e..3bd3237b70 100644 --- a/doc/tutorials/core/basic_geometric_drawing/basic_geometric_drawing.rst +++ b/doc/tutorials/core/basic_geometric_drawing/basic_geometric_drawing.rst @@ -9,7 +9,7 @@ In this tutorial you will learn how to: .. container:: enumeratevisibleitemswithsquare - * Use :point:`Point <>` to define 2D points in an image. + * Use :point:`Point <>` to define 2D points in an image. * Use :scalar:`Scalar <>` and why it is useful * Draw a **line** by using the OpenCV function :line:`line <>` * Draw an **ellipse** by using the OpenCV function :ellipse:`ellipse <>` @@ -30,15 +30,15 @@ Point It represents a 2D point, specified by its image coordinates :math:`x` and :math:`y`. We can define it as: .. code-block:: cpp - + Point pt; - pt.x = 10; + pt.x = 10; pt.y = 8; or .. code-block:: cpp - + Point pt = Point(10, 8); Scalar @@ -48,7 +48,7 @@ Scalar * Let's see an example, if we are asked for a color argument and we give: .. code-block:: cpp - + Scalar( a, b, c ) We would be defining a RGB color such as: *Red = c*, *Green = b* and *Blue = a* @@ -56,12 +56,12 @@ Scalar Code ===== -* This code is in your OpenCV sample folder. 
Otherwise you can grab it from `here `_ +* This code is in your OpenCV sample folder. Otherwise you can grab it from `here `_ Explanation ============= -#. Since we plan to draw two examples (an atom and a rook), we have to create 02 images and two windows to display them. +#. Since we plan to draw two examples (an atom and a rook), we have to create 02 images and two windows to display them. .. code-block:: cpp @@ -69,7 +69,7 @@ Explanation char atom_window[] = "Drawing 1: Atom"; char rook_window[] = "Drawing 2: Rook"; - /// Create black empty images + /// Create black empty images Mat atom_image = Mat::zeros( w, w, CV_8UC3 ); Mat rook_image = Mat::zeros( w, w, CV_8UC3 ); @@ -79,7 +79,7 @@ Explanation /// 1. Draw a simple atom: - /// 1.a. Creating ellipses + /// 1.a. Creating ellipses MyEllipse( atom_image, 90 ); MyEllipse( atom_image, 0 ); MyEllipse( atom_image, 45 ); @@ -105,7 +105,7 @@ Explanation -1, 8 ); - /// 2.c. Create a few lines + /// 2.c. Create a few lines MyLine( rook_image, Point( 0, 15*w/16 ), Point( w, 15*w/16 ) ); MyLine( rook_image, Point( w/4, 7*w/8 ), Point( w/4, w ) ); MyLine( rook_image, Point( w/2, 7*w/8 ), Point( w/2, w ) ); @@ -113,15 +113,15 @@ Explanation #. Let's check what is inside each of these functions: - * *MyLine* - - .. code-block:: cpp + * *MyLine* + + .. code-block:: cpp void MyLine( Mat img, Point start, Point end ) { int thickness = 2; int lineType = 8; - line( img, + line( img, start, end, Scalar( 0, 0, 0 ), @@ -136,12 +136,12 @@ Explanation * Draw a line from Point **start** to Point **end** * The line is displayed in the image **img** * The line color is defined by **Scalar( 0, 0, 0)** which is the RGB value correspondent to **Black** - * The line thickness is set to **thickness** (in this case 2) + * The line thickness is set to **thickness** (in this case 2) * The line is a 8-connected one (**lineType** = 8) * *MyEllipse* - .. code-block:: cpp + .. code-block:: cpp void MyEllipse( Mat img, double angle ) { @@ -152,15 +152,15 @@ Explanation Point( w/2.0, w/2.0 ), Size( w/4.0, w/16.0 ), angle, - 0, + 0, 360, Scalar( 255, 0, 0 ), thickness, - lineType ); + lineType ); } From the code above, we can observe that the function :ellipse:`ellipse <>` draws an ellipse such that: - + .. container:: enumeratevisibleitemswithsquare * The ellipse is displayed in the image **img** @@ -169,7 +169,7 @@ Explanation * The ellipse extends an arc between **0** and **360** degrees * The color of the figure will be **Scalar( 255, 255, 0)** which means blue in RGB value. * The ellipse's **thickness** is 2. - + * *MyFilledCircle* @@ -180,11 +180,11 @@ Explanation int thickness = -1; int lineType = 8; - circle( img, + circle( img, center, w/32.0, Scalar( 0, 0, 255 ), - thickness, + thickness, lineType ); } @@ -193,9 +193,9 @@ Explanation .. container:: enumeratevisibleitemswithsquare * The image where the circle will be displayed (**img**) - * The center of the circle denoted as the Point **center** + * The center of the circle denoted as the Point **center** * The radius of the circle: **w/32.0** - * The color of the circle: **Scalar(0, 0, 255)** which means *Red* in BGR + * The color of the circle: **Scalar(0, 0, 255)** which means *Red* in BGR * Since **thickness** = -1, the circle will be drawn filled. * *MyPolygon* @@ -237,18 +237,18 @@ Explanation npt, 1, Scalar( 255, 255, 255 ), - lineType ); + lineType ); } To draw a filled polygon we use the function :fill_poly:`fillPoly <>`. We note that: - + .. 
container:: enumeratevisibleitemswithsquare * The polygon will be drawn on **img** * The vertices of the polygon are the set of points in **ppt** * The total number of vertices to be drawn are **npt** * The number of polygons to be drawn is only **1** - * The color of the polygon is defined by **Scalar( 255, 255, 255)**, which is the BGR value for *white* + * The color of the polygon is defined by **Scalar( 255, 255, 255)**, which is the BGR value for *white* * *rectangle* @@ -277,4 +277,4 @@ Compiling and running your program should give you a result like this: .. image:: images/Drawing_1_Tutorial_Result_0.png :alt: Drawing Tutorial 1 - Final Result - :align: center + :align: center diff --git a/doc/tutorials/core/random_generator_and_text/random_generator_and_text.rst b/doc/tutorials/core/random_generator_and_text/random_generator_and_text.rst index 2d7493c22e..38c761fc6e 100644 --- a/doc/tutorials/core/random_generator_and_text/random_generator_and_text.rst +++ b/doc/tutorials/core/random_generator_and_text/random_generator_and_text.rst @@ -19,10 +19,10 @@ Code .. container:: enumeratevisibleitemswithsquare * In the previous tutorial (:ref:`Drawing_1`) we drew diverse geometric figures, giving as input parameters such as coordinates (in the form of :point:`Points <>`), color, thickness, etc. You might have noticed that we gave specific values for these arguments. - + * In this tutorial, we intend to use *random* values for the drawing parameters. Also, we intend to populate our image with a big number of geometric figures. Since we will be initializing them in a random fashion, this process will be automatic and made by using *loops* . - * This code is in your OpenCV sample folder. Otherwise you can grab it from `here `_ . + * This code is in your OpenCV sample folder. Otherwise you can grab it from `here `_ . Explanation ============ @@ -43,7 +43,7 @@ Explanation Mat image = Mat::zeros( window_height, window_width, CV_8UC3 ); /// Show it in a window during DELAY ms - imshow( window_name, image ); + imshow( window_name, image ); #. Then we proceed to draw crazy stuff. After taking a look at the code, you can see that it is mainly divided in 8 sections, defined as functions: @@ -110,22 +110,22 @@ Explanation * The *for* loop will repeat **NUMBER** times. Since the function :line:`line <>` is inside this loop, that means that **NUMBER** lines will be generated. * The line extremes are given by *pt1* and *pt2*. For *pt1* we can see that: - + .. code-block:: cpp - - pt1.x = rng.uniform( x_1, x_2 ); + + pt1.x = rng.uniform( x_1, x_2 ); pt1.y = rng.uniform( y_1, y_2 ); - * We know that **rng** is a *Random number generator* object. In the code above we are calling **rng.uniform(a,b)**. This generates a radombly uniformed distribution between the values **a** and **b** (inclusive in **a**, exclusive in **b**). + * We know that **rng** is a *Random number generator* object. In the code above we are calling **rng.uniform(a,b)**. This generates a radombly uniformed distribution between the values **a** and **b** (inclusive in **a**, exclusive in **b**). * From the explanation above, we deduce that the extremes *pt1* and *pt2* will be random values, so the lines positions will be quite impredictable, giving a nice visual effect (check out the Result section below). * As another observation, we notice that in the :line:`line <>` arguments, for the *color* input we enter: .. code-block:: cpp - - randomColor(rng) - + + randomColor(rng) + Let's check the function implementation: .. 
code-block:: cpp @@ -138,7 +138,7 @@ Explanation As we can see, the return value is a *Scalar* with 3 randomly initialized values, which are used as the *R*, *G* and *B* parameters for the line color. Hence, the color of the lines will be random too! -#. The explanation above applies for the other functions generating circles, ellipses, polygones, etc. The parameters such as *center* and *vertices* are also generated randomly. +#. The explanation above applies to the other functions generating circles, ellipses, polygons, etc. Parameters such as *center* and *vertices* are also generated randomly. #. Before finishing, we should also take a look at the functions *Display_Random_Text* and *Displaying_Big_End*, since they both have a few interesting features: @@ -158,7 +158,7 @@ Explanation putText( image, "Testing text rendering", org, rng.uniform(0,8), rng.uniform(0,100)*0.05+0.1, randomColor(rng), rng.uniform(1, 10), lineType); - + imshow( window_name, image ); if( waitKey(DELAY) >= 0 ) { return -1; } @@ -172,7 +172,7 @@ Explanation .. code-block:: cpp putText( image, "Testing text rendering", org, rng.uniform(0,8), - rng.uniform(0,100)*0.05+0.1, randomColor(rng), rng.uniform(1, 10), lineType); + rng.uniform(0,100)*0.05+0.1, randomColor(rng), rng.uniform(1, 10), lineType); So, what does the function :put_text:`putText <>` do? In our example: @@ -197,7 +197,7 @@ Explanation Size textsize = getTextSize("OpenCV forever!", CV_FONT_HERSHEY_COMPLEX, 3, 5, 0); Point org((window_width - textsize.width)/2, (window_height - textsize.height)/2); int lineType = 8; - + Mat image2; for( int i = 0; i < 255; i += 2 ) @@ -205,7 +205,7 @@ Explanation image2 = image - Scalar::all(i); putText( image2, "OpenCV forever!", org, CV_FONT_HERSHEY_COMPLEX, 3, Scalar(i, i, 255), 5, lineType ); - + imshow( window_name, image2 ); if( waitKey(DELAY) >= 0 ) { return -1; } @@ -222,8 +222,8 @@ Explanation So, **image2** is the subtraction of **image** and **Scalar::all(i)**. In fact, what happens here is that every pixel of **image2** will be the result of subtracting the value of **i** from every pixel of **image** (remember that for each pixel we are considering three values such as R, G and B, so each of them will be affected) - Also remember that the substraction operation *always* performs internally a **saturate** operation, which means that the result obtained will always be inside the allowed range (no negative and between 0 and 255 for our example). - + Also remember that the subtraction operation *always* performs a **saturate** operation internally, which means that the result obtained will always be inside the allowed range (no negative values, and between 0 and 255 in our example). + Result ======== @@ -234,7 +234,7 @@ As you just saw in the Code section, the program will sequentially execute diver .. image:: images/Drawing_2_Tutorial_Result_0.jpg :alt: Drawing Tutorial 2 - Final Result 0 - :align: center + :align: center #. Then a new set of figures, this time *rectangles*, will follow. .. image:: images/Drawing_2_Tutorial_Result_2.jpg :alt: Drawing Tutorial 2 - Final Result 2 - :align: center + :align: center #. Now, *polylines* with three segments will appear on screen, again in random configurations. .. image:: images/Drawing_2_Tutorial_Result_3.jpg :alt: Drawing Tutorial 2 - Final Result 3 - :align: center + :align: center #. Filled polygons (in this example triangles) will follow.
@@ -256,7 +256,7 @@ As you just saw in the Code section, the program will sequentially execute diver .. image:: images/Drawing_2_Tutorial_Result_5.jpg :alt: Drawing Tutorial 2 - Final Result 5 - :align: center + :align: center #. Near the end, the text *"Testing Text Rendering"* will appear in a variety of fonts, sizes, colors and positions. @@ -264,4 +264,4 @@ As you just saw in the Code section, the program will sequentially execute diver .. image:: images/Drawing_2_Tutorial_Result_7.jpg :alt: Drawing Tutorial 2 - Final Result 7 - :align: center + :align: center diff --git a/doc/tutorials/definitions/tocDefinitions.rst b/doc/tutorials/definitions/tocDefinitions.rst index f850bb8dc3..6efdc25e8c 100644 --- a/doc/tutorials/definitions/tocDefinitions.rst +++ b/doc/tutorials/definitions/tocDefinitions.rst @@ -2,6 +2,7 @@ .. |Author_BernatG| unicode:: Bern U+00E1 t U+0020 G U+00E1 bor .. |Author_AndreyK| unicode:: Andrey U+0020 Kamaev .. |Author_LeonidBLB| unicode:: Leonid U+0020 Beynenson +.. |Author_VsevolodG| unicode:: Vsevolod U+0020 Glumov .. |Author_VictorE| unicode:: Victor U+0020 Eruhimov .. |Author_ArtemM| unicode:: Artem U+0020 Myagkov .. |Author_FernandoI| unicode:: Fernando U+0020 Iglesias U+0020 Garc U+00ED a diff --git a/doc/tutorials/features2d/feature_description/feature_description.rst b/doc/tutorials/features2d/feature_description/feature_description.rst index 2d7a9e2025..9ba777500a 100644 --- a/doc/tutorials/features2d/feature_description/feature_description.rst +++ b/doc/tutorials/features2d/feature_description/feature_description.rst @@ -15,7 +15,7 @@ In this tutorial you will learn how to: * Use :surf_descriptor_extractor:`SurfDescriptorExtractor<>` and its function :descriptor_extractor:`compute<>` to perform the required calculations. * Use a :brute_force_matcher:`BruteForceMatcher<>` to match the features vector * Use the function :draw_matches:`drawMatches<>` to draw the detected matches. - + Theory ====== @@ -23,9 +23,9 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include #include @@ -45,7 +45,7 @@ This tutorial code's is shown lines below. You can also download it from `here < Mat img_1 = imread( argv[1], CV_LOAD_IMAGE_GRAYSCALE ); Mat img_2 = imread( argv[2], CV_LOAD_IMAGE_GRAYSCALE ); - + if( !img_1.data || !img_2.data ) { return -1; } @@ -74,7 +74,7 @@ This tutorial code's is shown lines below. You can also download it from `here < //-- Draw matches Mat img_matches; - drawMatches( img_1, keypoints_1, img_2, keypoints_2, matches, img_matches ); + drawMatches( img_1, keypoints_1, img_2, keypoints_2, matches, img_matches ); //-- Show detected matches imshow("Matches", img_matches ); @@ -93,9 +93,9 @@ Explanation Result ====== - + #. Here is the result after applying the BruteForce matcher between the two original images: - + .. 
image:: images/Feature_Description_BruteForce_Result.jpg :align: center :height: 200pt diff --git a/doc/tutorials/features2d/feature_detection/feature_detection.rst b/doc/tutorials/features2d/feature_detection/feature_detection.rst index 49239d06f2..26798f8f6f 100644 --- a/doc/tutorials/features2d/feature_detection/feature_detection.rst +++ b/doc/tutorials/features2d/feature_detection/feature_detection.rst @@ -14,7 +14,7 @@ In this tutorial you will learn how to: * Use the :surf_feature_detector:`SurfFeatureDetector<>` and its function :feature_detector_detect:`detect<>` to perform the detection process * Use the function :draw_keypoints:`drawKeypoints<>` to draw the detected keypoints - + Theory ====== @@ -22,14 +22,14 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include #include #include "opencv2/core/core.hpp" - #include "opencv2/features2d/features2d.hpp" + #include "opencv2/features2d/features2d.hpp" #include "opencv2/highgui/highgui.hpp" using namespace cv; @@ -44,7 +44,7 @@ This tutorial code's is shown lines below. You can also download it from `here < Mat img_1 = imread( argv[1], CV_LOAD_IMAGE_GRAYSCALE ); Mat img_2 = imread( argv[2], CV_LOAD_IMAGE_GRAYSCALE ); - + if( !img_1.data || !img_2.data ) { std::cout<< " --(!) Error reading images " << std::endl; return -1; } @@ -61,8 +61,8 @@ This tutorial code's is shown lines below. You can also download it from `here < //-- Draw keypoints Mat img_keypoints_1; Mat img_keypoints_2; - drawKeypoints( img_1, keypoints_1, img_keypoints_1, Scalar::all(-1), DrawMatchesFlags::DEFAULT ); - drawKeypoints( img_2, keypoints_2, img_keypoints_2, Scalar::all(-1), DrawMatchesFlags::DEFAULT ); + drawKeypoints( img_1, keypoints_1, img_keypoints_1, Scalar::all(-1), DrawMatchesFlags::DEFAULT ); + drawKeypoints( img_2, keypoints_2, img_keypoints_2, Scalar::all(-1), DrawMatchesFlags::DEFAULT ); //-- Show detected (drawn) keypoints imshow("Keypoints 1", img_keypoints_1 ); @@ -82,9 +82,9 @@ Explanation Result ====== - + #. Here is the result of the feature detection applied to the first image: - + .. image:: images/Feature_Detection_Result_a.jpg :align: center :height: 125pt @@ -92,6 +92,6 @@ Result #. And here is the result for the second image: .. image:: images/Feature_Detection_Result_b.jpg - :align: center - :height: 200pt + :align: center + :height: 200pt diff --git a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst index 5eb9d4d281..47eafedbc7 100644 --- a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst +++ b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst @@ -19,9 +19,9 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include #include @@ -41,7 +41,7 @@ This tutorial code's is shown lines below. You can also download it from `here < Mat img_1 = imread( argv[1], CV_LOAD_IMAGE_GRAYSCALE ); Mat img_2 = imread( argv[2], CV_LOAD_IMAGE_GRAYSCALE ); - + if( !img_1.data || !img_2.data ) { std::cout<< " --(!) Error reading images " << std::endl; return -1; } @@ -79,7 +79,7 @@ This tutorial code's is shown lines below. 
You can also download it from `here < printf("-- Max dist : %f \n", max_dist ); printf("-- Min dist : %f \n", min_dist ); - + //-- Draw only "good" matches (i.e. whose distance is less than 2*min_dist ) //-- PS.- radiusMatch can also be used here. std::vector< DMatch > good_matches; @@ -87,13 +87,13 @@ This tutorial code's is shown lines below. You can also download it from `here < for( int i = 0; i < descriptors_1.rows; i++ ) { if( matches[i].distance < 2*min_dist ) { good_matches.push_back( matches[i]); } - } + } //-- Draw only "good" matches Mat img_matches; - drawMatches( img_1, keypoints_1, img_2, keypoints_2, - good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), - vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS ); + drawMatches( img_1, keypoints_1, img_2, keypoints_2, + good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), + vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS ); //-- Show detected matches imshow( "Good Matches", img_matches ); @@ -115,9 +115,9 @@ Explanation Result ====== - + #. Here is the result of the feature detection applied to the first image: - + .. image:: images/Featur_FlannMatcher_Result.jpg :align: center :height: 250pt diff --git a/doc/tutorials/features2d/feature_homography/feature_homography.rst b/doc/tutorials/features2d/feature_homography/feature_homography.rst index 15c85260a8..ad764ce9b7 100644 --- a/doc/tutorials/features2d/feature_homography/feature_homography.rst +++ b/doc/tutorials/features2d/feature_homography/feature_homography.rst @@ -12,7 +12,7 @@ In this tutorial you will learn how to: * Use the function :find_homography:`findHomography<>` to find the transform between matched keypoints. * Use the function :perspective_transform:`perspectiveTransform<>` to map the points. - + Theory ====== @@ -20,9 +20,9 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include #include @@ -43,7 +43,7 @@ This tutorial code's is shown lines below. You can also download it from `here < Mat img_object = imread( argv[1], CV_LOAD_IMAGE_GRAYSCALE ); Mat img_scene = imread( argv[2], CV_LOAD_IMAGE_GRAYSCALE ); - + if( !img_object.data || !img_scene.data ) { std::cout<< " --(!) Error reading images " << std::endl; return -1; } @@ -81,21 +81,21 @@ This tutorial code's is shown lines below. You can also download it from `here < printf("-- Max dist : %f \n", max_dist ); printf("-- Min dist : %f \n", min_dist ); - + //-- Draw only "good" matches (i.e. whose distance is less than 3*min_dist ) std::vector< DMatch > good_matches; for( int i = 0; i < descriptors_object.rows; i++ ) { if( matches[i].distance < 3*min_dist ) { good_matches.push_back( matches[i]); } - } + } Mat img_matches; - drawMatches( img_object, keypoints_object, img_scene, keypoints_scene, - good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), - vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS ); + drawMatches( img_object, keypoints_object, img_scene, keypoints_scene, + good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), + vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS ); - //-- Localize the object + //-- Localize the object std::vector obj; std::vector scene; @@ -103,7 +103,7 @@ This tutorial code's is shown lines below. 
You can also download it from `here < { //-- Get the keypoints from the good matches obj.push_back( keypoints_object[ good_matches[i].queryIdx ].pt ); - scene.push_back( keypoints_scene[ good_matches[i].trainIdx ].pt ); + scene.push_back( keypoints_scene[ good_matches[i].trainIdx ].pt ); } Mat H = findHomography( obj, scene, CV_RANSAC ); @@ -143,6 +143,6 @@ Result #. And here is the result for the detected object (highlighted in green) .. image:: images/Feature_Homography_Result.jpg - :align: center - :height: 200pt + :align: center + :height: 200pt diff --git a/doc/tutorials/features2d/trackingmotion/corner_subpixeles/corner_subpixeles.rst b/doc/tutorials/features2d/trackingmotion/corner_subpixeles/corner_subpixeles.rst index 88d125afaa..1b405e46c0 100644 --- a/doc/tutorials/features2d/trackingmotion/corner_subpixeles/corner_subpixeles.rst +++ b/doc/tutorials/features2d/trackingmotion/corner_subpixeles/corner_subpixeles.rst @@ -19,9 +19,9 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -55,7 +55,7 @@ This tutorial code's is shown lines below. You can also download it from `here < namedWindow( source_window, CV_WINDOW_AUTOSIZE ); /// Create Trackbar to set the number of corners - createTrackbar( "Max corners:", source_window, &maxCorners, maxTrackbar, goodFeaturesToTrack_Demo); + createTrackbar( "Max corners:", source_window, &maxCorners, maxTrackbar, goodFeaturesToTrack_Demo); imshow( source_window, src ); @@ -72,7 +72,7 @@ This tutorial code's is shown lines below. You can also download it from `here < void goodFeaturesToTrack_Demo( int, void* ) { if( maxCorners < 1 ) { maxCorners = 1; } - + /// Parameters for Shi-Tomasi algorithm vector corners; double qualityLevel = 0.01; @@ -86,7 +86,7 @@ This tutorial code's is shown lines below. You can also download it from `here < copy = src.clone(); /// Apply corner detection - goodFeaturesToTrack( src_gray, + goodFeaturesToTrack( src_gray, corners, maxCorners, qualityLevel, @@ -95,18 +95,18 @@ This tutorial code's is shown lines below. You can also download it from `here < blockSize, useHarrisDetector, k ); - + /// Draw corners detected cout<<"** Number of corners detected: "<` to find the eigenvalues and eigenvectors to determine if a pixel is a corner. - * Use the OpenCV function :corner_min_eigenval:`cornerMinEigenVal <>` to find the minimum eigenvalues for corner detection. + * Use the OpenCV function :corner_min_eigenval:`cornerMinEigenVal <>` to find the minimum eigenvalues for corner detection. * To implement our own version of the Harris detector as well as the Shi-Tomasi detector, by using the two functions above. Theory @@ -20,9 +20,9 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -34,9 +34,9 @@ This tutorial code's is shown lines below. 
You can also download it from `here < using namespace std; /// Global variables - Mat src, src_gray; + Mat src, src_gray; Mat myHarris_dst; Mat myHarris_copy; Mat Mc; - Mat myShiTomasi_dst; Mat myShiTomasi_copy; + Mat myShiTomasi_dst; Mat myShiTomasi_copy; int myShiTomasi_qualityLevel = 50; int myHarris_qualityLevel = 50; @@ -70,7 +70,7 @@ This tutorial code's is shown lines below. You can also download it from `here < cornerEigenValsAndVecs( src_gray, myHarris_dst, blockSize, apertureSize, BORDER_DEFAULT ); - /* calculate Mc */ + /* calculate Mc */ for( int j = 0; j < src_gray.rows; j++ ) { for( int i = 0; i < src_gray.cols; i++ ) { @@ -81,25 +81,25 @@ This tutorial code's is shown lines below. You can also download it from `here < } minMaxLoc( Mc, &myHarris_minVal, &myHarris_maxVal, 0, 0, Mat() ); - + /* Create Window and Trackbar */ namedWindow( myHarris_window, CV_WINDOW_AUTOSIZE ); - createTrackbar( " Quality Level:", myHarris_window, &myHarris_qualityLevel, max_qualityLevel, - myHarris_function ); + createTrackbar( " Quality Level:", myHarris_window, &myHarris_qualityLevel, max_qualityLevel, + myHarris_function ); myHarris_function( 0, 0 ); /// My Shi-Tomasi -- Using cornerMinEigenVal - myShiTomasi_dst = Mat::zeros( src_gray.size(), CV_32FC1 ); + myShiTomasi_dst = Mat::zeros( src_gray.size(), CV_32FC1 ); cornerMinEigenVal( src_gray, myShiTomasi_dst, blockSize, apertureSize, BORDER_DEFAULT ); minMaxLoc( myShiTomasi_dst, &myShiTomasi_minVal, &myShiTomasi_maxVal, 0, 0, Mat() ); /* Create Window and Trackbar */ - namedWindow( myShiTomasi_window, CV_WINDOW_AUTOSIZE ); - createTrackbar( " Quality Level:", myShiTomasi_window, &myShiTomasi_qualityLevel, max_qualityLevel, - myShiTomasi_function ); + namedWindow( myShiTomasi_window, CV_WINDOW_AUTOSIZE ); + createTrackbar( " Quality Level:", myShiTomasi_window, &myShiTomasi_qualityLevel, max_qualityLevel, + myShiTomasi_function ); myShiTomasi_function( 0, 0 ); - + waitKey(0); return(0); } @@ -114,9 +114,9 @@ This tutorial code's is shown lines below. You can also download it from `here < for( int j = 0; j < src_gray.rows; j++ ) { for( int i = 0; i < src_gray.cols; i++ ) { - if( myShiTomasi_dst.at(j,i) > myShiTomasi_minVal + ( myShiTomasi_maxVal - + if( myShiTomasi_dst.at(j,i) > myShiTomasi_minVal + ( myShiTomasi_maxVal - myShiTomasi_minVal )*myShiTomasi_qualityLevel/max_qualityLevel ) - { circle( myShiTomasi_copy, Point(i,j), 4, Scalar( rng.uniform(0,255), + { circle( myShiTomasi_copy, Point(i,j), 4, Scalar( rng.uniform(0,255), rng.uniform(0,255), rng.uniform(0,255) ), -1, 8, 0 ); } } } @@ -135,9 +135,9 @@ This tutorial code's is shown lines below. You can also download it from `here < { if( Mc.at(j,i) > myHarris_minVal + ( myHarris_maxVal - myHarris_minVal ) *myHarris_qualityLevel/max_qualityLevel ) - { circle( myHarris_copy, Point(i,j), 4, Scalar( rng.uniform(0,255), rng.uniform(0,255), + { circle( myHarris_copy, Point(i,j), 4, Scalar( rng.uniform(0,255), rng.uniform(0,255), rng.uniform(0,255) ), -1, 8, 0 ); } - } + } } imshow( myHarris_window, myHarris_copy ); } @@ -151,9 +151,9 @@ Result ====== .. image:: images/My_Harris_corner_detector_Result.jpg - :align: center + :align: center .. 
image:: images/My_Shi_Tomasi_corner_detector_Result.jpg - :align: center + :align: center diff --git a/doc/tutorials/features2d/trackingmotion/good_features_to_track/good_features_to_track.rst b/doc/tutorials/features2d/trackingmotion/good_features_to_track/good_features_to_track.rst index 582a82b091..e69937eaa3 100644 --- a/doc/tutorials/features2d/trackingmotion/good_features_to_track/good_features_to_track.rst +++ b/doc/tutorials/features2d/trackingmotion/good_features_to_track/good_features_to_track.rst @@ -18,9 +18,9 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -56,7 +56,7 @@ This tutorial code's is shown lines below. You can also download it from `here < namedWindow( source_window, CV_WINDOW_AUTOSIZE ); /// Create Trackbar to set the number of corners - createTrackbar( "Max corners:", source_window, &maxCorners, maxTrackbar, goodFeaturesToTrack_Demo ); + createTrackbar( "Max corners:", source_window, &maxCorners, maxTrackbar, goodFeaturesToTrack_Demo ); imshow( source_window, src ); @@ -70,10 +70,10 @@ This tutorial code's is shown lines below. You can also download it from `here < * @function goodFeaturesToTrack_Demo.cpp * @brief Apply Shi-Tomasi corner detector */ - void goodFeaturesToTrack_Demo( int, void* ) + void goodFeaturesToTrack_Demo( int, void* ) { if( maxCorners < 1 ) { maxCorners = 1; } - + /// Parameters for Shi-Tomasi algorithm vector corners; double qualityLevel = 0.01; @@ -87,7 +87,7 @@ This tutorial code's is shown lines below. You can also download it from `here < copy = src.clone(); /// Apply corner detection - goodFeaturesToTrack( src_gray, + goodFeaturesToTrack( src_gray, corners, maxCorners, qualityLevel, @@ -96,18 +96,18 @@ This tutorial code's is shown lines below. You can also download it from `here < blockSize, useHarrisDetector, k ); - + /// Draw corners detected cout<<"** Number of corners detected: "<` to detect corners using the Harris-Stephens method. Theory @@ -56,7 +56,7 @@ How does it work? .. container:: enumeratevisibleitemswithsquare - * Let's look for corners. Since corners represents a variation in the gradient in the image, we will look for this "variation". + * Let's look for corners. Since corners represents a variation in the gradient in the image, we will look for this "variation". * Consider a grayscale image :math:`I`. We are going to sweep a window :math:`w(x,y)` (with displacements :math:`u` in the x direction and :math:`v` in the right direction) :math:`I` and will calculate the variation of intensity. @@ -66,10 +66,10 @@ How does it work? where: - * :math:`w(x,y)` is the window at position :math:`(x,y)` + * :math:`w(x,y)` is the window at position :math:`(x,y)` * :math:`I(x,y)` is the intensity at :math:`(x,y)` * :math:`I(x+u,y+v)` is the intensity at the moved window :math:`(x+u,y+v)` - + * Since we are looking for windows with corners, we are looking for windows with a large variation in intensity. Hence, we have to maximize the equation above, specifically the term: .. math:: @@ -89,36 +89,36 @@ How does it work? .. math:: E(u,v) \approx \sum _{x,y} u^{2}I_{x}^{2} + 2uvI_{x}I_{y} + v^{2}I_{y}^{2} - + * Which can be expressed in a matrix form as: .. 
math:: E(u,v) \approx \begin{bmatrix} - u & v + u & v \end{bmatrix} \left ( \displaystyle \sum_{x,y} w(x,y) \begin{bmatrix} I_x^{2} & I_{x}I_{y} \\ - I_xI_{y} & I_{y}^{2} + I_xI_{y} & I_{y}^{2} \end{bmatrix} - \right ) + \right ) \begin{bmatrix} u \\ - v - \end{bmatrix} + v + \end{bmatrix} * Let's denote: .. math:: M = \displaystyle \sum_{x,y} - w(x,y) + w(x,y) \begin{bmatrix} I_x^{2} & I_{x}I_{y} \\ - I_xI_{y} & I_{y}^{2} + I_xI_{y} & I_{y}^{2} \end{bmatrix} * So, our equation now is: @@ -126,34 +126,34 @@ How does it work? .. math:: E(u,v) \approx \begin{bmatrix} - u & v + u & v \end{bmatrix} M \begin{bmatrix} u \\ - v - \end{bmatrix} + v + \end{bmatrix} + - * A score is calculated for each window, to determine if it can possibly contain a corner: .. math:: - R = det(M) - k(trace(M))^{2} - + R = det(M) - k(trace(M))^{2} + where: - + * det(M) = :math:`\lambda_{1}\lambda_{2}` * trace(M) = :math:`\lambda_{1}+\lambda_{2}` a window with a score :math:`R` greater than a certain value is considered a "corner" - + Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -161,7 +161,7 @@ This tutorial code's is shown lines below. You can also download it from `here < #include #include - using namespace cv; + using namespace cv; using namespace std; /// Global variables @@ -186,7 +186,7 @@ This tutorial code's is shown lines below. You can also download it from `here < namedWindow( source_window, CV_WINDOW_AUTOSIZE ); createTrackbar( "Threshold: ", source_window, &thresh, max_thresh, cornerHarris_demo ); imshow( source_window, src ); - + cornerHarris_demo( 0, 0 ); waitKey(0); @@ -204,25 +204,25 @@ This tutorial code's is shown lines below. You can also download it from `here < int blockSize = 2; int apertureSize = 3; double k = 0.04; - + /// Detecting corners cornerHarris( src_gray, dst, blockSize, apertureSize, k, BORDER_DEFAULT ); /// Normalizing normalize( dst, dst_norm, 0, 255, NORM_MINMAX, CV_32FC1, Mat() ); - convertScaleAbs( dst_norm, dst_norm_scaled ); + convertScaleAbs( dst_norm, dst_norm_scaled ); /// Drawing a circle around corners for( int j = 0; j < dst_norm.rows ; j++ ) { for( int i = 0; i < dst_norm.cols; i++ ) { if( (int) dst_norm.at(j,i) > thresh ) - { - circle( dst_norm_scaled, Point( i, j ), 5, Scalar(0), 2, 8, 0 ); + { + circle( dst_norm_scaled, Point( i, j ), 5, Scalar(0), 2, 8, 0 ); } - } - } - /// Showing the result + } + } + /// Showing the result namedWindow( corners_window, CV_WINDOW_AUTOSIZE ); imshow( corners_window, dst_norm_scaled ); } @@ -237,11 +237,11 @@ Result The original image: .. image:: images/Harris_Detector_Original_Image.jpg - :align: center + :align: center The detected corners are surrounded by a small black circle .. image:: images/Harris_Detector_Result.jpg - :align: center + :align: center diff --git a/doc/tutorials/imgproc/erosion_dilatation/erosion_dilatation.rst b/doc/tutorials/imgproc/erosion_dilatation/erosion_dilatation.rst index f3e75764ba..9bd460d159 100644 --- a/doc/tutorials/imgproc/erosion_dilatation/erosion_dilatation.rst +++ b/doc/tutorials/imgproc/erosion_dilatation/erosion_dilatation.rst @@ -22,7 +22,7 @@ Cool Theory Morphological Operations -------------------------- -* In short: A set of operations that process images based on shapes. 
Morphological operations apply a *structuring element* to an input image and generate an output image. +* In short: A set of operations that process images based on shapes. Morphological operations apply a *structuring element* to an input image and generate an output image. * The most basic morphological operations are two: Erosion and Dilation. They have a wide array of uses, i.e. : @@ -36,7 +36,7 @@ Morphological Operations .. image:: images/Morphology_1_Tutorial_Theory_Original_Image.png :alt: Original image - :align: center + :align: center Dilation ^^^^^^^^^ @@ -49,7 +49,7 @@ Dilation .. image:: images/Morphology_1_Tutorial_Theory_Dilation.png :alt: Dilation result - Theory example - :align: center + :align: center The background (bright) dilates around the black regions of the letter. @@ -58,21 +58,21 @@ Erosion * This operation is the sister of dilation. What this does is to compute a local minimum over the area of the kernel. -* As the kernel :math:`B` is scanned over the image, we compute the minimal pixel value overlapped by :math:`B` and replace the image pixel under the anchor point with that minimal value. +* As the kernel :math:`B` is scanned over the image, we compute the minimal pixel value overlapped by :math:`B` and replace the image pixel under the anchor point with that minimal value. * Analagously to the example for dilation, we can apply the erosion operator to the original image (shown above). You can see in the result below that the bright areas of the image (the background, apparently), get thinner, whereas the dark zones (the "writing"( gets bigger. .. image:: images/Morphology_1_Tutorial_Theory_Erosion.png :alt: Erosion result - Theory example - :align: center + :align: center Code ====== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" @@ -104,29 +104,29 @@ This tutorial code's is shown lines below. You can also download it from `here < if( !src.data ) { return -1; } - + /// Create windows namedWindow( "Erosion Demo", CV_WINDOW_AUTOSIZE ); namedWindow( "Dilation Demo", CV_WINDOW_AUTOSIZE ); cvMoveWindow( "Dilation Demo", src.cols, 0 ); /// Create Erosion Trackbar - createTrackbar( "Element:\n 0: Rect \n 1: Cross \n 2: Ellipse", "Erosion Demo", - &erosion_elem, max_elem, + createTrackbar( "Element:\n 0: Rect \n 1: Cross \n 2: Ellipse", "Erosion Demo", + &erosion_elem, max_elem, Erosion ); - createTrackbar( "Kernel size:\n 2n +1", "Erosion Demo", + createTrackbar( "Kernel size:\n 2n +1", "Erosion Demo", &erosion_size, max_kernel_size, Erosion ); /// Create Dilation Trackbar - createTrackbar( "Element:\n 0: Rect \n 1: Cross \n 2: Ellipse", "Dilation Demo", - &dilation_elem, max_elem, + createTrackbar( "Element:\n 0: Rect \n 1: Cross \n 2: Ellipse", "Dilation Demo", + &dilation_elem, max_elem, Dilation ); - createTrackbar( "Kernel size:\n 2n +1", "Dilation Demo", + createTrackbar( "Kernel size:\n 2n +1", "Dilation Demo", &dilation_size, max_kernel_size, - Dilation ); + Dilation ); /// Default start Erosion( 0, 0 ); @@ -144,13 +144,13 @@ This tutorial code's is shown lines below. 
You can also download it from `here < else if( erosion_elem == 1 ){ erosion_type = MORPH_CROSS; } else if( erosion_elem == 2) { erosion_type = MORPH_ELLIPSE; } - Mat element = getStructuringElement( erosion_type, + Mat element = getStructuringElement( erosion_type, Size( 2*erosion_size + 1, 2*erosion_size+1 ), - Point( erosion_size, erosion_size ) ); + Point( erosion_size, erosion_size ) ); /// Apply the erosion operation erode( src, erosion_dst, element ); - imshow( "Erosion Demo", erosion_dst ); + imshow( "Erosion Demo", erosion_dst ); } /** @function Dilation */ @@ -161,12 +161,12 @@ This tutorial code's is shown lines below. You can also download it from `here < else if( dilation_elem == 1 ){ dilation_type = MORPH_CROSS; } else if( dilation_elem == 2) { dilation_type = MORPH_ELLIPSE; } - Mat element = getStructuringElement( dilation_type, + Mat element = getStructuringElement( dilation_type, Size( 2*dilation_size + 1, 2*dilation_size+1 ), - Point( dilation_size, dilation_size ) ); + Point( dilation_size, dilation_size ) ); /// Apply the dilation operation dilate( src, dilation_dst, element ); - imshow( "Dilation Demo", dilation_dst ); + imshow( "Dilation Demo", dilation_dst ); } @@ -182,12 +182,12 @@ Explanation * Create a set of 02 Trackbars for each operation: * The first trackbar "Element" returns either **erosion_elem** or **dilation_elem** - * The second trackbar "Kernel size" return **erosion_size** or **dilation_size** for the corresponding operation. + * The second trackbar "Kernel size" return **erosion_size** or **dilation_size** for the corresponding operation. * Every time we move any slider, the user's function **Erosion** or **Dilation** will be called and it will update the output image based on the current trackbar values. - + Let's analyze these two functions: - + #. **erosion:** .. code-block:: cpp @@ -200,32 +200,32 @@ Explanation else if( erosion_elem == 1 ){ erosion_type = MORPH_CROSS; } else if( erosion_elem == 2) { erosion_type = MORPH_ELLIPSE; } - Mat element = getStructuringElement( erosion_type, + Mat element = getStructuringElement( erosion_type, Size( 2*erosion_size + 1, 2*erosion_size+1 ), - Point( erosion_size, erosion_size ) ); + Point( erosion_size, erosion_size ) ); /// Apply the erosion operation erode( src, erosion_dst, element ); - imshow( "Erosion Demo", erosion_dst ); + imshow( "Erosion Demo", erosion_dst ); } * The function that performs the *erosion* operation is :erode:`erode <>`. As we can see, it receives three arguments: - + * *src*: The source image * *erosion_dst*: The output image * *element*: This is the kernel we will use to perform the operation. If we do not specify, the default is a simple :math:`3x3` matrix. Otherwise, we can specify its shape. For this, we need to use the function :get_structuring_element:`getStructuringElement <>`: .. code-block:: cpp - Mat element = getStructuringElement( erosion_type, + Mat element = getStructuringElement( erosion_type, Size( 2*erosion_size + 1, 2*erosion_size+1 ), - Point( erosion_size, erosion_size ) ); - + Point( erosion_size, erosion_size ) ); + We can choose any of three shapes for our kernel: .. container:: enumeratevisibleitemswithsquare + Rectangular box: MORPH_RECT - + Cross: MORPH_CROSS + + Cross: MORPH_CROSS + Ellipse: MORPH_ELLIPSE Then, we just have to specify the size of our kernel and the *anchor point*. If not specified, it is assumed to be in the center. @@ -233,8 +233,8 @@ Explanation * That is all. We are ready to perform the erosion of our image. .. 
note:: - Additionally, there is another parameter that allows you to perform multiple erosions (iterations) at once. We are not using it in this simple tutorial, though. You can check out the Reference for more details. - + Additionally, there is another parameter that allows you to perform multiple erosions (iterations) at once. We are not using it in this simple tutorial, though. You can check out the Reference for more details. + #. **dilation:** @@ -250,12 +250,12 @@ The code is below. As you can see, it is completely similar to the snippet of co else if( dilation_elem == 1 ){ dilation_type = MORPH_CROSS; } else if( dilation_elem == 2) { dilation_type = MORPH_ELLIPSE; } - Mat element = getStructuringElement( dilation_type, + Mat element = getStructuringElement( dilation_type, Size( 2*dilation_size + 1, 2*dilation_size+1 ), - Point( dilation_size, dilation_size ) ); + Point( dilation_size, dilation_size ) ); /// Apply the dilation operation dilate( src, dilation_dst, element ); - imshow( "Dilation Demo", dilation_dst ); + imshow( "Dilation Demo", dilation_dst ); } @@ -267,10 +267,10 @@ Results .. image:: images/Morphology_1_Tutorial_Original_Image.jpg :alt: Original image - :align: center + :align: center We get the results below. Varying the indices in the Trackbars give different output images, naturally. Try them out! You can even try to add a third Trackbar to control the number of iterations. .. image:: images/Morphology_1_Tutorial_Cover.jpg :alt: Dilation and Erosion application - :align: center + :align: center diff --git a/doc/tutorials/imgproc/gausian_median_blur_bilateral_filter/gausian_median_blur_bilateral_filter.rst b/doc/tutorials/imgproc/gausian_median_blur_bilateral_filter/gausian_median_blur_bilateral_filter.rst index 1e2c0ded01..30dfdf8e1b 100644 --- a/doc/tutorials/imgproc/gausian_median_blur_bilateral_filter/gausian_median_blur_bilateral_filter.rst +++ b/doc/tutorials/imgproc/gausian_median_blur_bilateral_filter/gausian_median_blur_bilateral_filter.rst @@ -19,11 +19,11 @@ Theory ====== .. note:: - The explanation below belongs to the book `Computer Vision: Algorithms and Applications `_ by Richard Szeliski and to *LearningOpenCV* + The explanation below belongs to the book `Computer Vision: Algorithms and Applications `_ by Richard Szeliski and to *LearningOpenCV* .. container:: enumeratevisibleitemswithsquare - * *Smoothing*, also called *blurring*, is a simple and frequently used image processing operation. + * *Smoothing*, also called *blurring*, is a simple and frequently used image processing operation. * There are many reasons for smoothing. In this tutorial we will focus on smoothing in order to reduce noise (other uses will be seen in the following tutorials). @@ -33,7 +33,7 @@ Theory g(i,j) = \sum_{k,l} f(i+k, j+l) h(k,l) :math:`h(k,l)` is called the *kernel*, which is nothing more than the coefficients of the filter. - + It helps to visualize a *filter* as a window of coefficients sliding across the image. @@ -44,19 +44,19 @@ Normalized Box Filter .. container:: enumeratevisibleitemswithsquare - * This filter is the simplest of all! Each output pixel is the *mean* of its kernel neighbors ( all of them contribute with equal weights) + * This filter is the simplest of all! Each output pixel is the *mean* of its kernel neighbors ( all of them contribute with equal weights) * The kernel is below: .. math:: - + K = \dfrac{1}{K_{width} \cdot K_{height}} \begin{bmatrix} 1 & 1 & 1 & ... & 1 \\ 1 & 1 & 1 & ... & 1 \\ . & . & . & ... & 1 \\ . & . & . & ... 
& 1 \\ 1 & 1 & 1 & ... & 1 - \end{bmatrix} + \end{bmatrix} Gaussian Filter @@ -69,16 +69,16 @@ Gaussian Filter * Just to make the picture clearer, remember how a 1D Gaussian kernel look like? .. image:: images/Smoothing_Tutorial_theory_gaussian_0.jpg - :align: center + :align: center - Assuming that an image is 1D, you can notice that the pixel located in the middle would have the biggest weight. The weight of its neighbors decreases as the spatial distance between them and the center pixel increases. + Assuming that an image is 1D, you can notice that the pixel located in the middle would have the biggest weight. The weight of its neighbors decreases as the spatial distance between them and the center pixel increases. .. note:: Remember that a 2D Gaussian can be represented as : - + .. math:: - + G_{0}(x, y) = A e^{ \dfrac{ -(x - \mu_{x})^{2} }{ 2\sigma^{2}_{x} } + \dfrac{ -(y - \mu_{y})^{2} }{ 2\sigma^{2}_{y} } } where :math:`\mu` is the mean (the peak) and :math:`\sigma` represents the variance (per each of the variables :math:`x` and :math:`y`) @@ -97,9 +97,9 @@ Bilateral Filter * So far, we have explained some filters which main goal is to *smooth* an input image. However, sometimes the filters do not only dissolve the noise, but also smooth away the *edges*. To avoid this (at certain extent at least), we can use a bilateral filter. - * In an analogous way as the Gaussian filter, the bilateral filter also considers the neighboring pixels with weights assigned to each of them. These weights have two components, the first of which is the same weighting used by the Gaussian filter. The second component takes into account the difference in intensity between the neighboring pixels and the evaluated one. + * In an analogous way as the Gaussian filter, the bilateral filter also considers the neighboring pixels with weights assigned to each of them. These weights have two components, the first of which is the same weighting used by the Gaussian filter. The second component takes into account the difference in intensity between the neighboring pixels and the evaluated one. - * For a more detailed explanation you can check `this link `_ + * For a more detailed explanation you can check `this link `_ Code @@ -108,14 +108,14 @@ Code .. container:: enumeratevisibleitemswithsquare * **What does this program do?** - + .. 
container:: enumeratevisibleitemswithsquare * Loads an image * Applies 4 different kinds of filters (explained in Theory) and show the filtered images sequentially * **Downloadable code**: - Click `here `_ + Click `here `_ * **Code at glance:** @@ -140,29 +140,29 @@ Code int display_caption( char* caption ); int display_dst( int delay ); - /** - * function main + /** + * function main */ - int main( int argc, char** argv ) + int main( int argc, char** argv ) { namedWindow( window_name, CV_WINDOW_AUTOSIZE ); /// Load the source image - src = imread( "../images/lena.jpg", 1 ); + src = imread( "../images/lena.jpg", 1 ); if( display_caption( "Original Image" ) != 0 ) { return 0; } dst = src.clone(); if( display_dst( DELAY_CAPTION ) != 0 ) { return 0; } - /// Applying Homogeneous blur + /// Applying Homogeneous blur if( display_caption( "Homogeneous Blur" ) != 0 ) { return 0; } for ( int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2 ) { blur( src, dst, Size( i, i ), Point(-1,-1) ); if( display_dst( DELAY_BLUR ) != 0 ) { return 0; } } - /// Applying Gaussian blur + /// Applying Gaussian blur if( display_caption( "Gaussian Blur" ) != 0 ) { return 0; } for ( int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2 ) @@ -193,8 +193,8 @@ Code int display_caption( char* caption ) { dst = Mat::zeros( src.size(), src.type() ); - putText( dst, caption, - Point( src.cols/4, src.rows/2), + putText( dst, caption, + Point( src.cols/4, src.rows/2), CV_FONT_HERSHEY_COMPLEX, 1, Scalar(255, 255, 255) ); imshow( window_name, dst ); @@ -208,7 +208,7 @@ Code imshow( window_name, dst ); int c = waitKey ( delay ); if( c >= 0 ) { return -1; } - return 0; + return 0; } @@ -216,7 +216,7 @@ Code Explanation ============= -#. Let's check the OpenCV functions that involve only the smoothing procedure, since the rest is already known by now. +#. Let's check the OpenCV functions that involve only the smoothing procedure, since the rest is already known by now. #. **Normalized Block Filter:** @@ -237,10 +237,10 @@ Explanation + *dst*: Destination image - + *Size( w,h )*: Defines the size of the kernel to be used ( of width *w* pixels and height *h* pixels) + + *Size( w,h )*: Defines the size of the kernel to be used ( of width *w* pixels and height *h* pixels) + + + *Point(-1, -1)*: Indicates where the anchor point (the pixel evaluated) is located with respect to the neighborhood. If there is a negative value, then the center of the kernel is considered the anchor point. - + *Point(-1, -1)*: Indicates where the anchor point (the pixel evaluated) is located with respect to the neighborhood. If there is a negative value, then the center of the kernel is considered the anchor point. - #. **Gaussian Filter:** It is performed by the function :gaussian_blur:`GaussianBlur <>` : @@ -262,9 +262,9 @@ Explanation + *Size(w, h)*: The size of the kernel to be used (the neighbors to be considered). :math:`w` and :math:`h` have to be odd and positive numbers otherwise thi size will be calculated using the :math:`\sigma_{x}` and :math:`\sigma_{y}` arguments. + :math:`\sigma_{x}`: The standard deviation in x. Writing :math:`0` implies that :math:`\sigma_{x}` is calculated using kernel size. - + + :math:`\sigma_{y}`: The standard deviation in y. Writing :math:`0` implies that :math:`\sigma_{y}` is calculated using kernel size. - + #. **Median Filter:** @@ -283,12 +283,12 @@ Explanation + *src*: Source image + *dst*: Destination image, must be the same type as *src* - - + *i*: Size of the kernel (only one because we use a square window). Must be odd. 
+ + + *i*: Size of the kernel (only one because we use a square window). Must be odd. #. **Bilateral Filter** - + Provided by OpenCV function :bilateral_filter:`bilateralFilter <>` .. code-block:: cpp @@ -296,7 +296,7 @@ Explanation for ( int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2 ) { bilateralFilter ( src, dst, i, i*2, i/2 ); if( display_dst( DELAY_BLUR ) != 0 ) { return 0; } } - + We use 5 arguments: .. container:: enumeratevisibleitemswithsquare @@ -306,9 +306,9 @@ Explanation + *dst*: Destination image + *d*: The diameter of each pixel neighborhood. - + + :math:`\sigma_{Color}`: Standard deviation in the color space. - + + :math:`\sigma_{Space}`: Standard deviation in the coordinate space (in pixel terms) @@ -317,10 +317,10 @@ Results .. container:: enumeratevisibleitemswithsquare - * The code opens an image (in this case *lena.jpg*) and display it under the effects of the 4 filters explained. + * The code opens an image (in this case *lena.jpg*) and display it under the effects of the 4 filters explained. * Here is a snapshot of the image smoothed using *medianBlur*: .. image:: images/Smoothing_Tutorial_Result_Median_Filter.jpg :alt: Smoothing with a median filter - :align: center + :align: center diff --git a/doc/tutorials/imgproc/histograms/back_projection/back_projection.rst b/doc/tutorials/imgproc/histograms/back_projection/back_projection.rst index a9bcc9884d..f8b134322a 100644 --- a/doc/tutorials/imgproc/histograms/back_projection/back_projection.rst +++ b/doc/tutorials/imgproc/histograms/back_projection/back_projection.rst @@ -14,7 +14,7 @@ In this tutorial you will learn: * What is Back Projection and why it is useful * How to use the OpenCV function :calc_back_project:`calcBackProject <>` to calculate Back Projection - + * How to mix different channels of an image by using the OpenCV function :mix_channels:`mixChannels <>` @@ -27,8 +27,8 @@ What is Back Projection? .. container:: enumeratevisibleitemswithsquare * Back Projection is a way of recording how well the pixels of a given image fit the distribution of pixels in a histogram model. - - * To make it simpler: For Back Projection, you calculate the histogram model of a feature and then use it to find this feature in an image. + + * To make it simpler: For Back Projection, you calculate the histogram model of a feature and then use it to find this feature in an image. * Application example: If you have a histogram of flesh color (say, a Hue-Saturation histogram ), then you can use it to find flesh color areas in an image: @@ -42,9 +42,9 @@ How does it work? * Let's say you have gotten a skin histogram (Hue-Saturation) based on the image below. The histogram besides is going to be our *model histogram* (which we know represents a sample of skin tonality). You applied some mask to capture only the histogram of the skin area: - ====== ====== - |T0| |T1| - ====== ====== + ====== ====== + |T0| |T1| + ====== ====== .. |T0| image:: images/Back_Projection_Theory0.jpg :align: middle @@ -55,9 +55,9 @@ How does it work? * Now, let's imagine that you get another hand image (Test Image) like the one below: (with its respective histogram): - ====== ====== - |T2| |T3| - ====== ====== + ====== ====== + |T2| |T3| + ====== ====== .. |T2| image:: images/Back_Projection_Theory2.jpg :align: middle @@ -70,7 +70,7 @@ How does it work? a. In each pixel of our Test Image (i.e. :math:`p(i,j)` ), collect the data and find the correspondent bin location for that pixel (i.e. :math:`( h_{i,j}, s_{i,j} )` ). - b. 
Lookup the *model histogram* in the correspondent bin - :math:`( h_{i,j}, s_{i,j} )` - and read the bin value. + b. Lookup the *model histogram* in the correspondent bin - :math:`( h_{i,j}, s_{i,j} )` - and read the bin value. c. Store this bin value in a new image (*BackProjection*). Also, you may consider to normalize the *model histogram* first, so the output for the Test Image can be visible for you. @@ -88,7 +88,7 @@ Code .. container:: enumeratevisibleitemswithsquare * **What does this program do?** - + .. container:: enumeratevisibleitemswithsquare * Loads an image @@ -99,9 +99,9 @@ Code * **Downloadable code**: - a. Click `here `_ for the basic version (explained in this tutorial). - b. For stuff slightly fancier (using H-S histograms and floodFill to define a mask for the skin area) you can check the `improved demo `_ - c. ...or you can always check out the classical `camshiftdemo `_ in samples. + a. Click `here `_ for the basic version (explained in this tutorial). + b. For stuff slightly fancier (using H-S histograms and floodFill to define a mask for the skin area) you can check the `improved demo `_ + c. ...or you can always check out the classical `camshiftdemo `_ in samples. * **Code at glance:** @@ -116,7 +116,7 @@ Code using namespace std; /// Global Variables - Mat src; Mat hsv; Mat hue; + Mat src; Mat hsv; Mat hue; int bins = 25; /// Function Headers @@ -133,7 +133,7 @@ Code /// Use only the Hue value hue.create( hsv.size(), hsv.depth() ); int ch[] = { 0, 0 }; - mixChannels( &hsv, 1, &hue, 1, ch, 1 ); + mixChannels( &hsv, 1, &hue, 1, ch, 1 ); /// Create Trackbar to enter the number of bins char* window_image = "Source image"; @@ -146,7 +146,7 @@ Code /// Wait until user exits the program waitKey(0); - return 0; + return 0; } @@ -157,7 +157,7 @@ Code void Hist_and_Backproj(int, void* ) { MatND hist; - int histSize = MAX( bins, 2 ); + int histSize = MAX( bins, 2 ); float hue_range[] = { 0, 180 }; const float* ranges = { hue_range }; @@ -168,16 +168,16 @@ Code /// Get Backprojection MatND backproj; calcBackProject( &hue, 1, 0, hist, backproj, &ranges, 1, true ); - + /// Draw the backproj imshow( "BackProj", backproj ); /// Draw the histogram int w = 400; int h = 400; - int bin_w = cvRound( (double) w / histSize ); + int bin_w = cvRound( (double) w / histSize ); Mat histImg = Mat::zeros( w, h, CV_8UC3 ); - for( int i = 0; i < bins; i ++ ) + for( int i = 0; i < bins; i ++ ) { rectangle( histImg, Point( i*bin_w, h ), Point( (i+1)*bin_w, h - cvRound( hist.at(i)*h/255.0 ) ), Scalar( 0, 0, 255 ), -1 ); } imshow( "Histogram", histImg ); @@ -190,7 +190,7 @@ Explanation .. code-block:: cpp - Mat src; Mat hsv; Mat hue; + Mat src; Mat hsv; Mat hue; int bins = 25; #. Read the input image and transform it to HSV format: @@ -206,7 +206,7 @@ Explanation hue.create( hsv.size(), hsv.depth() ); int ch[] = { 0, 0 }; - mixChannels( &hsv, 1, &hue, 1, ch, 1 ); + mixChannels( &hsv, 1, &hue, 1, ch, 1 ); as you see, we use the function :mix_channels:`mixChannels` to get only the channel 0 (Hue) from the hsv image. It gets the following parameters: @@ -214,15 +214,15 @@ Explanation + **&hsv:** The source array from which the channels will be copied + **1:** The number of source arrays - + **&hue:** The destination array of the copied channels + + **&hue:** The destination array of the copied channels + **1:** The number of destination arrays + **ch[] = {0,0}:** The array of index pairs indicating how the channels are copied. 
In this case, the Hue(0) channel of &hsv is being copied to the 0 channel of &hue (1-channel) - + **1:** Number of index pairs - + + **1:** Number of index pairs + #. Create a Trackbar for the user to enter the bin values. Any change on the Trackbar means a call to the **Hist_and_Backproj** callback function. .. code-block:: cpp - + char* window_image = "Source image"; namedWindow( window_image, CV_WINDOW_AUTOSIZE ); createTrackbar("* Hue bins: ", window_image, &bins, 180, Hist_and_Backproj ); @@ -235,7 +235,7 @@ Explanation imshow( window_image, src ); waitKey(0); - return 0; + return 0; #. **Hist_and_Backproj function:** Initialize the arguments needed for :calc_hist:`calcHist <>`. The number of bins comes from the Trackbar: @@ -245,7 +245,7 @@ Explanation void Hist_and_Backproj(int, void* ) { MatND hist; - int histSize = MAX( bins, 2 ); + int histSize = MAX( bins, 2 ); float hue_range[] = { 0, 180 }; const float* ranges = { hue_range }; @@ -264,7 +264,7 @@ Explanation calcBackProject( &hue, 1, 0, hist, backproj, &ranges, 1, true ); all the arguments are known (the same as used to calculate the histogram), only we add the backproj matrix, which will store the backprojection of the source image (&hue) - + #. Display backproj: .. code-block:: cpp @@ -276,10 +276,10 @@ Explanation .. code-block:: cpp int w = 400; int h = 400; - int bin_w = cvRound( (double) w / histSize ); + int bin_w = cvRound( (double) w / histSize ); Mat histImg = Mat::zeros( w, h, CV_8UC3 ); - for( int i = 0; i < bins; i ++ ) + for( int i = 0; i < bins; i ++ ) { rectangle( histImg, Point( i*bin_w, h ), Point( (i+1)*bin_w, h - cvRound( hist.at(i)*h/255.0 ) ), Scalar( 0, 0, 255 ), -1 ); } imshow( "Histogram", histImg ); @@ -291,9 +291,9 @@ Results #. Here are the output by using a sample image ( guess what? Another hand ). You can play with the bin values and you will observe how it affects the results: - ====== ====== ====== - |R0| |R1| |R2| - ====== ====== ====== + ====== ====== ====== + |R0| |R1| |R2| + ====== ====== ====== .. |R0| image:: images/Back_Projection1_Source_Image.jpg :align: middle diff --git a/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.rst b/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.rst index e0b9711a84..de1567abb2 100644 --- a/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.rst +++ b/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.rst @@ -13,7 +13,7 @@ In this tutorial you will learn how to: * Use the OpenCV function :split:`split <>` to divide an image into its correspondent planes. * To calculate histograms of arrays of images by using the OpenCV function :calc_hist:`calcHist <>` - + * To normalize an array by using the function :normalize:`normalize <>` @@ -34,7 +34,7 @@ What are histograms? .. image:: images/Histogram_Calculation_Theory_Hist0.jpg - :align: center + :align: center * What happens if we want to *count* this data in an organized way? Since we know that the *range* of information value for this case is 256 values, we can segment our range in subparts (called **bins**) like: @@ -42,22 +42,22 @@ What are histograms? \begin{array}{l} [0, 255] = { [0, 15] \cup [16, 31] \cup ....\cup [240,255] } \\ range = { bin_{1} \cup bin_{2} \cup ....\cup bin_{n = 15} } - \end{array} + \end{array} and we can keep count of the number of pixels that fall in the range of each :math:`bin_{i}`. 
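(In code, this per-bin counting is exactly what :calc_hist:`calcHist <>` automates later in this tutorial. Purely as an illustration, and not part of the tutorial program, the sketch below counts a greyscale image into the same 16 bins by hand; the variable names and the loading flag are assumptions of this sketch.)

.. code-block:: cpp

   #include "opencv2/highgui/highgui.hpp"

   using namespace cv;

   int main( int argc, char** argv )
   {
     /// Load the image as single-channel, 8-bit greyscale (assumption for this sketch)
     Mat gray = imread( argv[1], 0 );
     if( !gray.data ) { return -1; }

     /// bins[k] counts the pixels whose intensity falls in [16*k, 16*k + 15]
     int bins[16] = {0};
     for( int y = 0; y < gray.rows; y++ )
       for( int x = 0; x < gray.cols; x++ )
         { bins[ gray.at<uchar>(y,x) / 16 ]++; }

     return 0;
   }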
Applying this to the example above we get the image below ( axis x represents the bins and axis y the number of pixels in each of them). - + .. image:: images/Histogram_Calculation_Theory_Hist1.jpg - :align: center + :align: center + + * This was just a simple example of how an histogram works and why it is useful. An histogram can keep count not only of color intensities, but of whatever image features that we want to measure (i.e. gradients, directions, etc). - * This was just a simple example of how an histogram works and why it is useful. An histogram can keep count not only of color intensities, but of whatever image features that we want to measure (i.e. gradients, directions, etc). - * Let's identify some parts of the histogram: a. **dims**: The number of parameters you want to collect data of. In our example, **dims = 1** because we are only counting the intensity values of each pixel (in a greyscale image). b. **bins**: It is the number of **subdivisions** in each dim. In our example, **bins = 16** - c. **range**: The limits for the values to be measured. In this case: **range = [0,255]** - + c. **range**: The limits for the values to be measured. In this case: **range = [0,255]** + * What if you want to count two features? In this case your resulting histogram would be a 3D plot (in which x and y would be :math:`bin_{x}` and :math:`bin_{y}` for each feature and z would be the number of counts for each combination of :math:`(bin_{x}, bin_{y})`. The same would apply for more features (of course it gets trickier). @@ -65,7 +65,7 @@ What OpenCV offers you ----------------------- For simple purposes, OpenCV implements the function :calc_hist:`calcHist <>`, which calculates the histogram of a set of arrays (usually images or image planes). It can operate with up to 32 dimensions. We will see it in the code below! - + Code ==== @@ -73,7 +73,7 @@ Code .. container:: enumeratevisibleitemswithsquare * **What does this program do?** - + .. container:: enumeratevisibleitemswithsquare * Loads an image @@ -82,7 +82,7 @@ Code * Plot the three histograms in a window * **Downloadable code**: - Click `here `_ + Click `here `_ * **Code at glance:** @@ -181,7 +181,7 @@ Explanation if( !src.data ) { return -1; } -#. Separate the source image in its three R,G and B planes. For this we use the OpenCV function :split:`split <>`: +#. Separate the source image in its three R,G and B planes. For this we use the OpenCV function :split:`split <>`: .. code-block:: cpp @@ -195,7 +195,7 @@ Explanation a. Establish number of bins (5, 10...): .. code-block:: cpp - + int histSize = 256; //from 0 to 255 b. Set the range of values (as we said, between 0 and 255 ) @@ -219,25 +219,25 @@ Explanation Mat b_hist, g_hist, r_hist; e. We proceed to calculate the histograms by using the OpenCV function :calc_hist:`calcHist <>`: - + .. code-block:: cpp /// Compute the histograms: calcHist( &bgr_planes[0], 1, 0, Mat(), b_hist, 1, &histSize, &histRange, uniform, accumulate ); calcHist( &bgr_planes[1], 1, 0, Mat(), g_hist, 1, &histSize, &histRange, uniform, accumulate ); calcHist( &bgr_planes[2], 1, 0, Mat(), r_hist, 1, &histSize, &histRange, uniform, accumulate ); - + where the arguments are: .. container:: enumeratevisibleitemswithsquare - + + **&bgr_planes[0]:** The source array(s) + **1**: The number of source arrays (in this case we are using 1. We can enter here also a list of arrays ) + **0**: The channel (*dim*) to be measured. In this case it is just the intensity (each array is single-channel) so we just write 0. 
+ **Mat()**: A mask to be used on the source array ( zeros indicating pixels to be ignored ). If not defined it is not used + **b_hist**: The Mat object where the histogram will be stored - + **1**: The histogram dimensionality. - + **histSize:** The number of bins per each used dimension + + **1**: The histogram dimensionality. + + **histSize:** The number of bins per each used dimension + **histRange:** The range of values to be measured per each dimension + **uniform** and **accumulate**: The bin sizes are the same and the histogram is cleared at the beginning. @@ -264,7 +264,7 @@ Explanation this function receives these arguments: .. container:: enumeratevisibleitemswithsquare - + + **b_hist:** Input array + **b_hist:** Output normalized array (can be the same) + **0** and**histImage.rows**: For this example, they are the lower and upper limits to normalize the values of **r_hist** @@ -291,7 +291,7 @@ Explanation } - we use the expression: + we use the expression: .. code-block:: cpp @@ -315,7 +315,7 @@ Explanation waitKey(0); return 0; - + Result ====== @@ -323,10 +323,10 @@ Result #. Using as input argument an image like the shown below: .. image:: images/Histogram_Calculation_Original_Image.jpg - :align: center + :align: center #. Produces the following histogram: .. image:: images/Histogram_Calculation_Result.jpg - :align: center + :align: center diff --git a/doc/tutorials/imgproc/histograms/histogram_comparison/histogram_comparison.rst b/doc/tutorials/imgproc/histograms/histogram_comparison/histogram_comparison.rst index 7844d0e576..be9dc7f81b 100644 --- a/doc/tutorials/imgproc/histograms/histogram_comparison/histogram_comparison.rst +++ b/doc/tutorials/imgproc/histograms/histogram_comparison/histogram_comparison.rst @@ -25,43 +25,43 @@ Theory a. **Correlation ( CV\_COMP\_CORREL )** - + .. math:: - - d(H_1,H_2) = \frac{\sum_I (H_1(I) - \bar{H_1}) (H_2(I) - \bar{H_2})}{\sqrt{\sum_I(H_1(I) - \bar{H_1})^2 \sum_I(H_2(I) - \bar{H_2})^2}} - + + d(H_1,H_2) = \frac{\sum_I (H_1(I) - \bar{H_1}) (H_2(I) - \bar{H_2})}{\sqrt{\sum_I(H_1(I) - \bar{H_1})^2 \sum_I(H_2(I) - \bar{H_2})^2}} + where - + .. math:: - - \bar{H_k} = \frac{1}{N} \sum _J H_k(J) - - + + \bar{H_k} = \frac{1}{N} \sum _J H_k(J) + + and :math:`N` is the total number of histogram bins. - - + + b. **Chi-Square ( CV\_COMP\_CHISQR )** - + .. math:: - - d(H_1,H_2) = \sum _I \frac{\left(H_1(I)-H_2(I)\right)^2}{H_1(I)} - - + + d(H_1,H_2) = \sum _I \frac{\left(H_1(I)-H_2(I)\right)^2}{H_1(I)} + + c. **Intersection ( method=CV\_COMP\_INTERSECT )** - + .. math:: - - d(H_1,H_2) = \sum _I \min (H_1(I), H_2(I)) - - + + d(H_1,H_2) = \sum _I \min (H_1(I), H_2(I)) + + d. **Bhattacharyya distance ( CV\_COMP\_BHATTACHARYYA )** - + .. math:: - - d(H_1,H_2) = \sqrt{1 - \frac{1}{\sqrt{\bar{H_1} \bar{H_2} N^2}} \sum_I \sqrt{H_1(I) \cdot H_2(I)}} - - + + d(H_1,H_2) = \sqrt{1 - \frac{1}{\sqrt{\bar{H_1} \bar{H_2} N^2}} \sum_I \sqrt{H_1(I) \cdot H_2(I)}} + + Code ==== @@ -69,7 +69,7 @@ Code .. container:: enumeratevisibleitemswithsquare * **What does this program do?** - + .. container:: enumeratevisibleitemswithsquare * Loads a *base image* and 2 *test images* to be compared with it. @@ -79,8 +79,8 @@ Code * Compare the histogram of the *base image* with respect to the 2 test histograms, the histogram of the lower half base image and with the same base image histogram. * Display the numerical matching parameters obtained. 
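Before the full listing, note that each of the four metrics above reduces to a single :compare_hist:`compareHist <>` call. The fragment below is only a sketch: it assumes two H-S histograms *hist_base* and *hist_test1* that have already been computed with :calc_hist:`calcHist <>` and normalized, as the tutorial program does.

.. code-block:: cpp

   /// Sketch only: hist_base and hist_test1 are assumed to exist already
   double d_correl = compareHist( hist_base, hist_test1, CV_COMP_CORREL );
   double d_chisqr = compareHist( hist_base, hist_test1, CV_COMP_CHISQR );
   double d_inter  = compareHist( hist_base, hist_test1, CV_COMP_INTERSECT );
   double d_bhatta = compareHist( hist_base, hist_test1, CV_COMP_BHATTACHARYYA );

Remember that a higher value means a better match for the correlation and intersection methods, while a lower value means a better match for chi-square and Bhattacharyya.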
- * **Downloadable code**: - Click `here `_ + * **Downloadable code**: + Click `here `_ * **Code at glance:** @@ -105,7 +105,7 @@ Code /// Load three images with different environment settings if( argc < 4 ) { printf("** Error. Usage: ./compareHist_Demo \n"); - return -1; + return -1; } src_base = imread( argv[1], 1 ); @@ -117,7 +117,7 @@ Code cvtColor( src_test1, hsv_test1, CV_BGR2HSV ); cvtColor( src_test2, hsv_test2, CV_BGR2HSV ); - hsv_half_down = hsv_base( Range( hsv_base.rows/2, hsv_base.rows - 1 ), Range( 0, hsv_base.cols - 1 ) ); + hsv_half_down = hsv_base( Range( hsv_base.rows/2, hsv_base.rows - 1 ), Range( 0, hsv_base.cols - 1 ) ); /// Using 30 bins for hue and 32 for saturation int h_bins = 50; int s_bins = 60; @@ -153,14 +153,14 @@ Code /// Apply the histogram comparison methods for( int i = 0; i < 4; i++ ) - { int compare_method = i; + { int compare_method = i; double base_base = compareHist( hist_base, hist_base, compare_method ); double base_half = compareHist( hist_base, hist_half_down, compare_method ); double base_test1 = compareHist( hist_base, hist_test1, compare_method ); double base_test2 = compareHist( hist_base, hist_test2, compare_method ); - + printf( " Method [%d] Perfect, Base-Half, Base-Test(1), Base-Test(2) : %f, %f, %f, %f \n", i, base_base, base_half , base_test1, base_test2 ); - } + } printf( "Done \n" ); @@ -171,7 +171,7 @@ Code Explanation =========== -#. Declare variables such as the matrices to store the base image and the two other images to compare ( RGB and HSV ) +#. Declare variables such as the matrices to store the base image and the two other images to compare ( RGB and HSV ) .. code-block:: cpp @@ -186,7 +186,7 @@ Explanation if( argc < 4 ) { printf("** Error. Usage: ./compareHist_Demo \n"); - return -1; + return -1; } src_base = imread( argv[1], 1 ); @@ -205,7 +205,7 @@ Explanation .. code-block:: cpp - hsv_half_down = hsv_base( Range( hsv_base.rows/2, hsv_base.rows - 1 ), Range( 0, hsv_base.cols - 1 ) ); + hsv_half_down = hsv_base( Range( hsv_base.rows/2, hsv_base.rows - 1 ), Range( 0, hsv_base.cols - 1 ) ); #. Initialize the arguments to calculate the histograms (bins, ranges and channels H and S ). @@ -233,7 +233,7 @@ Explanation #. Calculate the Histograms for the base image, the 2 test images and the half-down base image: .. code-block:: cpp - + calcHist( &hsv_base, 1, channels, Mat(), hist_base, 2, histSize, ranges, true, false ); normalize( hist_base, hist_base, 0, 1, NORM_MINMAX, -1, Mat() ); @@ -252,24 +252,24 @@ Explanation .. code-block:: cpp for( int i = 0; i < 4; i++ ) - { int compare_method = i; + { int compare_method = i; double base_base = compareHist( hist_base, hist_base, compare_method ); double base_half = compareHist( hist_base, hist_half_down, compare_method ); double base_test1 = compareHist( hist_base, hist_test1, compare_method ); double base_test2 = compareHist( hist_base, hist_test2, compare_method ); - - printf( " Method [%d] Perfect, Base-Half, Base-Test(1), Base-Test(2) : %f, %f, %f, %f \n", i, base_base, base_half , base_test1, base_test2 ); - } - + printf( " Method [%d] Perfect, Base-Half, Base-Test(1), Base-Test(2) : %f, %f, %f, %f \n", i, base_base, base_half , base_test1, base_test2 ); + } + + Results ======== #. We use as input the following images: - ============ ============ ============ + ============ ============ ============ |Base_0| |Test_1| |Test_2| - ============ ============ ============ + ============ ============ ============ .. 
|Base_0| image:: images/Histogram_Comparison_Source_0.jpg :align: middle @@ -289,10 +289,10 @@ Results =============== =============== =============== =============== =============== *Method* Base - Base Base - Half Base - Test 1 Base - Test 2 =============== =============== =============== =============== =============== - *Correlation* 1.000000 0.930766 0.182073 0.120447 - *Chi-square* 0.000000 4.940466 21.184536 49.273437 - *Intersection* 24.391548 14.959809 3.889029 5.775088 - *Bhattacharyya* 0.000000 0.222609 0.646576 0.801869 + *Correlation* 1.000000 0.930766 0.182073 0.120447 + *Chi-square* 0.000000 4.940466 21.184536 49.273437 + *Intersection* 24.391548 14.959809 3.889029 5.775088 + *Bhattacharyya* 0.000000 0.222609 0.646576 0.801869 =============== =============== =============== =============== =============== diff --git a/doc/tutorials/imgproc/histograms/histogram_equalization/histogram_equalization.rst b/doc/tutorials/imgproc/histograms/histogram_equalization/histogram_equalization.rst index 5568072fe2..24534a7068 100644 --- a/doc/tutorials/imgproc/histograms/histogram_equalization/histogram_equalization.rst +++ b/doc/tutorials/imgproc/histograms/histogram_equalization/histogram_equalization.rst @@ -12,7 +12,7 @@ In this tutorial you will learn: * What an image histogram is and why it is useful - * To equalize histograms of images by using the OpenCV function:equalize_hist:`equalizeHist <>` + * To equalize histograms of images by using the OpenCV function:equalize_hist:`equalizeHist <>` @@ -24,12 +24,12 @@ What is an Image Histogram? .. container:: enumeratevisibleitemswithsquare - * It is a graphical representation of the intensity distribution of an image. + * It is a graphical representation of the intensity distribution of an image. * It quantifies the number of pixels for each intensity value considered. .. image:: images/Histogram_Equalization_Theory_0.jpg - :align: center + :align: center What is Histogram Equalization? @@ -42,30 +42,30 @@ What is Histogram Equalization? * To make it clearer, from the image above, you can see that the pixels seem clustered around the middle of the available range of intensities. What Histogram Equalization does is to *stretch out* this range. Take a look at the figure below: The green circles indicate the *underpopulated* intensities. After applying the equalization, we get an histogram like the figure in the center. The resulting image is shown in the picture at right. .. image:: images/Histogram_Equalization_Theory_1.jpg - :align: center + :align: center How does it work? ----------------- .. container:: enumeratevisibleitemswithsquare - * Equalization implies *mapping* one distribution (the given histogram) to another distribution (a wider and more uniform distribution of intensity values) so the intensity values are spreaded over the whole range. + * Equalization implies *mapping* one distribution (the given histogram) to another distribution (a wider and more uniform distribution of intensity values) so the intensity values are spreaded over the whole range. * To accomplish the equalization effect, the remapping should be the *cumulative distribution function (cdf)* (more details, refer to *Learning OpenCV*). For the histogram :math:`H(i)`, its *cumulative distribution* :math:`H^{'}(i)` is: .. 
math:: - H^{'}(i) = \sum_{0 \le j < i} H(j) + H^{'}(i) = \sum_{0 \le j < i} H(j) To use this as a remapping function, we have to normalize :math:`H^{'}(i)` such that the maximum value is 255 ( or the maximum value for the intensity of the image ). From the example above, the cumulative function is: .. image:: images/Histogram_Equalization_Theory_2.jpg - :align: center + :align: center * Finally, we use a simple remapping procedure to obtain the intensity values of the equalized image: .. math:: - + equalized( x, y ) = H^{'}( src(x,y) ) Code @@ -74,16 +74,16 @@ Code .. container:: enumeratevisibleitemswithsquare * **What does this program do?** - + .. container:: enumeratevisibleitemswithsquare * Loads an image - * Convert the original image to grayscale + * Convert the original image to grayscale * Equalize the Histogram by using the OpenCV function :equalize_hist:`EqualizeHist <>` * Display the source and equalized images in a window. * **Downloadable code**: - Click `here `_ + Click `here `_ * **Code at glance:** @@ -117,15 +117,15 @@ Code /// Apply Histogram Equalization equalizeHist( src, dst ); - + /// Display results namedWindow( source_window, CV_WINDOW_AUTOSIZE ); namedWindow( equalized_window, CV_WINDOW_AUTOSIZE ); imshow( source_window, src ); imshow( equalized_window, dst ); - - /// Wait until user exits the program + + /// Wait until user exits the program waitKey(0); return 0; @@ -134,7 +134,7 @@ Code Explanation =========== -#. Declare the source and destination images as well as the windows names: +#. Declare the source and destination images as well as the windows names: .. code-block:: cpp @@ -144,7 +144,7 @@ Explanation char* equalized_window = "Equalized Image"; #. Load the source image: - + .. code-block:: cpp src = imread( argv[1], 1 ); @@ -164,7 +164,7 @@ Explanation .. code-block:: cpp equalizeHist( src, dst ); - + As it can be easily seen, the only arguments are the original image and the output (equalized) image. #. Display both images (original and equalized) : @@ -176,9 +176,9 @@ Explanation imshow( source_window, src ); imshow( equalized_window, dst ); - + #. Wait until user exists the program - + .. code-block:: cpp waitKey(0); @@ -191,19 +191,19 @@ Results #. To appreciate better the results of equalization, let's introduce an image with not much contrast, such as: .. image:: images/Histogram_Equalization_Original_Image.jpg - :align: center + :align: center which, by the way, has this histogram: .. image:: images/Histogram_Equalization_Original_Histogram.jpg - :align: center + :align: center notice that the pixels are clustered around the center of the histogram. #. After applying the equalization with our program, we get this result: .. image:: images/Histogram_Equalization_Equalized_Image.jpg - :align: center + :align: center this image has certainly more contrast. Check out its new histogram like this: diff --git a/doc/tutorials/imgproc/histograms/template_matching/template_matching.rst b/doc/tutorials/imgproc/histograms/template_matching/template_matching.rst index b58ec11587..e0c643d05c 100644 --- a/doc/tutorials/imgproc/histograms/template_matching/template_matching.rst +++ b/doc/tutorials/imgproc/histograms/template_matching/template_matching.rst @@ -31,81 +31,81 @@ How does it work? * We need two primary components: - a. **Source image (I):** The image in which we expect to find a match to the template image - b. **Template image (T):** The patch image which will be compared to the template image + a. 
**Source image (I):** The image in which we expect to find a match to the template image + b. **Template image (T):** The patch image which will be compared to the template image our goal is to detect the highest matching area: .. image:: images/Template_Matching_Template_Theory_Summary.jpg - :align: center + :align: center * To identify the matching area, we have to *compare* the template image against the source image by sliding it: .. image:: images/Template_Matching_Template_Theory_Sliding.jpg - :align: center + :align: center * By **sliding**, we mean moving the patch one pixel at a time (left to right, up to down). At each location, a metric is calculated so it represents how "good" or "bad" the match at that location is (or how similar the patch is to that particular area of the source image). * For each location of **T** over **I**, you *store* the metric in the *result matrix* **(R)**. Each location :math:`(x,y)` in **R** contains the match metric: .. image:: images/Template_Matching_Template_Theory_Result.jpg - :align: center + :align: center - the image above is the result **R** of sliding the patch with a metric **TM_CCORR_NORMED**. The brightest locations indicate the highest matches. As you can see, the location marked by the red circle is probably the one with the highest value, so that location (the rectangle formed by that point as a corner and width and height equal to the patch image) is considered the match. + the image above is the result **R** of sliding the patch with a metric **TM_CCORR_NORMED**. The brightest locations indicate the highest matches. As you can see, the location marked by the red circle is probably the one with the highest value, so that location (the rectangle formed by that point as a corner and width and height equal to the patch image) is considered the match. * In practice, we use the function :min_max_loc:`minMaxLoc <>` to locate the highest value (or lower, depending of the type of matching method) in the *R* matrix. - + Which are the matching methods available in OpenCV? ---------------------------------------------------- Good question. OpenCV implements Template matching in the function :match_template:`matchTemplate <>`. The available methods are 6: a. **method=CV\_TM\_SQDIFF** - + .. math:: - - R(x,y)= \sum _{x',y'} (T(x',y')-I(x+x',y+y'))^2 - - + + R(x,y)= \sum _{x',y'} (T(x',y')-I(x+x',y+y'))^2 + + b. **method=CV\_TM\_SQDIFF\_NORMED** - + .. math:: - - R(x,y)= \frac{\sum_{x',y'} (T(x',y')-I(x+x',y+y'))^2}{\sqrt{\sum_{x',y'}T(x',y')^2 \cdot \sum_{x',y'} I(x+x',y+y')^2}} - + + R(x,y)= \frac{\sum_{x',y'} (T(x',y')-I(x+x',y+y'))^2}{\sqrt{\sum_{x',y'}T(x',y')^2 \cdot \sum_{x',y'} I(x+x',y+y')^2}} + c. **method=CV\_TM\_CCORR** - - .. math:: - - R(x,y)= \sum _{x',y'} (T(x',y') \cdot I(x+x',y+y')) - -d. **method=CV\_TM\_CCORR\_NORMED** - .. math:: - - R(x,y)= \frac{\sum_{x',y'} (T(x',y') \cdot I'(x+x',y+y'))}{\sqrt{\sum_{x',y'}T(x',y')^2 \cdot \sum_{x',y'} I(x+x',y+y')^2}} - + + R(x,y)= \sum _{x',y'} (T(x',y') \cdot I(x+x',y+y')) + + +d. **method=CV\_TM\_CCORR\_NORMED** + + .. math:: + + R(x,y)= \frac{\sum_{x',y'} (T(x',y') \cdot I'(x+x',y+y'))}{\sqrt{\sum_{x',y'}T(x',y')^2 \cdot \sum_{x',y'} I(x+x',y+y')^2}} + e. **method=CV\_TM\_CCOEFF** - + .. math:: - - R(x,y)= \sum _{x',y'} (T'(x',y') \cdot I(x+x',y+y')) - + + R(x,y)= \sum _{x',y'} (T'(x',y') \cdot I(x+x',y+y')) + where - + .. 
math:: - - \begin{array}{l} T'(x',y')=T(x',y') - 1/(w \cdot h) \cdot \sum _{x'',y''} T(x'',y'') \\ I'(x+x',y+y')=I(x+x',y+y') - 1/(w \cdot h) \cdot \sum _{x'',y''} I(x+x'',y+y'') \end{array} - - + + \begin{array}{l} T'(x',y')=T(x',y') - 1/(w \cdot h) \cdot \sum _{x'',y''} T(x'',y'') \\ I'(x+x',y+y')=I(x+x',y+y') - 1/(w \cdot h) \cdot \sum _{x'',y''} I(x+x'',y+y'') \end{array} + + f. **method=CV\_TM\_CCOEFF\_NORMED** - + .. math:: - - R(x,y)= \frac{ \sum_{x',y'} (T'(x',y') \cdot I'(x+x',y+y')) }{ \sqrt{\sum_{x',y'}T'(x',y')^2 \cdot \sum_{x',y'} I'(x+x',y+y')^2} } + + R(x,y)= \frac{ \sum_{x',y'} (T'(x',y') \cdot I'(x+x',y+y')) }{ \sqrt{\sum_{x',y'}T'(x',y')^2 \cdot \sum_{x',y'} I'(x+x',y+y')^2} } Code @@ -115,7 +115,7 @@ Code .. container:: enumeratevisibleitemswithsquare * **What does this program do?** - + .. container:: enumeratevisibleitemswithsquare * Loads an input image and a image patch (*template*) @@ -125,13 +125,13 @@ Code * Draw a rectangle around the area corresponding to the highest match * **Downloadable code**: - Click `here `_ + Click `here `_ * **Code at glance:** .. code-block:: cpp - #include "opencv2/highgui/highgui.hpp" + #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" #include #include @@ -160,7 +160,7 @@ Code /// Create windows namedWindow( image_window, CV_WINDOW_AUTOSIZE ); namedWindow( result_window, CV_WINDOW_AUTOSIZE ); - + /// Create Trackbar char* trackbar_label = "Method: \n 0: SQDIFF \n 1: SQDIFF NORMED \n 2: TM CCORR \n 3: TM CCORR NORMED \n 4: TM COEFF \n 5: TM COEFF NORMED"; createTrackbar( trackbar_label, image_window, &match_method, max_Trackbar, MatchingMethod ); @@ -180,11 +180,11 @@ Code /// Source image to display Mat img_display; img.copyTo( img_display ); - + /// Create the result matrix int result_cols = img.cols - templ.cols + 1; - int result_rows = img.rows - templ.rows + 1; - + int result_rows = img.rows - templ.rows + 1; + result.create( result_cols, result_rows, CV_32FC1 ); /// Do the Matching and Normalize @@ -194,18 +194,18 @@ Code /// Localizing the best match with minMaxLoc double minVal; double maxVal; Point minLoc; Point maxLoc; Point matchLoc; - + minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() ); /// For SQDIFF and SQDIFF_NORMED, the best matches are lower values. For all the other methods, the higher the better if( match_method == CV_TM_SQDIFF || match_method == CV_TM_SQDIFF_NORMED ) { matchLoc = minLoc; } - else + else { matchLoc = maxLoc; } /// Show me what you got - rectangle( img_display, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 ); - rectangle( result, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 ); + rectangle( img_display, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 ); + rectangle( result, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 ); imshow( image_window, img_display ); imshow( result_window, result ); @@ -241,7 +241,7 @@ Explanation namedWindow( image_window, CV_WINDOW_AUTOSIZE ); namedWindow( result_window, CV_WINDOW_AUTOSIZE ); - + #. Create the Trackbar to enter the kind of matching method to be used. When a change is detected the callback function **MatchingMethod** is called. .. code-block:: cpp @@ -255,7 +255,7 @@ Explanation waitKey(0); return 0; - + #. Let's check out the callback function. First, it makes a copy of the source image: .. 
code-block:: cpp @@ -267,12 +267,12 @@ Explanation #. Next, it creates the result matrix that will store the matching results for each template location. Observe in detail the size of the result matrix (which matches all possible locations for it) .. code-block:: cpp - + int result_cols = img.cols - templ.cols + 1; - int result_rows = img.rows - templ.rows + 1; - + int result_rows = img.rows - templ.rows + 1; + result.create( result_cols, result_rows, CV_32FC1 ); - + #. Perform the template matching operation: .. code-block:: cpp @@ -287,18 +287,18 @@ Explanation normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() ); -#. We localize the minimum and maximum values in the result matrix **R** by using :min_max_loc:`minMaxLoc <>`. +#. We localize the minimum and maximum values in the result matrix **R** by using :min_max_loc:`minMaxLoc <>`. .. code-block:: cpp double minVal; double maxVal; Point minLoc; Point maxLoc; Point matchLoc; - + minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() ); - + the function calls as arguments: - .. container:: enumeratevisibleitemswithsquare + .. container:: enumeratevisibleitemswithsquare + **result:** The source array + **&minVal** and **&maxVal:** Variables to save the minimum and maximum values in **result** @@ -309,18 +309,18 @@ Explanation #. For the first two methods ( CV\_SQDIFF and CV\_SQDIFF\_NORMED ) the best match are the lowest values. For all the others, higher values represent better matches. So, we save the corresponding value in the **matchLoc** variable: .. code-block:: cpp - + if( match_method == CV_TM_SQDIFF || match_method == CV_TM_SQDIFF_NORMED ) { matchLoc = minLoc; } - else + else { matchLoc = maxLoc; } #. Display the source image and the result matrix. Draw a rectangle around the highest possible matching area: .. code-block:: cpp - rectangle( img_display, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 ); - rectangle( result, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 ); + rectangle( img_display, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 ); + rectangle( result, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 ); imshow( image_window, img_display ); imshow( result_window, result ); @@ -333,19 +333,19 @@ Results .. image:: images/Template_Matching_Original_Image.jpg :align: center - + and a template image: .. image:: images/Template_Matching_Template_Image.jpg - :align: center + :align: center #. Generate the following result matrices (first row are the standard methods SQDIFF, CCORR and CCOEFF, second row are the same methods in its normalized version). In the first column, the darkest is the better match, for the other two columns, the brighter a location, the higher the match. - ============ ============ ============ + ============ ============ ============ |Result_0| |Result_2| |Result_4| - ============ ============ ============ + ============ ============ ============ |Result_1| |Result_3| |Result_5| - ============ ============ ============ + ============ ============ ============ .. 
|Result_0| image:: images/Template_Matching_Correl_Result_0.jpg :align: middle diff --git a/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.rst b/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.rst index f9ec17ae9b..52b10468ba 100644 --- a/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.rst +++ b/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.rst @@ -19,7 +19,7 @@ Theory * **Low error rate:** Meaning a good detection of only existent edges. * **Good localization:** The distance between edge pixels detected and real edge pixels have to be minimized. - * **Minimal response:** Only one detector response per edge. + * **Minimal response:** Only one detector response per edge. Steps ------ @@ -27,39 +27,39 @@ Steps #. Filter out any noise. The Gaussian filter is used for this purpose. An example of a Gaussian kernel of :math:`size = 5` that might be used is shown below: .. math:: - + K = \dfrac{1}{159}\begin{bmatrix} 2 & 4 & 5 & 4 & 2 \\ 4 & 9 & 12 & 9 & 4 \\ 5 & 12 & 15 & 12 & 5 \\ 4 & 9 & 12 & 9 & 4 \\ - 2 & 4 & 5 & 4 & 2 - \end{bmatrix} + 2 & 4 & 5 & 4 & 2 + \end{bmatrix} -#. Find the intensity gradient of the image. For this, we follow a procedure analogous to Sobel: +#. Find the intensity gradient of the image. For this, we follow a procedure analogous to Sobel: a. Apply a pair of convolution masks (in :math:`x` and :math:`y` directions: .. math:: - + G_{x} = \begin{bmatrix} -1 & 0 & +1 \\ -2 & 0 & +2 \\ - -1 & 0 & +1 + -1 & 0 & +1 \end{bmatrix} - + G_{y} = \begin{bmatrix} -1 & -2 & -1 \\ 0 & 0 & 0 \\ - +1 & +2 & +1 - \end{bmatrix} + +1 & +2 & +1 + \end{bmatrix} b. Find the gradient strength and direction with: .. math:: \begin{array}{l} - G = \sqrt{ G_{x}^{2} + G_{y}^{2} } \\ + G = \sqrt{ G_{x}^{2} + G_{y}^{2} } \\ \theta = \arctan(\dfrac{ G_{y} }{ G_{x} }) \end{array} @@ -71,22 +71,22 @@ Steps a. If a pixel gradient is higher than the *upper* threshold, the pixel is accepted as an edge b. If a pixel gradient value is below the *lower* threshold, then it is rejected. - c. If the pixel gradient is between the two thresholds, then it will be accepted only if it is connected to a pixel that is above the *upper* threshold. + c. If the pixel gradient is between the two thresholds, then it will be accepted only if it is connected to a pixel that is above the *upper* threshold. Canny recommended a *upper*:*lower* ratio between 2:1 and 3:1. - -#. For more details, you can always consult your favorite Computer Vision book. + +#. For more details, you can always consult your favorite Computer Vision book. Code ===== #. **What does this program do?** - + * Asks the user to enter a numerical value to set the lower threshold for our *Canny Edge Detector* (by means of a Trackbar) * Applies the *Canny Detector* and generates a **mask** (bright lines representing the edges on a black background). * Applies the mask obtained on the original image and display it in a window. - -#. The tutorial code's is shown lines below. You can also download it from `here `_ + +#. The tutorial code's is shown lines below. You can also download it from `here `_ .. code-block:: cpp @@ -123,7 +123,7 @@ Code /// Using Canny's output as a mask, we display our result dst = Scalar::all(0); - + src.copyTo( dst, detected_edges); imshow( window_name, dst ); } @@ -194,7 +194,7 @@ Explanation { return -1; } #. Create a matrix of the same type and size of *src* (to be *dst*) - + .. code-block:: cpp dst.create( src.size(), src.type() ); @@ -249,9 +249,9 @@ Explanation .. 
code-block:: cpp dst = Scalar::all(0); - -#. Finally, we will use the function :copy_to:`copyTo <>` to map only the areas of the image that are identified as edges (on a black background). - + +#. Finally, we will use the function :copy_to:`copyTo <>` to map only the areas of the image that are identified as edges (on a black background). + .. code-block:: cpp src.copyTo( dst, detected_edges); @@ -280,8 +280,8 @@ Result :alt: Result after running Canny :width: 200pt :align: center - + * Notice how the image is superposed to the black background on the edge regions. - + diff --git a/doc/tutorials/imgproc/imgtrans/copyMakeBorder/copyMakeBorder.rst b/doc/tutorials/imgproc/imgtrans/copyMakeBorder/copyMakeBorder.rst index 1658bab523..337ecd7ebd 100644 --- a/doc/tutorials/imgproc/imgtrans/copyMakeBorder/copyMakeBorder.rst +++ b/doc/tutorials/imgproc/imgtrans/copyMakeBorder/copyMakeBorder.rst @@ -10,8 +10,8 @@ In this tutorial you will learn how to: .. container:: enumeratevisibleitemswithsquare - * Use the OpenCV function :copy_make_border:`copyMakeBorder <>` to set the borders (extra padding to your image). - + * Use the OpenCV function :copy_make_border:`copyMakeBorder <>` to set the borders (extra padding to your image). + Theory ======== @@ -19,14 +19,14 @@ Theory The explanation below belongs to the book **Learning OpenCV** by Bradski and Kaehler. -#. In our previous tutorial we learned to use convolution to operate on images. One problem that naturally arises is how to handle the boundaries. How can we convolve them if the evaluated points are at the edge of the image? +#. In our previous tutorial we learned to use convolution to operate on images. One problem that naturally arises is how to handle the boundaries. How can we convolve them if the evaluated points are at the edge of the image? #. What most of OpenCV functions do is to copy a given image onto another slightly larger image and then automatically pads the boundary (by any of the methods explained in the sample code just below). This way, the convolution can be performed over the needed pixels without problems (the extra padding is cut after the operation is done). #. In this tutorial, we will briefly explore two ways of defining the extra padding (border) for an image: a. **BORDER_CONSTANT**: Pad the image with a constant value (i.e. black or :math:`0` - + b. **BORDER_REPLICATE**: The row or column at the very edge of the original is replicated to the extra border. This will be seen more clearly in the Code section. @@ -37,20 +37,20 @@ Code ====== #. **What does this program do?** - - * Load an image + + * Load an image * Let the user choose what kind of padding use in the input image. There are two options: - - #. *Constant value border*: Applies a padding of a constant value for the whole border. This value will be updated randomly each 0.5 seconds. + + #. *Constant value border*: Applies a padding of a constant value for the whole border. This value will be updated randomly each 0.5 seconds. #. *Replicated border*: The border will be replicated from the pixel values at the edges of the original image. The user chooses either option by pressing 'c' (constant) or 'r' (replicate) * The program finishes when the user presses 'ESC' - -#. The tutorial code's is shown lines below. You can also download it from `here `_ + +#. The tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. 
code-block:: cpp #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" @@ -59,7 +59,7 @@ Code using namespace cv; - /// Global Variables + /// Global Variables Mat src, dst; int top, bottom, left, right; int borderType; @@ -75,10 +75,10 @@ Code /// Load an image src = imread( argv[1] ); - + if( !src.data ) { return -1; - printf(" No data entered, please enter the path to an image file \n"); + printf(" No data entered, please enter the path to an image file \n"); } /// Brief how-to for this program @@ -92,12 +92,12 @@ Code namedWindow( window_name, CV_WINDOW_AUTOSIZE ); /// Initialize arguments for the filter - top = (int) (0.05*src.rows); bottom = (int) (0.05*src.rows); + top = (int) (0.05*src.rows); bottom = (int) (0.05*src.rows); left = (int) (0.05*src.cols); right = (int) (0.05*src.cols); dst = src; imshow( window_name, dst ); - + while( true ) { c = waitKey(500); @@ -140,14 +140,14 @@ Explanation .. code-block:: cpp src = imread( argv[1] ); - + if( !src.data ) { return -1; - printf(" No data entered, please enter the path to an image file \n"); + printf(" No data entered, please enter the path to an image file \n"); } #. After giving a short intro of how to use the program, we create a window: - + .. code-block:: cpp namedWindow( window_name, CV_WINDOW_AUTOSIZE ); @@ -156,13 +156,13 @@ Explanation .. code-block:: cpp - top = (int) (0.05*src.rows); bottom = (int) (0.05*src.rows); + top = (int) (0.05*src.rows); bottom = (int) (0.05*src.rows); left = (int) (0.05*src.cols); right = (int) (0.05*src.cols); #. The program begins a *while* loop. If the user presses 'c' or 'r', the *borderType* variable takes the value of *BORDER_CONSTANT* or *BORDER_REPLICATE* respectively: .. code-block:: cpp - + while( true ) { c = waitKey(500); @@ -185,7 +185,7 @@ Explanation #. Finally, we call the function :copy_make_border:`copyMakeBorder <>` to apply the respective padding: .. code-block:: cpp - + copyMakeBorder( src, dst, top, bottom, left, right, borderType, value ); The arguments are: @@ -199,7 +199,7 @@ Explanation #. We display our output image in the image created previously .. code-block:: cpp - + imshow( window_name, dst ); @@ -213,12 +213,12 @@ Results .. container:: enumeratevisibleitemswithsquare * By default, it begins with the border set to BORDER_CONSTANT. Hence, a succession of random colored borders will be shown. - * If you press 'r', the border will become a replica of the edge pixels. + * If you press 'r', the border will become a replica of the edge pixels. * If you press 'c', the random colored borders will appear again * If you press 'ESC' the program will exit. Below some screenshot showing how the border changes color and how the *BORDER_REPLICATE* option looks: - + .. image:: images/CopyMakeBorder_Tutorial_Results.jpg :alt: Final result after copyMakeBorder application diff --git a/doc/tutorials/imgproc/imgtrans/filter_2d/filter_2d.rst b/doc/tutorials/imgproc/imgtrans/filter_2d/filter_2d.rst index feaef69403..1c81ba33ae 100644 --- a/doc/tutorials/imgproc/imgtrans/filter_2d/filter_2d.rst +++ b/doc/tutorials/imgproc/imgtrans/filter_2d/filter_2d.rst @@ -10,8 +10,8 @@ In this tutorial you will learn how to: .. container:: enumeratevisibleitemswithsquare - * Use the OpenCV function :filter2d:`filter2D <>` to create your own linear filters. - + * Use the OpenCV function :filter2d:`filter2D <>` to create your own linear filters. 
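As a quick preview before the theory, the sketch below applies :filter2d:`filter2D <>` with a hand-written 3x3 sharpening kernel. It is only an illustration with assumed file arguments and names, not the tutorial program (which builds a normalized box filter instead):

.. code-block:: cpp

   #include "opencv2/imgproc/imgproc.hpp"
   #include "opencv2/highgui/highgui.hpp"

   using namespace cv;

   int main( int argc, char** argv )
   {
     Mat src = imread( argv[1] );
     if( !src.data ) { return -1; }

     /// A simple hand-written sharpening kernel (assumption of this sketch)
     Mat kernel = (Mat_<float>(3,3) <<  0, -1,  0,
                                       -1,  5, -1,
                                        0, -1,  0 );

     /// ddepth = -1 keeps the same depth as the source image
     Mat dst;
     filter2D( src, dst, -1, kernel );

     imshow( "filter2D preview", dst );
     waitKey(0);
     return 0;
   }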
+ Theory ======= @@ -21,15 +21,15 @@ Theory Convolution ------------ -In a very general sense, convolution is an operation between every part of an image and an operator (kernel). +In a very general sense, convolution is an operation between every part of an image and an operator (kernel). What is a kernel? ------------------ -A kernel is essentially a fixed size array of numerical coefficeints along with an *anchor point* in that array, which is tipically located at the center. +A kernel is essentially a fixed size array of numerical coefficeints along with an *anchor point* in that array, which is tipically located at the center. .. image:: images/filter_2d_tutorial_kernel_theory.png :alt: kernel example - :align: center + :align: center How does convolution with a kernel work? ----------------------------------------- @@ -38,7 +38,7 @@ Assume you want to know the resulting value of a particular location in the imag #. Place the kernel anchor on top of a determined pixel, with the rest of the kernel overlaying the corresponding local pixels in the image. -#. Multiply the kernel coefficients by the corresponding image pixel values and sum the result. +#. Multiply the kernel coefficients by the corresponding image pixel values and sum the result. #. Place the result to the location of the *anchor* in the input image. @@ -47,35 +47,35 @@ Assume you want to know the resulting value of a particular location in the imag Expressing the procedure above in the form of an equation we would have: .. math:: - + H(x,y) = \sum_{i=0}^{M_{i} - 1} \sum_{j=0}^{M_{j}-1} I(x+i - a_{i}, y + j - a_{j})K(i,j) -Fortunately, OpenCV provides you with the function :filter2d:`filter2D <>` so you do not have to code all these operations. +Fortunately, OpenCV provides you with the function :filter2d:`filter2D <>` so you do not have to code all these operations. Code ====== #. **What does this program do?** - + * Loads an image * Performs a *normalized box filter*. For instance, for a kernel of size :math:`size = 3`, the kernel would be: .. math:: - + K = \dfrac{1}{3 \cdot 3} \begin{bmatrix} 1 & 1 & 1 \\ 1 & 1 & 1 \\ - 1 & 1 & 1 - \end{bmatrix} + 1 & 1 & 1 + \end{bmatrix} The program will perform the filter operation with kernels of sizes 3, 5, 7, 9 and 11. * The filter output (with each kernel) will be shown during 500 milliseconds -#. The tutorial code's is shown lines below. You can also download it from `here `_ +#. The tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" @@ -93,7 +93,7 @@ Code Mat kernel; Point anchor; double delta; - int ddepth; + int ddepth; int kernel_size; char* window_name = "filter2D Demo"; @@ -107,7 +107,7 @@ Code /// Create window namedWindow( window_name, CV_WINDOW_AUTOSIZE ); - + /// Initialize arguments for the filter anchor = Point( -1, -1 ); delta = 0; @@ -131,7 +131,7 @@ Code imshow( window_name, dst ); ind++; } - + return 0; } @@ -171,12 +171,12 @@ Explanation kernel_size = 3 + 2*( ind%5 ); kernel = Mat::ones( kernel_size, kernel_size, CV_32F )/ (float)(kernel_size*kernel_size); - The first line is to update the *kernel_size* to odd values in the range: :math:`[3,11]`. The second line actually builds the kernel by setting its value to a matrix filled with :math:`1's` and normalizing it by dividing it between the number of elements. + The first line is to update the *kernel_size* to odd values in the range: :math:`[3,11]`. 
The second line actually builds the kernel by setting its value to a matrix filled with :math:`1's` and normalizing it by dividing it between the number of elements. #. After setting the kernel, we can generate the filter by using the function :filter2d:`filter2D <>`: .. code-block:: cpp - + filter2D(src, dst, ddepth , kernel, anchor, delta, BORDER_DEFAULT ); The arguments denote: diff --git a/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst b/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst index c4a972bf31..628841d768 100644 --- a/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst +++ b/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.rst @@ -17,16 +17,16 @@ Hough Circle Transform * The Hough Circle Transform works in a *roughly* analogous way to the Hough Line Transform explained in the previous tutorial. * In the line detection case, a line was defined by two parameters :math:`(r, \theta)`. In the circle case, we need three parameters to define a circle: - + .. math:: - - C : ( x_{center}, y_{center}, r ) + + C : ( x_{center}, y_{center}, r ) where :math:`(x_{center}, y_{center})` define the center position (gree point) and :math:`r` is the radius, which allows us to completely define a circle, as it can be seen below: .. image:: images/Hough_Circle_Tutorial_Theory_0.jpg :alt: Result of detecting circles with Hough Transform - :align: center + :align: center * For sake of efficiency, OpenCV implements a detection method slightly trickier than the standard Hough Transform: *The Hough gradient method*. For more details, please check the book *Learning OpenCV* or your favorite Computer Vision bibliography @@ -34,19 +34,19 @@ Code ====== #. **What does this program do?** - + * Loads an image and blur it to reduce the noise - * Applies the *Hough Circle Transform* to the blurred image . + * Applies the *Hough Circle Transform* to the blurred image . * Display the detected circle in a window. .. |TutorialHoughCirclesSimpleDownload| replace:: here - .. _TutorialHoughCirclesSimpleDownload: http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/houghlines.cpp + .. _TutorialHoughCirclesSimpleDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/houghlines.cpp .. |TutorialHoughCirclesFancyDownload| replace:: here - .. _TutorialHoughCirclesFancyDownload: http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/tutorial_code/ImgTrans/HoughCircle_Demo.cpp + .. _TutorialHoughCirclesFancyDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/tutorial_code/ImgTrans/HoughCircle_Demo.cpp #. The sample code that we will explain can be downloaded from |TutorialHoughCirclesSimpleDownload|_. A slightly fancier version (which shows both Hough standard and probabilistic with trackbars for changing the threshold values) can be found |TutorialHoughCirclesFancyDownload|_. -.. code-block:: cpp +.. 
code-block:: cpp #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -66,7 +66,7 @@ Code if( !src.data ) { return -1; } - /// Convert it to gray + /// Convert it to gray cvtColor( src, src_gray, CV_BGR2GRAY ); /// Reduce the noise so we avoid false circle detection @@ -88,7 +88,7 @@ Code circle( src, center, radius, Scalar(0,0,255), 3, 8, 0 ); } - /// Show your results + /// Show your results namedWindow( "Hough Circle Transform Demo", CV_WINDOW_AUTOSIZE ); imshow( "Hough Circle Transform Demo", src ); @@ -117,7 +117,7 @@ Explanation cvtColor( src, src_gray, CV_BGR2GRAY ); #. Apply a Gaussian blur to reduce noise and avoid false circle detection: - + .. code-block:: cpp GaussianBlur( src_gray, src_gray, Size(9, 9), 2, 2 ); @@ -138,10 +138,10 @@ Explanation * *dp = 1*: The inverse ratio of resolution * *min_dist = src_gray.rows/8*: Minimum distance between detected centers * *param_1 = 200*: Upper threshold for the internal Canny edge detector - * *param_2* = 100*: Threshold for center detection. - * *min_radius = 0*: Minimum radio to be detected. If unknown, put zero as default. + * *param_2* = 100*: Threshold for center detection. + * *min_radius = 0*: Minimum radio to be detected. If unknown, put zero as default. * *max_radius = 0*: Maximum radius to be detected. If unknown, put zero as default - + #. Draw the detected circles: .. code-block:: cpp @@ -154,14 +154,14 @@ Explanation circle( src, center, 3, Scalar(0,255,0), -1, 8, 0 ); // circle outline circle( src, center, radius, Scalar(0,0,255), 3, 8, 0 ); - } + } You can see that we will draw the circle(s) on red and the center(s) with a small green dot #. Display the detected circle(s): .. code-block:: cpp - + namedWindow( "Hough Circle Transform Demo", CV_WINDOW_AUTOSIZE ); imshow( "Hough Circle Transform Demo", src ); @@ -175,8 +175,8 @@ Explanation Result ======= -The result of running the code above with a test image is shown below: +The result of running the code above with a test image is shown below: .. image:: images/Hough_Circle_Tutorial_Result.jpg :alt: Result of detecting circles with Hough Transform - :align: center + :align: center diff --git a/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.rst b/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.rst index 48aae54918..dfb57c03c7 100644 --- a/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.rst +++ b/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.rst @@ -9,7 +9,7 @@ Goal In this tutorial you will learn how to: * Use the OpenCV functions :hough_lines:`HoughLines <>` and :hough_lines_p:`HoughLinesP <>` to detect lines in an image. - + Theory ======= @@ -18,60 +18,60 @@ Theory Hough Line Transform --------------------- -#. The Hough Line Transform is a transform used to detect straight lines. +#. The Hough Line Transform is a transform used to detect straight lines. #. To apply the Transform, first an edge detection pre-processing is desirable. How does it work? ^^^^^^^^^^^^^^^^^^ #. As you know, a line in the image space can be expressed with two variables. For example: - + a. In the **Cartesian coordinate system:** Parameters: :math:`(m,b)`. b. In the **Polar coordinate system:** Parameters: :math:`(r,\theta)` .. image:: images/Hough_Lines_Tutorial_Theory_0.jpg :alt: Line variables - :align: center + :align: center - For Hough Transforms, we will express lines in the *Polar system*. Hence, a line equation can be written as: + For Hough Transforms, we will express lines in the *Polar system*. 
Hence, a line equation can be written as: .. math:: - y = \left ( -\dfrac{\cos \theta}{\sin \theta} \right ) x + \left ( \dfrac{r}{\sin \theta} \right ) + y = \left ( -\dfrac{\cos \theta}{\sin \theta} \right ) x + \left ( \dfrac{r}{\sin \theta} \right ) Arranging the terms: :math:`r = x \cos \theta + y \sin \theta` #. In general for each point :math:`(x_{0}, y_{0})`, we can define the family of lines that goes through that point as: .. math:: - + r_{\theta} = x_{0} \cdot \cos \theta + y_{0} \cdot \sin \theta - Meaning that each pair :math:`(r_{\theta},\theta)` represents each line that passes by :math:`(x_{0}, y_{0})`. + Meaning that each pair :math:`(r_{\theta},\theta)` represents each line that passes by :math:`(x_{0}, y_{0})`. #. If for a given :math:`(x_{0}, y_{0})` we plot the family of lines that goes through it, we get a sinusoid. For instance, for :math:`x_{0} = 8` and :math:`y_{0} = 6` we get the following plot (in a plane :math:`\theta` - :math:`r`): .. image:: images/Hough_Lines_Tutorial_Theory_1.jpg :alt: Polar plot of a the family of lines of a point - :align: center + :align: center - We consider only points such that :math:`r > 0` and :math:`0< \theta < 2 \pi`. + We consider only points such that :math:`r > 0` and :math:`0< \theta < 2 \pi`. #. We can do the same operation above for all the points in an image. If the curves of two different points intersect in the plane :math:`\theta` - :math:`r`, that means that both points belong to a same line. For instance, following with the example above and drawing the plot for two more points: :math:`x_{1} = 9`, :math:`y_{1} = 4` and :math:`x_{2} = 12`, :math:`y_{2} = 3`, we get: .. image:: images/Hough_Lines_Tutorial_Theory_2.jpg :alt: Polar plot of the family of lines for three points - :align: center + :align: center - The three plots intersect in one single point :math:`(0.925, 9.6)`, these coordinates are the parameters (:math:`\theta, r`) or the line in which :math:`(x_{0}, y_{0})`, :math:`(x_{1}, y_{1})` and :math:`(x_{2}, y_{2})` lay. + The three plots intersect in one single point :math:`(0.925, 9.6)`, these coordinates are the parameters (:math:`\theta, r`) or the line in which :math:`(x_{0}, y_{0})`, :math:`(x_{1}, y_{1})` and :math:`(x_{2}, y_{2})` lay. #. What does all the stuff above mean? It means that in general, a line can be *detected* by finding the number of intersections between curves.The more curves intersecting means that the line represented by that intersection have more points. In general, we can define a *threshold* of the minimum number of intersections needed to *detect* a line. - + #. This is what the Hough Line Transform does. It keeps track of the intersection between curves of every point in the image. If the number of intersections is above some *threshold*, then it declares it as a line with the parameters :math:`(\theta, r_{\theta})` of the intersection point. Standard and Probabilistic Hough Line Transform ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -OpenCV implements two kind of Hough Line Transforms: +OpenCV implements two kind of Hough Line Transforms: a. **The Standard Hough Transform** @@ -88,21 +88,21 @@ b. **The Probabilistic Hough Line Transform** Code ====== -.. |TutorialHoughLinesSimpleDownload| replace:: here -.. _TutorialHoughLinesSimpleDownload: http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/houghlines.cpp -.. |TutorialHoughLinesFancyDownload| replace:: here -.. 
_TutorialHoughLinesFancyDownload: http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/tutorial_code/ImgTrans/HoughLines_Demo.cpp +.. |TutorialHoughLinesSimpleDownload| replace:: here +.. _TutorialHoughLinesSimpleDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/houghlines.cpp +.. |TutorialHoughLinesFancyDownload| replace:: here +.. _TutorialHoughLinesFancyDownload: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/tutorial_code/ImgTrans/HoughLines_Demo.cpp #. **What does this program do?** - + * Loads an image - * Applies either a *Standard Hough Line Transform* or a *Probabilistic Line Transform*. + * Applies either a *Standard Hough Line Transform* or a *Probabilistic Line Transform*. * Display the original image and the detected line in two windows. #. The sample code that we will explain can be downloaded from |TutorialHoughLinesSimpleDownload|_. A slightly fancier version (which shows both Hough standard and probabilistic with trackbars for changing the threshold values) can be found |TutorialHoughLinesFancyDownload|_. -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -207,9 +207,9 @@ Explanation * *rho* : The resolution of the parameter :math:`r` in pixels. We use **1** pixel. * *theta*: The resolution of the parameter :math:`\theta` in radians. We use **1 degree** (CV_PI/180) * *threshold*: The minimum number of intersections to "*detect*" a line - * *srn* and *stn*: Default parameters to zero. Check OpenCV reference for more info. + * *srn* and *stn*: Default parameters to zero. Check OpenCV reference for more info. - b. And then you display the result by drawing the lines. + b. And then you display the result by drawing the lines. .. code-block:: cpp @@ -236,14 +236,14 @@ Explanation HoughLinesP(dst, lines, 1, CV_PI/180, 50, 50, 10 ); with the arguments: - + * *dst*: Output of the edge detector. It should be a grayscale image (although in fact it is a binary one) * *lines*: A vector that will store the parameters :math:`(x_{start}, y_{start}, x_{end}, y_{end})` of the detected lines * *rho* : The resolution of the parameter :math:`r` in pixels. We use **1** pixel. * *theta*: The resolution of the parameter :math:`\theta` in radians. We use **1 degree** (CV_PI/180) * *threshold*: The minimum number of intersections to "*detect*" a line - * *minLinLength*: The minimum number of points that can form a line. Lines with less than this number of points are disregarded. - * *maxLineGap*: The maximum gap between two points to be considered in the same line. + * *minLinLength*: The minimum number of points that can form a line. Lines with less than this number of points are disregarded. + * *maxLineGap*: The maximum gap between two points to be considered in the same line. b. And then you display the result by drawing the lines. @@ -256,7 +256,7 @@ Explanation } -#. Display the original image and the detected lines: +#. Display the original image and the detected lines: .. code-block:: cpp @@ -274,20 +274,20 @@ Result ======= .. note:: - + The results below are obtained using the slightly fancier version we mentioned in the *Code* section. It still implements the same stuff as above, only adding the Trackbar for the Threshold. Using an input image such as: .. 
image:: images/Hough_Lines_Tutorial_Original_Image.jpg :alt: Result of detecting lines with Hough Transform - :align: center - + :align: center + We get the following result by using the Probabilistic Hough Line Transform: .. image:: images/Hough_Lines_Tutorial_Result.jpg :alt: Result of detecting lines with Hough Transform - :align: center + :align: center -You may observe that the number of lines detected vary while you change the *threshold*. The explanation is sort of evident: If you establish a higher threshold, fewer lines will be detected (since you will need more points to declare a line detected). +You may observe that the number of lines detected vary while you change the *threshold*. The explanation is sort of evident: If you establish a higher threshold, fewer lines will be detected (since you will need more points to declare a line detected). diff --git a/doc/tutorials/imgproc/imgtrans/laplace_operator/laplace_operator.rst b/doc/tutorials/imgproc/imgtrans/laplace_operator/laplace_operator.rst index fca569088a..da9373201e 100644 --- a/doc/tutorials/imgproc/imgtrans/laplace_operator/laplace_operator.rst +++ b/doc/tutorials/imgproc/imgtrans/laplace_operator/laplace_operator.rst @@ -36,7 +36,7 @@ Laplacian Operator ------------------- #. From the explanation above, we deduce that the second derivative can be used to *detect edges*. Since images are "*2D*", we would need to take the derivative in both dimensions. Here, the Laplacian operator comes handy. - + #. The *Laplacian operator* is defined by: .. math:: @@ -49,13 +49,13 @@ Code ====== #. **What does this program do?** - + * Loads an image - * Remove noise by applying a Gaussian blur and then convert the original image to grayscale + * Remove noise by applying a Gaussian blur and then convert the original image to grayscale * Applies a Laplacian operator to the grayscale image and stores the output image * Display the result in a window -#. The tutorial code's is shown lines below. You can also download it from `here `_ +#. The tutorial code's is shown lines below. You can also download it from `here `_ .. code-block:: cpp @@ -70,7 +70,7 @@ Code int main( int argc, char** argv ) { Mat src, src_gray, dst; - int kernel_size = 3; + int kernel_size = 3; int scale = 1; int delta = 0; int ddepth = CV_16S; @@ -116,7 +116,7 @@ Explanation .. code-block:: cpp Mat src, src_gray, dst; - int kernel_size = 3; + int kernel_size = 3; int scale = 1; int delta = 0; int ddepth = CV_16S; @@ -136,7 +136,7 @@ Explanation .. code-block:: cpp GaussianBlur( src, src, Size(3,3), 0, 0, BORDER_DEFAULT ); - + #. Convert the image to grayscale using :cvt_color:`cvtColor <>` .. code-block:: cpp diff --git a/doc/tutorials/imgproc/imgtrans/remap/remap.rst b/doc/tutorials/imgproc/imgtrans/remap/remap.rst index 191ffcca38..27eef0d116 100644 --- a/doc/tutorials/imgproc/imgtrans/remap/remap.rst +++ b/doc/tutorials/imgproc/imgtrans/remap/remap.rst @@ -16,14 +16,14 @@ Theory What is remapping? ------------------ -* It is the process of taking pixels from one place in the image and locating them in another position in a new image. +* It is the process of taking pixels from one place in the image and locating them in another position in a new image. * To accomplish the mapping process, it might be necessary to do some interpolation for non-integer pixel locations, since there will not always be a one-to-one-pixel correspondence between source and destination images. * We can express the remap for every pixel location :math:`(x,y)` as: .. 
math:: - + g(x,y) = f ( h(x,y) ) where :math:`g()` is the remapped image, :math:`f()` the source image and :math:`h(x,y)` is the mapping function that operates on :math:`(x,y)`. @@ -34,7 +34,7 @@ What is remapping? h(x,y) = (I.cols - x, y ) - What would happen? It is easily seen that the image would flip in the :math:`x` direction. For instance, consider the input image: + What would happen? It is easily seen that the image would flip in the :math:`x` direction. For instance, consider the input image: .. image:: images/Remap_Tutorial_Theory_0.jpg :alt: Original test image @@ -54,12 +54,12 @@ Code ==== #. **What does this program do?** - + * Loads an image * Each second, apply 1 of 4 different remapping processes to the image and display them indefinitely in a window. * Wait for the user to exit the program - -#. The tutorial code's is shown lines below. You can also download it from `here `_ + +#. The tutorial code's is shown lines below. You can also download it from `here `_ .. code-block:: cpp @@ -91,7 +91,7 @@ Code dst.create( src.size(), src.type() ); map_x.create( src.size(), CV_32FC1 ); map_y.create( src.size(), CV_32FC1 ); - + /// Create window namedWindow( remap_window, CV_WINDOW_AUTOSIZE ); @@ -106,7 +106,7 @@ Code /// Update map_x & map_y. Then apply remap update_map(); - remap( src, dst, map_x, map_y, CV_INTER_LINEAR, BORDER_CONSTANT, Scalar(0,0, 0) ); + remap( src, dst, map_x, map_y, CV_INTER_LINEAR, BORDER_CONSTANT, Scalar(0,0, 0) ); /// Display results imshow( remap_window, dst ); @@ -126,7 +126,7 @@ Code { for( int i = 0; i < src.cols; i++ ) { switch( ind ) - { + { case 0: if( i > src.cols*0.25 && i < src.cols*0.75 && j > src.rows*0.25 && j < src.rows*0.75 ) { @@ -169,7 +169,7 @@ Explanation int ind = 0; #. Load an image: - + .. code-block:: cpp src = imread( argv[1], 1 ); @@ -181,7 +181,7 @@ Explanation dst.create( src.size(), src.type() ); map_x.create( src.size(), CV_32FC1 ); map_y.create( src.size(), CV_32FC1 ); - + #. Create a window to display results .. code-block:: cpp @@ -189,7 +189,7 @@ Explanation namedWindow( remap_window, CV_WINDOW_AUTOSIZE ); #. Establish a loop. Each 1000 ms we update our mapping matrices (*mat_x* and *mat_y*) and apply them to our source image: - + .. code-block:: cpp while( true ) @@ -202,19 +202,19 @@ Explanation /// Update map_x & map_y. Then apply remap update_map(); - remap( src, dst, map_x, map_y, CV_INTER_LINEAR, BORDER_CONSTANT, Scalar(0,0, 0) ); + remap( src, dst, map_x, map_y, CV_INTER_LINEAR, BORDER_CONSTANT, Scalar(0,0, 0) ); /// Display results imshow( remap_window, dst ); } The function that applies the remapping is :remap:`remap <>`. We give the following arguments: - + * **src**: Source image * **dst**: Destination image of same size as *src* * **map_x**: The mapping function in the x direction. It is equivalent to the first component of :math:`h(i,j)` * **map_y**: Same as above, but in y direction. Note that *map_y* and *map_x* are both of the same size as *src* - * **CV_INTER_LINEAR**: The type of interpolation to use for non-integer pixels. This is by default. + * **CV_INTER_LINEAR**: The type of interpolation to use for non-integer pixels. This is by default. * **BORDER_CONSTANT**: Default How do we update our mapping matrices *mat_x* and *mat_y*? Go on reading: @@ -225,25 +225,25 @@ Explanation .. 
math:: - h(i,j) = ( 2*i - src.cols/2 + 0.5, 2*j - src.rows/2 + 0.5) + h(i,j) = ( 2*i - src.cols/2 + 0.5, 2*j - src.rows/2 + 0.5) + + for all pairs :math:`(i,j)` such that: :math:`\dfrac{src.cols}{4} src.cols*0.25 && i < src.cols*0.75 && j > src.rows*0.25 && j < src.rows*0.75 ) { @@ -292,7 +292,7 @@ Result :align: center #. Turning it upside down: - + .. image:: images/Remap_Tutorial_Result_1.jpg :alt: Result 0 for remapping :width: 250pt diff --git a/doc/tutorials/imgproc/imgtrans/sobel_derivatives/sobel_derivatives.rst b/doc/tutorials/imgproc/imgtrans/sobel_derivatives/sobel_derivatives.rst index 9c3827dfaa..625ca160dd 100644 --- a/doc/tutorials/imgproc/imgtrans/sobel_derivatives/sobel_derivatives.rst +++ b/doc/tutorials/imgproc/imgtrans/sobel_derivatives/sobel_derivatives.rst @@ -12,8 +12,8 @@ In this tutorial you will learn how to: .. container:: enumeratevisibleitemswithsquare * Use the OpenCV function :sobel:`Sobel <>` to calculate the derivatives from an image. - * Use the OpenCV function :scharr:`Scharr <>` to calculate a more accurate derivative for a kernel of size :math:`3 \cdot 3` - + * Use the OpenCV function :scharr:`Scharr <>` to calculate a more accurate derivative for a kernel of size :math:`3 \cdot 3` + Theory ======== @@ -29,8 +29,8 @@ Theory .. image:: images/Sobel_Derivatives_Tutorial_Theory_0.jpg :alt: How intensity changes in an edge :align: center - - You can easily notice that in an *edge*, the pixel intensity *changes* in a notorious way. A good way to express *changes* is by using *derivatives*. A high change in gradient indicates a major change in the image. + + You can easily notice that in an *edge*, the pixel intensity *changes* in a notorious way. A good way to express *changes* is by using *derivatives*. A high change in gradient indicates a major change in the image. #. To be more graphical, let's assume we have a 1D-image. An edge is shown by the "jump" in intensity in the plot below: @@ -51,9 +51,9 @@ Theory Sobel Operator --------------- -#. The Sobel Operator is a discrete differentiation operator. It computes an approximation of the gradient of an image intensity function. +#. The Sobel Operator is a discrete differentiation operator. It computes an approximation of the gradient of an image intensity function. -#. The Sobel Operator combines Gaussian smoothing and differentiation. +#. The Sobel Operator combines Gaussian smoothing and differentiation. Formulation ^^^^^^^^^^^^ @@ -64,21 +64,21 @@ Assuming that the image to be operated is :math:`I`: a. **Horizontal changes**: This is computed by convolving :math:`I` with a kernel :math:`G_{x}` with odd size. For example for a kernel size of 3, :math:`G_{x}` would be computed as: .. math:: - + G_{x} = \begin{bmatrix} -1 & 0 & +1 \\ -2 & 0 & +2 \\ - -1 & 0 & +1 + -1 & 0 & +1 \end{bmatrix} * I b. **Vertical changes**: This is computed by convolving :math:`I` with a kernel :math:`G_{y}` with odd size. For example for a kernel size of 3, :math:`G_{y}` would be computed as: .. math:: - + G_{y} = \begin{bmatrix} -1 & -2 & -1 \\ 0 & 0 & 0 \\ - +1 & +2 & +1 + +1 & +2 & +1 \end{bmatrix} * I #. At each point of the image we calculate an approximation of the *gradient* in that point by combining both results above: @@ -90,7 +90,7 @@ Assuming that the image to be operated is :math:`I`: Although sometimes the following simpler equation is used: .. 
math:: - + G = |G_{x}| + |G_{y}| @@ -103,14 +103,14 @@ Assuming that the image to be operated is :math:`I`: G_{x} = \begin{bmatrix} -3 & 0 & +3 \\ -10 & 0 & +10 \\ - -3 & 0 & +3 - \end{bmatrix} - + -3 & 0 & +3 + \end{bmatrix} + G_{y} = \begin{bmatrix} -3 & -10 & -3 \\ 0 & 0 & 0 \\ - +3 & +10 & +3 - \end{bmatrix} + +3 & +10 & +3 + \end{bmatrix} You can check out more information of this function in the OpenCV reference (:scharr:`Scharr <>`). Also, in the sample code below, you will notice that above the code for :sobel:`Sobel <>` function there is also code for the :scharr:`Scharr <>` function commented. Uncommenting it (and obviously commenting the Sobel stuff) should give you an idea of how this function works. @@ -118,12 +118,12 @@ Code ===== #. **What does this program do?** - - * Applies the *Sobel Operator* and generates as output an image with the detected *edges* bright on a darker background. - -#. The tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp + * Applies the *Sobel Operator* and generates as output an image with the detected *edges* bright on a darker background. + +#. The tutorial code's is shown lines below. You can also download it from `here `_ + +.. code-block:: cpp #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" @@ -137,7 +137,7 @@ Code { Mat src, src_gray; - Mat grad; + Mat grad; char* window_name = "Sobel Demo - Simple Edge Detector"; int scale = 1; int delta = 0; @@ -162,15 +162,15 @@ Code /// Generate grad_x and grad_y Mat grad_x, grad_y; Mat abs_grad_x, abs_grad_y; - + /// Gradient X //Scharr( src_gray, grad_x, ddepth, 1, 0, scale, delta, BORDER_DEFAULT ); - Sobel( src_gray, grad_x, ddepth, 1, 0, 3, scale, delta, BORDER_DEFAULT ); + Sobel( src_gray, grad_x, ddepth, 1, 0, 3, scale, delta, BORDER_DEFAULT ); convertScaleAbs( grad_x, abs_grad_x ); - /// Gradient Y + /// Gradient Y //Scharr( src_gray, grad_y, ddepth, 0, 1, scale, delta, BORDER_DEFAULT ); - Sobel( src_gray, grad_y, ddepth, 0, 1, 3, scale, delta, BORDER_DEFAULT ); + Sobel( src_gray, grad_y, ddepth, 0, 1, 3, scale, delta, BORDER_DEFAULT ); convertScaleAbs( grad_y, abs_grad_y ); /// Total Gradient (approximate) @@ -192,7 +192,7 @@ Explanation .. code-block:: cpp Mat src, src_gray; - Mat grad; + Mat grad; char* window_name = "Sobel Demo - Simple Edge Detector"; int scale = 1; int delta = 0; @@ -203,12 +203,12 @@ Explanation .. code-block:: cpp src = imread( argv[1] ); - + if( !src.data ) { return -1; } #. First, we apply a :gaussian_blur:`GaussianBlur <>` to our image to reduce the noise ( kernel size = 3 ) - + .. code-block:: cpp GaussianBlur( src, src, Size(3,3), 0, 0, BORDER_DEFAULT ); @@ -220,27 +220,27 @@ Explanation cvtColor( src, src_gray, CV_RGB2GRAY ); #. Second, we calculate the "*derivatives*" in *x* and *y* directions. For this, we use the function :sobel:`Sobel <>` as shown below: - + .. code-block:: cpp Mat grad_x, grad_y; Mat abs_grad_x, abs_grad_y; - + /// Gradient X - Sobel( src_gray, grad_x, ddepth, 1, 0, 3, scale, delta, BORDER_DEFAULT ); - /// Gradient Y - Sobel( src_gray, grad_y, ddepth, 0, 1, 3, scale, delta, BORDER_DEFAULT ); + Sobel( src_gray, grad_x, ddepth, 1, 0, 3, scale, delta, BORDER_DEFAULT ); + /// Gradient Y + Sobel( src_gray, grad_y, ddepth, 0, 1, 3, scale, delta, BORDER_DEFAULT ); The function takes the following arguments: - * *src_gray*: In our example, the input image. Here it is *CV_8U* - * *grad_x*/*grad_y*: The output image. + * *src_gray*: In our example, the input image. 
Here it is *CV_8U* + * *grad_x*/*grad_y*: The output image. * *ddepth*: The depth of the output image. We set it to *CV_16S* to avoid overflow. - * *x_order*: The order of the derivative in **x** direction. - * *y_order*: The order of the derivative in **y** direction. + * *x_order*: The order of the derivative in **x** direction. + * *y_order*: The order of the derivative in **y** direction. * *scale*, *delta* and *BORDER_DEFAULT*: We use default values. - Notice that to calculate the gradient in *x* direction we use: :math:`x_{order}= 1` and :math:`y_{order} = 0`. We do analogously for the *y* direction. + Notice that to calculate the gradient in *x* direction we use: :math:`x_{order}= 1` and :math:`y_{order} = 0`. We do analogously for the *y* direction. #. We convert our partial results back to *CV_8U*: @@ -248,7 +248,7 @@ Explanation convertScaleAbs( grad_x, abs_grad_x ); convertScaleAbs( grad_y, abs_grad_y ); - + #. Finally, we try to approximate the *gradient* by adding both directional gradients (note that this is not an exact calculation at all! but it is good for our purposes). @@ -268,7 +268,7 @@ Results ======== #. Here is the output of applying our basic detector to *lena.jpg*: - + .. image:: images/Sobel_Derivatives_Tutorial_Result.jpg :alt: Result of applying Sobel operator to lena.jpg diff --git a/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.rst b/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.rst index f90bd47a0b..8c08d22e42 100644 --- a/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.rst +++ b/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.rst @@ -19,49 +19,49 @@ Theory What is an Affine Transformation? ---------------------------------- -#. It is any transformation that can be expressed in the form of a *matrix multiplication* (linear transformation) followed by a *vector addition* (translation). +#. It is any transformation that can be expressed in the form of a *matrix multiplication* (linear transformation) followed by a *vector addition* (translation). -#. From the above, We can use an Affine Transformation to express: +#. From the above, We can use an Affine Transformation to express: a. Rotations (linear transformation) b. Translations (vector addition) c. Scale operations (linear transformation) - you can see that, in essence, an Affine Transformation represents a **relation** between two images. - -#. The usual way to represent an Affine Transform is by using a :math:`2 \times 3` matrix. + you can see that, in essence, an Affine Transformation represents a **relation** between two images. - .. math:: +#. The usual way to represent an Affine Transform is by using a :math:`2 \times 3` matrix. + + .. math:: A = \begin{bmatrix} - a_{00} & a_{01} \\ + a_{00} & a_{01} \\ a_{10} & a_{11} \end{bmatrix}_{2 \times 2} B = \begin{bmatrix} - b_{00} \\ + b_{00} \\ b_{10} \end{bmatrix}_{2 \times 1} - + M = \begin{bmatrix} - A & B + A & B \end{bmatrix} - = + = \begin{bmatrix} - a_{00} & a_{01} & b_{00} \\ - a_{10} & a_{11} & b_{10} + a_{00} & a_{01} & b_{00} \\ + a_{10} & a_{11} & b_{10} \end{bmatrix}_{2 \times 3} Considering that we want to transform a 2D vector :math:`X = \begin{bmatrix}x \\ y\end{bmatrix}` by using :math:`A` and :math:`B`, we can do it equivalently with: - + :math:`T = A \cdot \begin{bmatrix}x \\ y\end{bmatrix} + B` or :math:`T = M \cdot [x, y, 1]^{T}` .. 
math:: T = \begin{bmatrix} - a_{00}x + a_{01}y + b_{00} \\ + a_{00}x + a_{01}y + b_{00} \\ a_{10}x + a_{11}y + b_{10} - \end{bmatrix} + \end{bmatrix} How do we get an Affine Transformation? @@ -80,20 +80,20 @@ How do we get an Affine Transformation? :width: 350pt :align: center - the points 1, 2 and 3 (forming a triangle in image 1) are mapped into image 2, still forming a triangle, but now they have changed notoriously. If we find the Affine Transformation with these 3 points (you can choose them as you like), then we can apply this found relation to the whole pixels in the image. - + the points 1, 2 and 3 (forming a triangle in image 1) are mapped into image 2, still forming a triangle, but now they have changed notoriously. If we find the Affine Transformation with these 3 points (you can choose them as you like), then we can apply this found relation to the whole pixels in the image. + Code ==== #. **What does this program do?** - + * Loads an image * Applies an Affine Transform to the image. This Transform is obtained from the relation between three points. We use the function :warp_affine:`warpAffine <>` for that purpose. * Applies a Rotation to the image after being transformed. This rotation is with respect to the image center * Waits until the user exits the program - -#. The tutorial code's is shown lines below. You can also download it from `here `_ + +#. The tutorial code's is shown lines below. You can also download it from `here `_ .. code-block:: cpp @@ -123,14 +123,14 @@ Code /// Load the image src = imread( argv[1], 1 ); - /// Set the dst image the same type and size as src + /// Set the dst image the same type and size as src warp_dst = Mat::zeros( src.rows, src.cols, src.type() ); /// Set your 3 points to calculate the Affine Transform srcTri[0] = Point2f( 0,0 ); srcTri[1] = Point2f( src.cols - 1, 0 ); srcTri[2] = Point2f( 0, src.rows - 1 ); - + dstTri[0] = Point2f( src.cols*0.0, src.rows*0.33 ); dstTri[1] = Point2f( src.cols*0.85, src.rows*0.25 ); dstTri[2] = Point2f( src.cols*0.15, src.rows*0.7 ); @@ -153,7 +153,7 @@ Code /// Rotate the warped image warpAffine( warp_dst, warp_rotate_dst, rot_mat, warp_dst.size() ); - + /// Show what you got namedWindow( source_window, CV_WINDOW_AUTOSIZE ); imshow( source_window, src ); @@ -193,7 +193,7 @@ Explanation #. Initialize the destination image as having the same size and type as the source: .. code-block:: cpp - + warp_dst = Mat::zeros( src.rows, src.cols, src.type() ); #. **Affine Transform:** As we explained lines above, we need two sets of 3 points to derive the affine transform relation. Take a look: @@ -203,11 +203,11 @@ Explanation srcTri[0] = Point2f( 0,0 ); srcTri[1] = Point2f( src.cols - 1, 0 ); srcTri[2] = Point2f( 0, src.rows - 1 ); - + dstTri[0] = Point2f( src.cols*0.0, src.rows*0.33 ); dstTri[1] = Point2f( src.cols*0.85, src.rows*0.25 ); dstTri[2] = Point2f( src.cols*0.15, src.rows*0.7 ); - + You may want to draw the points to make a better idea of how they change. Their locations are approximately the same as the ones depicted in the example figure (in the Theory section). You may note that the size and orientation of the triangle defined by the 3 points change. #. Armed with both sets of points, we calculate the Affine Transform by using OpenCV function :get_affine_transform:`getAffineTransform <>`: @@ -264,7 +264,7 @@ Explanation #. Finally, we display our results in two windows plus the original image for good measure: .. 
code-block:: cpp - + namedWindow( source_window, CV_WINDOW_AUTOSIZE ); imshow( source_window, src ); @@ -292,7 +292,7 @@ Result :alt: Original image :width: 250pt :align: center - + after applying the first Affine Transform we obtain: .. image:: images/Warp_Affine_Tutorial_Result_Warp.jpg diff --git a/doc/tutorials/imgproc/opening_closing_hats/opening_closing_hats.rst b/doc/tutorials/imgproc/opening_closing_hats/opening_closing_hats.rst index 658d42b74c..db96faa2da 100644 --- a/doc/tutorials/imgproc/opening_closing_hats/opening_closing_hats.rst +++ b/doc/tutorials/imgproc/opening_closing_hats/opening_closing_hats.rst @@ -11,8 +11,8 @@ In this tutorial you will learn how to: .. container:: enumeratevisibleitemswithsquare * Use the OpenCV function :morphology_ex:`morphologyEx <>` to apply Morphological Transformation such as: - - + Opening + + + Opening + Closing + Morphological Gradient + Top Hat @@ -24,12 +24,12 @@ Theory .. note:: The explanation below belongs to the book **Learning OpenCV** by Bradski and Kaehler. -In the previous tutorial we covered two basic Morphology operations: +In the previous tutorial we covered two basic Morphology operations: .. container:: enumeratevisibleitemswithsquare * Erosion - * Dilation. + * Dilation. Based on these two we can effectuate more sophisticated transformations to our images. Here we discuss briefly 05 operations offered by OpenCV: @@ -39,7 +39,7 @@ Opening * It is obtained by the erosion of an image followed by a dilation. .. math:: - + dst = open( src, element) = dilate( erode( src, element ) ) * Useful for removing small objects (it is assumed that the objects are bright on a dark foreground) @@ -48,7 +48,7 @@ Opening .. image:: images/Morphology_2_Tutorial_Theory_Opening.png :alt: Opening - :align: center + :align: center Closing --------- @@ -56,14 +56,14 @@ Closing * It is obtained by the dilation of an image followed by an erosion. .. math:: - + dst = close( src, element ) = erode( dilate( src, element ) ) -* Useful to remove small holes (dark regions). +* Useful to remove small holes (dark regions). .. image:: images/Morphology_2_Tutorial_Theory_Closing.png :alt: Closing example - :align: center + :align: center Morphological Gradient @@ -79,7 +79,7 @@ Morphological Gradient .. image:: images/Morphology_2_Tutorial_Theory_Gradient.png :alt: Gradient - :align: center + :align: center Top Hat @@ -88,12 +88,12 @@ Top Hat * It is the difference between an input image and its opening. .. math:: - + dst = tophat( src, element ) = src - open( src, element ) .. image:: images/Morphology_2_Tutorial_Theory_TopHat.png :alt: Top Hat - :align: center + :align: center Black Hat ---------- @@ -101,19 +101,19 @@ Black Hat * It is the difference between the closing and its input image .. math:: - + dst = blackhat( src, element ) = close( src, element ) - src .. image:: images/Morphology_2_Tutorial_Theory_BlackHat.png :alt: Black Hat - :align: center + :align: center Code ====== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" @@ -145,7 +145,7 @@ This tutorial code's is shown lines below. You can also download it from `here < if( !src.data ) { return -1; } - + /// Create window namedWindow( window_name, CV_WINDOW_AUTOSIZE ); @@ -153,12 +153,12 @@ This tutorial code's is shown lines below. 
You can also download it from `here < createTrackbar("Operator:\n 0: Opening - 1: Closing \n 2: Gradient - 3: Top Hat \n 4: Black Hat", window_name, &morph_operator, max_operator, Morphology_Operations ); /// Create Trackbar to select kernel type - createTrackbar( "Element:\n 0: Rect - 1: Cross - 2: Ellipse", window_name, - &morph_elem, max_elem, + createTrackbar( "Element:\n 0: Rect - 1: Cross - 2: Ellipse", window_name, + &morph_elem, max_elem, Morphology_Operations ); /// Create Trackbar to choose kernel size - createTrackbar( "Kernel size:\n 2n +1", window_name, + createTrackbar( "Kernel size:\n 2n +1", window_name, &morph_size, max_kernel_size, Morphology_Operations ); @@ -169,7 +169,7 @@ This tutorial code's is shown lines below. You can also download it from `here < return 0; } - /** + /** * @function Morphology_Operations */ void Morphology_Operations( int, void* ) @@ -177,11 +177,11 @@ This tutorial code's is shown lines below. You can also download it from `here < // Since MORPH_X : 2,3,4,5 and 6 int operation = morph_operator + 2; - Mat element = getStructuringElement( morph_elem, Size( 2*morph_size + 1, 2*morph_size+1 ), Point( morph_size, morph_size ) ); + Mat element = getStructuringElement( morph_elem, Size( 2*morph_size + 1, 2*morph_size+1 ), Point( morph_size, morph_size ) ); /// Apply the specified morphology operation morphologyEx( src, dst, operation, element ); - imshow( window_name, dst ); + imshow( window_name, dst ); } @@ -200,34 +200,34 @@ Explanation .. code-block:: cpp - createTrackbar("Operator:\n 0: Opening - 1: Closing \n 2: Gradient - 3: Top Hat \n 4: Black Hat", - window_name, &morph_operator, max_operator, + createTrackbar("Operator:\n 0: Opening - 1: Closing \n 2: Gradient - 3: Top Hat \n 4: Black Hat", + window_name, &morph_operator, max_operator, Morphology_Operations ); - * The second trackbar **"Element"** returns **morph_elem**, which indicates what kind of structure our kernel is: + * The second trackbar **"Element"** returns **morph_elem**, which indicates what kind of structure our kernel is: .. code-block:: cpp - createTrackbar( "Element:\n 0: Rect - 1: Cross - 2: Ellipse", window_name, - &morph_elem, max_elem, + createTrackbar( "Element:\n 0: Rect - 1: Cross - 2: Ellipse", window_name, + &morph_elem, max_elem, Morphology_Operations ); * The final trackbar **"Kernel Size"** returns the size of the kernel to be used (**morph_size**) .. code-block:: cpp - createTrackbar( "Kernel size:\n 2n +1", window_name, + createTrackbar( "Kernel size:\n 2n +1", window_name, &morph_size, max_kernel_size, Morphology_Operations ); * Every time we move any slider, the user's function **Morphology_Operations** will be called to effectuate a new morphology operation and it will update the output image based on the current trackbar values. - + .. code-block:: cpp - /** + /** * @function Morphology_Operations */ void Morphology_Operations( int, void* ) @@ -235,11 +235,11 @@ Explanation // Since MORPH_X : 2,3,4,5 and 6 int operation = morph_operator + 2; - Mat element = getStructuringElement( morph_elem, Size( 2*morph_size + 1, 2*morph_size+1 ), Point( morph_size, morph_size ) ); + Mat element = getStructuringElement( morph_elem, Size( 2*morph_size + 1, 2*morph_size+1 ), Point( morph_size, morph_size ) ); /// Apply the specified morphology operation morphologyEx( src, dst, operation, element ); - imshow( window_name, dst ); + imshow( window_name, dst ); } @@ -259,11 +259,11 @@ Explanation .. 
code-block:: cpp - int operation = morph_operator + 2; + int operation = morph_operator + 2; * **element**: The kernel to be used. We use the function :get_structuring_element:`getStructuringElement <>` to define our own structure. - + Results ======== @@ -272,11 +272,11 @@ Results .. image:: images/Morphology_2_Tutorial_Original_Image.jpg :alt: Morphology 2: Original image - :align: center + :align: center * And here are two snapshots of the display window. The first picture shows the output after using the operator **Opening** with a cross kernel. The second picture (right side, shows the result of using a **Blackhat** operator with an ellipse kernel. - + .. image:: images/Morphology_2_Tutorial_Cover.jpg :alt: Morphology 2: Result sample - :align: center + :align: center diff --git a/doc/tutorials/imgproc/pyramids/pyramids.rst b/doc/tutorials/imgproc/pyramids/pyramids.rst index 413c0f5832..ee40bf72eb 100644 --- a/doc/tutorials/imgproc/pyramids/pyramids.rst +++ b/doc/tutorials/imgproc/pyramids/pyramids.rst @@ -11,7 +11,7 @@ In this tutorial you will learn how to: .. container:: enumeratevisibleitemswithsquare * Use the OpenCV functions :pyr_up:`pyrUp <>` and :pyr_down:`pyrDown <>` to downsample or upsample a given image. - + Theory ======= @@ -21,9 +21,9 @@ Theory .. container:: enumeratevisibleitemswithsquare * Usually we need to convert an image to a size different than its original. For this, there are two possible options: - - #. *Upsize* the image (zoom in) or - #. *Downsize* it (zoom out). + + #. *Upsize* the image (zoom in) or + #. *Downsize* it (zoom out). * Although there is a *geometric transformation* function in OpenCV that -literally- resize an image (:resize:`resize <>`, which we will show in a future tutorial), in this section we analyze first the use of **Image Pyramids**, which are widely applied in a huge range of vision applications. @@ -39,20 +39,20 @@ Image Pyramid * **Gaussian pyramid:** Used to downsample images - * **Laplacian pyramid:** Used to reconstruct an upsampled image from an image lower in the pyramid (with less resolution) + * **Laplacian pyramid:** Used to reconstruct an upsampled image from an image lower in the pyramid (with less resolution) * In this tutorial we'll use the *Gaussian pyramid*. Gaussian Pyramid ^^^^^^^^^^^^^^^^^ -* Imagine the pyramid as a set of layers in which the higher the layer, the smaller the size. +* Imagine the pyramid as a set of layers in which the higher the layer, the smaller the size. .. image:: images/Pyramids_Tutorial_Pyramid_Theory.png :alt: Pyramid figure - :align: center + :align: center -* Every layer is numbered from bottom to top, so layer :math:`(i+1)` (denoted as :math:`G_{i+1}` is smaller than layer :math:`i` (:math:`G_{i}`). +* Every layer is numbered from bottom to top, so layer :math:`(i+1)` (denoted as :math:`G_{i+1}` is smaller than layer :math:`i` (:math:`G_{i}`). * To produce layer :math:`(i+1)` in the Gaussian pyramid, we do the following: @@ -60,9 +60,9 @@ Gaussian Pyramid .. math:: - \frac{1}{16} \begin{bmatrix} 1 & 4 & 6 & 4 & 1 \\ 4 & 16 & 24 & 16 & 4 \\ 6 & 24 & 36 & 24 & 6 \\ 4 & 16 & 24 & 16 & 4 \\ 1 & 4 & 6 & 4 & 1 \end{bmatrix} + \frac{1}{16} \begin{bmatrix} 1 & 4 & 6 & 4 & 1 \\ 4 & 16 & 24 & 16 & 4 \\ 6 & 24 & 36 & 24 & 6 \\ 4 & 16 & 24 & 16 & 4 \\ 1 & 4 & 6 & 4 & 1 \end{bmatrix} - * Remove every even-numbered row and column. + * Remove every even-numbered row and column. * You can easily notice that the resulting image will be exactly one-quarter the area of its predecessor. 
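A minimal sketch of this single downsampling step, using OpenCV's :pyr_down:`pyrDown <>` function (the input file name is an arbitrary assumption for illustration; pyrDown bundles the Gaussian convolution and the removal of even rows and columns described above into one call):

.. code-block:: cpp

   #include "opencv2/imgproc/imgproc.hpp"
   #include "opencv2/highgui/highgui.hpp"

   int main()
   {
     // Hypothetical input image, stands in for G_0
     cv::Mat G0 = cv::imread( "input.jpg" );
     if( G0.empty() ) { return -1; }

     // One pyramid step: Gaussian smoothing followed by dropping
     // every even-numbered row and column
     cv::Mat G1;
     cv::pyrDown( G0, G1 );   // G1 is roughly half the width and height of G0

     return 0;
   }
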
Iterating this process on the input image :math:`G_{0}` (original image) produces the entire pyramid. @@ -72,7 +72,7 @@ Gaussian Pyramid * Perform a convolution with the same kernel shown above (multiplied by 4) to approximate the values of the "missing pixels" -* These two procedures (downsampling and upsampling as explained above) are implemented by the OpenCV functions :pyr_up:`pyrUp <>` and :pyr_down:`pyrDown <>`, as we will see in an example with the code below: +* These two procedures (downsampling and upsampling as explained above) are implemented by the OpenCV functions :pyr_up:`pyrUp <>` and :pyr_down:`pyrDown <>`, as we will see in an example with the code below: .. note:: When we reduce the size of an image, we are actually *losing* information of the image. @@ -80,9 +80,9 @@ Gaussian Pyramid Code ====== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" @@ -115,7 +115,7 @@ This tutorial code's is shown lines below. You can also download it from `here < { printf(" No data! -- Exiting the program \n"); return -1; } - tmp = src; + tmp = src; dst = tmp; /// Create window @@ -124,7 +124,7 @@ This tutorial code's is shown lines below. You can also download it from `here < /// Loop while( true ) - { + { int c; c = waitKey(10); @@ -132,7 +132,7 @@ This tutorial code's is shown lines below. You can also download it from `here < { break; } if( (char)c == 'u' ) { pyrUp( tmp, dst, Size( tmp.cols*2, tmp.rows*2 ) ); - printf( "** Zoom In: Image x 2 \n" ); + printf( "** Zoom In: Image x 2 \n" ); } else if( (char)c == 'd' ) { pyrDown( tmp, dst, Size( tmp.cols/2, tmp.rows/2 ) ); @@ -141,7 +141,7 @@ This tutorial code's is shown lines below. You can also download it from `here < imshow( window_name, dst ); tmp = dst; - } + } return 0; } @@ -160,13 +160,13 @@ Explanation { printf(" No data! -- Exiting the program \n"); return -1; } - * Create a Mat object to store the result of the operations (*dst*) and one to save temporal results (*tmp*). + * Create a Mat object to store the result of the operations (*dst*) and one to save temporal results (*tmp*). .. code-block:: cpp - + Mat src, dst, tmp; /* ... */ - tmp = src; + tmp = src; dst = tmp; @@ -183,7 +183,7 @@ Explanation .. code-block:: cpp while( true ) - { + { int c; c = waitKey(10); @@ -191,7 +191,7 @@ Explanation { break; } if( (char)c == 'u' ) { pyrUp( tmp, dst, Size( tmp.cols*2, tmp.rows*2 ) ); - printf( "** Zoom In: Image x 2 \n" ); + printf( "** Zoom In: Image x 2 \n" ); } else if( (char)c == 'd' ) { pyrDown( tmp, dst, Size( tmp.cols/2, tmp.rows/2 ) ); @@ -200,12 +200,12 @@ Explanation imshow( window_name, dst ); tmp = dst; - } - + } + Our program exits if the user presses *ESC*. Besides, it has two options: - - * **Perform upsampling (after pressing 'u')** + + * **Perform upsampling (after pressing 'u')** .. code-block:: cpp @@ -217,7 +217,7 @@ Explanation * *dst*: The destination image (to be shown on screen, supposedly the double of the input image) * *Size( tmp.cols*2, tmp.rows*2 )* : The destination size. Since we are upsampling, :pyr_up:`pyrUp <>` expects a size double than the input image (in this case *tmp*). - * **Perform downsampling (after pressing 'd')** + * **Perform downsampling (after pressing 'd')** .. 
code-block:: cpp @@ -232,7 +232,7 @@ Explanation * Notice that it is important that the input image can be divided by a factor of two (in both dimensions). Otherwise, an error will be shown. * Finally, we update the input image **tmp** with the current image displayed, so the subsequent operations are performed on it. - + .. code-block:: cpp tmp = dst; @@ -245,19 +245,19 @@ Results * After compiling the code above we can test it. The program calls an image **chicky_512.jpg** that comes in the *tutorial_code/image* folder. Notice that this image is :math:`512 \times 512`, hence a downsample won't generate any error (:math:`512 = 2^{9}`). The original image is shown below: .. image:: images/Pyramids_Tutorial_Original_Image.jpg - :alt: Pyramids: Original image - :align: center + :alt: Pyramids: Original image + :align: center * First we apply two successive :pyr_down:`pyrDown <>` operations by pressing 'd'. Our output is: - + .. image:: images/Pyramids_Tutorial_PyrDown_Result.jpg :alt: Pyramids: PyrDown Result - :align: center + :align: center * Note that we should have lost some resolution due to the fact that we are diminishing the size of the image. This is evident after we apply :pyr_up:`pyrUp <>` twice (by pressing 'u'). Our output is now: - + .. image:: images/Pyramids_Tutorial_PyrUp_Result.jpg :alt: Pyramids: PyrUp Result - :align: center + :align: center diff --git a/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.rst b/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.rst index 94bcab1baf..90baaaff95 100644 --- a/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.rst +++ b/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.rst @@ -11,9 +11,9 @@ In this tutorial you will learn how to: .. container:: enumeratevisibleitemswithsquare - * Use the OpenCV function :bounding_rect:`boundingRect <>` + * Use the OpenCV function :bounding_rect:`boundingRect <>` * Use the OpenCV function :min_enclosing_circle:`minEnclosingCircle <>` - + Theory ====== @@ -21,9 +21,9 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -73,7 +73,7 @@ This tutorial code's is shown lines below. You can also download it from `here < /// Detect edges using Threshold threshold( src_gray, threshold_output, thresh, 255, THRESH_BINARY ); - /// Find contours + /// Find contours findContours( threshold_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) ); /// Approximate contours to polygons + get bounding rects and circles @@ -83,18 +83,18 @@ This tutorial code's is shown lines below. 
You can also download it from `here < vectorradius( contours.size() ); for( int i = 0; i < contours.size(); i++ ) - { approxPolyDP( Mat(contours[i]), contours_poly[i], 3, true ); - boundRect[i] = boundingRect( Mat(contours_poly[i]) ); + { approxPolyDP( Mat(contours[i]), contours_poly[i], 3, true ); + boundRect[i] = boundingRect( Mat(contours_poly[i]) ); minEnclosingCircle( contours_poly[i], center[i], radius[i] ); - } + } /// Draw polygonal contour + bonding rects + circles Mat drawing = Mat::zeros( threshold_output.size(), CV_8UC3 ); for( int i = 0; i< contours.size(); i++ ) - { + { Scalar color = Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) ); - drawContours( drawing, contours_poly, i, color, 1, 8, vector(), 0, Point() ); + drawContours( drawing, contours_poly, i, color, 1, 8, vector(), 0, Point() ); rectangle( drawing, boundRect[i].tl(), boundRect[i].br(), color, 2, 8, 0 ); circle( drawing, center[i], (int)radius[i], color, 2, 8, 0 ); } @@ -112,13 +112,13 @@ Result #. Here it is: - ========== ========== - |BRC_0| |BRC_1| - ========== ========== + ========== ========== + |BRC_0| |BRC_1| + ========== ========== .. |BRC_0| image:: images/Bounding_Rects_Circles_Source_Image.jpg :align: middle .. |BRC_1| image:: images/Bounding_Rects_Circles_Result.jpg - :align: middle + :align: middle diff --git a/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.rst b/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.rst index 53d35f336b..894df8605e 100644 --- a/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.rst +++ b/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.rst @@ -11,9 +11,9 @@ In this tutorial you will learn how to: .. container:: enumeratevisibleitemswithsquare - * Use the OpenCV function :min_area_rect:`minAreaRect <>` + * Use the OpenCV function :min_area_rect:`minAreaRect <>` * Use the OpenCV function :fit_ellipse:`fitEllipse <>` - + Theory ====== @@ -21,9 +21,9 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -73,7 +73,7 @@ This tutorial code's is shown lines below. You can also download it from `here < /// Detect edges using Threshold threshold( src_gray, threshold_output, thresh, 255, THRESH_BINARY ); - /// Find contours + /// Find contours findContours( threshold_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) ); /// Find the rotated rectangles and ellipses for each contour @@ -81,29 +81,29 @@ This tutorial code's is shown lines below. 
You can also download it from `here < vector minEllipse( contours.size() ); for( int i = 0; i < contours.size(); i++ ) - { minRect[i] = minAreaRect( Mat(contours[i]) ); + { minRect[i] = minAreaRect( Mat(contours[i]) ); if( contours[i].size() > 5 ) { minEllipse[i] = fitEllipse( Mat(contours[i]) ); } - } + } /// Draw contours + rotated rects + ellipses Mat drawing = Mat::zeros( threshold_output.size(), CV_8UC3 ); for( int i = 0; i< contours.size(); i++ ) - { + { Scalar color = Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) ); // contour - drawContours( drawing, contours, i, color, 1, 8, vector(), 0, Point() ); + drawContours( drawing, contours, i, color, 1, 8, vector(), 0, Point() ); // ellipse ellipse( drawing, minEllipse[i], color, 2, 8 ); - // rotated rectangle + // rotated rectangle Point2f rect_points[4]; minRect[i].points( rect_points ); for( int j = 0; j < 4; j++ ) - line( drawing, rect_points[j], rect_points[(j+1)%4], color, 1, 8 ); + line( drawing, rect_points[j], rect_points[(j+1)%4], color, 1, 8 ); } /// Show in a window namedWindow( "Contours", CV_WINDOW_AUTOSIZE ); - imshow( "Contours", drawing ); + imshow( "Contours", drawing ); } Explanation @@ -114,13 +114,13 @@ Result #. Here it is: - ========== ========== - |BRE_0| |BRE_1| - ========== ========== + ========== ========== + |BRE_0| |BRE_1| + ========== ========== .. |BRE_0| image:: images/Bounding_Rotated_Ellipses_Source_Image.jpg :align: middle .. |BRE_1| image:: images/Bounding_Rotated_Ellipses_Result.jpg - :align: middle + :align: middle diff --git a/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.rst b/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.rst index 2175737ace..decdf31ef6 100644 --- a/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.rst +++ b/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.rst @@ -10,8 +10,8 @@ In this tutorial you will learn how to: .. container:: enumeratevisibleitemswithsquare - * Use the OpenCV function :find_contours:`findContours <>` - * Use the OpenCV function :draw_contours:`drawContours <>` + * Use the OpenCV function :find_contours:`findContours <>` + * Use the OpenCV function :draw_contours:`drawContours <>` Theory ====== @@ -19,9 +19,9 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -71,20 +71,20 @@ This tutorial code's is shown lines below. You can also download it from `here < /// Detect edges using canny Canny( src_gray, canny_output, thresh, thresh*2, 3 ); - /// Find contours + /// Find contours findContours( canny_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) ); /// Draw contours Mat drawing = Mat::zeros( canny_output.size(), CV_8UC3 ); for( int i = 0; i< contours.size(); i++ ) - { + { Scalar color = Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) ); - drawContours( drawing, contours, i, color, 2, 8, hierarchy, 0, Point() ); + drawContours( drawing, contours, i, color, 2, 8, hierarchy, 0, Point() ); } /// Show in a window namedWindow( "Contours", CV_WINDOW_AUTOSIZE ); - imshow( "Contours", drawing ); + imshow( "Contours", drawing ); } Explanation @@ -95,13 +95,13 @@ Result #. 
Here it is: - ============= ============= - |contour_0| |contour_1| - ============= ============= + ============= ============= + |contour_0| |contour_1| + ============= ============= .. |contour_0| image:: images/Find_Contours_Original_Image.jpg :align: middle .. |contour_1| image:: images/Find_Contours_Result.jpg - :align: middle + :align: middle diff --git a/doc/tutorials/imgproc/shapedescriptors/hull/hull.rst b/doc/tutorials/imgproc/shapedescriptors/hull/hull.rst index 220d4754f3..c6abdd2c82 100644 --- a/doc/tutorials/imgproc/shapedescriptors/hull/hull.rst +++ b/doc/tutorials/imgproc/shapedescriptors/hull/hull.rst @@ -10,7 +10,7 @@ In this tutorial you will learn how to: .. container:: enumeratevisibleitemswithsquare - * Use the OpenCV function :convex_hull:`convexHull <>` + * Use the OpenCV function :convex_hull:`convexHull <>` Theory @@ -19,11 +19,11 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp - #include "opencv2/highgui/highgui.hpp" + #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" #include #include @@ -33,7 +33,7 @@ This tutorial code's is shown lines below. You can also download it from `here < using namespace std; Mat src; Mat src_gray; - int thresh = 100; + int thresh = 100; int max_thresh = 255; RNG rng(12345); @@ -73,21 +73,21 @@ This tutorial code's is shown lines below. You can also download it from `here < /// Detect edges using Threshold threshold( src_gray, threshold_output, thresh, 255, THRESH_BINARY ); - /// Find contours + /// Find contours findContours( threshold_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) ); /// Find the convex hull object for each contour vector >hull( contours.size() ); for( int i = 0; i < contours.size(); i++ ) - { convexHull( Mat(contours[i]), hull[i], false ); } + { convexHull( Mat(contours[i]), hull[i], false ); } /// Draw contours + hull results Mat drawing = Mat::zeros( threshold_output.size(), CV_8UC3 ); for( int i = 0; i< contours.size(); i++ ) - { + { Scalar color = Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) ); - drawContours( drawing, contours, i, color, 1, 8, vector(), 0, Point() ); - drawContours( drawing, hull, i, color, 1, 8, vector(), 0, Point() ); + drawContours( drawing, contours, i, color, 1, 8, vector(), 0, Point() ); + drawContours( drawing, hull, i, color, 1, 8, vector(), 0, Point() ); } /// Show in a window @@ -104,13 +104,13 @@ Result #. Here it is: - ========== ========== - |Hull_0| |Hull_1| - ========== ========== + ========== ========== + |Hull_0| |Hull_1| + ========== ========== .. |Hull_0| image:: images/Hull_Original_Image.jpg :align: middle .. |Hull_1| image:: images/Hull_Result.jpg - :align: middle + :align: middle diff --git a/doc/tutorials/imgproc/shapedescriptors/moments/moments.rst b/doc/tutorials/imgproc/shapedescriptors/moments/moments.rst index 66fcfb7b0e..6ef2de6ee6 100644 --- a/doc/tutorials/imgproc/shapedescriptors/moments/moments.rst +++ b/doc/tutorials/imgproc/shapedescriptors/moments/moments.rst @@ -11,9 +11,9 @@ In this tutorial you will learn how to: .. 
container:: enumeratevisibleitemswithsquare - * Use the OpenCV function :moments:`moments <>` + * Use the OpenCV function :moments:`moments <>` * Use the OpenCV function :contour_area:`contourArea <>` - * Use the OpenCV function :arc_length:`arcLength <>` + * Use the OpenCV function :arc_length:`arcLength <>` Theory ====== @@ -21,9 +21,9 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -73,7 +73,7 @@ This tutorial code's is shown lines below. You can also download it from `here < /// Detect edges using canny Canny( src_gray, canny_output, thresh, thresh*2, 3 ); - /// Find contours + /// Find contours findContours( canny_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) ); /// Get the moments @@ -81,7 +81,7 @@ This tutorial code's is shown lines below. You can also download it from `here < for( int i = 0; i < contours.size(); i++ ) { mu[i] = moments( contours[i], false ); } - /// Get the mass centers: + /// Get the mass centers: vector mc( contours.size() ); for( int i = 0; i < contours.size(); i++ ) { mc[i] = Point2f( mu[i].m10/mu[i].m00 , mu[i].m01/mu[i].m00 ); } @@ -89,9 +89,9 @@ This tutorial code's is shown lines below. You can also download it from `here < /// Draw contours Mat drawing = Mat::zeros( canny_output.size(), CV_8UC3 ); for( int i = 0; i< contours.size(); i++ ) - { + { Scalar color = Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) ); - drawContours( drawing, contours, i, color, 2, 8, hierarchy, 0, Point() ); + drawContours( drawing, contours, i, color, 2, 8, hierarchy, 0, Point() ); circle( drawing, mc[i], 4, color, -1, 8, 0 ); } @@ -103,9 +103,9 @@ This tutorial code's is shown lines below. You can also download it from `here < printf("\t Info: Area and Contour Length \n"); for( int i = 0; i< contours.size(); i++ ) { - printf(" * Contour[%d] - Area (M_00) = %.2f - Area OpenCV: %.2f - Length: %.2f \n", i, mu[i].m00, contourArea(contours[i]), arcLength( contours[i], true ) ); + printf(" * Contour[%d] - Area (M_00) = %.2f - Area OpenCV: %.2f - Length: %.2f \n", i, mu[i].m00, contourArea(contours[i]), arcLength( contours[i], true ) ); Scalar color = Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) ); - drawContours( drawing, contours, i, color, 2, 8, hierarchy, 0, Point() ); + drawContours( drawing, contours, i, color, 2, 8, hierarchy, 0, Point() ); circle( drawing, mc[i], 4, color, -1, 8, 0 ); } } @@ -118,9 +118,9 @@ Result #. Here it is: - ========== ========== ========== - |MU_0| |MU_1| |MU_2| - ========== ========== ========== + ========== ========== ========== + |MU_0| |MU_1| |MU_2| + ========== ========== ========== .. |MU_0| image:: images/Moments_Source_Image.jpg :width: 250pt @@ -128,9 +128,9 @@ Result .. |MU_1| image:: images/Moments_Result1.jpg :width: 250pt - :align: middle + :align: middle .. 
|MU_2| image:: images/Moments_Result2.jpg :width: 250pt - :align: middle + :align: middle diff --git a/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.rst b/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.rst index 676d29a99c..a73a8e92e5 100644 --- a/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.rst +++ b/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.rst @@ -10,8 +10,8 @@ In this tutorial you will learn how to: .. container:: enumeratevisibleitemswithsquare - * Use the OpenCV function :point_polygon_test:`pointPolygonTest <>` - + * Use the OpenCV function :point_polygon_test:`pointPolygonTest <>` + Theory ====== @@ -19,9 +19,9 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -51,13 +51,13 @@ This tutorial code's is shown lines below. You can also download it from `here < /// Draw it in src for( int j = 0; j < 6; j++ ) - { line( src, vert[j], vert[(j+1)%6], Scalar( 255 ), 3, 8 ); } + { line( src, vert[j], vert[(j+1)%6], Scalar( 255 ), 3, 8 ); } /// Get the contours vector > contours; vector hierarchy; Mat src_copy = src.clone(); - findContours( src_copy, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE); + findContours( src_copy, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE); /// Calculate the distances to the contour Mat raw_dist( src.size(), CV_32FC1 ); @@ -70,19 +70,19 @@ This tutorial code's is shown lines below. You can also download it from `here < double minVal; double maxVal; minMaxLoc( raw_dist, &minVal, &maxVal, 0, 0, Mat() ); minVal = abs(minVal); maxVal = abs(maxVal); - + /// Depicting the distances graphically Mat drawing = Mat::zeros( src.size(), CV_8UC3 ); for( int j = 0; j < src.rows; j++ ) { for( int i = 0; i < src.cols; i++ ) - { + { if( raw_dist.at(j,i) < 0 ) { drawing.at(j,i)[0] = 255 - (int) abs(raw_dist.at(j,i))*255/minVal; } else if( raw_dist.at(j,i) > 0 ) - { drawing.at(j,i)[2] = 255 - (int) raw_dist.at(j,i)*255/maxVal; } + { drawing.at(j,i)[2] = 255 - (int) raw_dist.at(j,i)*255/maxVal; } else - { drawing.at(j,i)[0] = 255; drawing.at(j,i)[1] = 255; drawing.at(j,i)[2] = 255; } + { drawing.at(j,i)[0] = 255; drawing.at(j,i)[1] = 255; drawing.at(j,i)[2] = 255; } } } @@ -105,13 +105,13 @@ Result #. Here it is: - ========== ========== - |PPT_0| |PPT_1| - ========== ========== + ========== ========== + |PPT_0| |PPT_1| + ========== ========== .. |PPT_0| image:: images/Point_Polygon_Test_Source_Image.png :align: middle .. |PPT_1| image:: images/Point_Polygon_Test_Result.jpg - :align: middle + :align: middle diff --git a/doc/tutorials/imgproc/threshold/threshold.rst b/doc/tutorials/imgproc/threshold/threshold.rst index 432ca28db7..7788e6c515 100644 --- a/doc/tutorials/imgproc/threshold/threshold.rst +++ b/doc/tutorials/imgproc/threshold/threshold.rst @@ -26,18 +26,18 @@ What is Thresholding? * Application example: Separate out regions of an image corresponding to objects which we want to analyze. This separation is based on the variation of intensity between the object pixels and the background pixels. 
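A minimal sketch of such a separation, using OpenCV's :threshold:`threshold <>` function (the input file name and the cut-off value of 100 are assumptions made for this illustration only):

.. code-block:: cpp

   #include "opencv2/imgproc/imgproc.hpp"
   #include "opencv2/highgui/highgui.hpp"

   int main()
   {
     // Load a hypothetical image directly as grayscale
     cv::Mat src_gray = cv::imread( "coins.jpg", 0 );
     if( src_gray.empty() ) { return -1; }

     // Pixels brighter than 100 become 255 (object), the rest become 0 (background)
     cv::Mat dst;
     cv::threshold( src_gray, dst, 100, 255, cv::THRESH_BINARY );

     cv::imshow( "Binary mask", dst );
     cv::waitKey(0);
     return 0;
   }
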
-* To differentiate the pixels we are interested in from the rest (which will eventually be rejected), we perform a comparison of each pixel intensity value with respect to a *threshold* (determined according to the problem to solve). +* To differentiate the pixels we are interested in from the rest (which will eventually be rejected), we perform a comparison of each pixel intensity value with respect to a *threshold* (determined according to the problem to solve). * Once we have separated properly the important pixels, we can set them with a determined value to identify them (i.e. we can assign them a value of :math:`0` (black), :math:`255` (white) or any value that suits your needs). .. image:: images/Threshold_Tutorial_Theory_Example.jpg :alt: Threshold simple example - :align: center + :align: center Types of Thresholding ----------------------- -* OpenCV offers the function :threshold:`threshold <>` to perform thresholding operations. +* OpenCV offers the function :threshold:`threshold <>` to perform thresholding operations. * We can effectuate :math:`5` types of Thresholding operations with this function. We will explain them in the following subsections. @@ -45,7 +45,7 @@ Types of Thresholding .. image:: images/Threshold_Tutorial_Theory_Base_Figure.png :alt: Threshold Binary - :align: center + :align: center Threshold Binary ^^^^^^^^^^^^^^^^^ @@ -53,86 +53,86 @@ Threshold Binary * This thresholding operation can be expressed as: .. math:: - - \texttt{dst} (x,y) = \fork{\texttt{maxVal}}{if $\texttt{src}(x,y) > \texttt{thresh}$}{0}{otherwise} - + + \texttt{dst} (x,y) = \fork{\texttt{maxVal}}{if $\texttt{src}(x,y) > \texttt{thresh}$}{0}{otherwise} + * So, if the intensity of the pixel :math:`src(x,y)` is higher than :math:`thresh`, then the new pixel intensity is set to a :math:`MaxVal`. Otherwise, the pixels are set to :math:`0`. .. image:: images/Threshold_Tutorial_Theory_Binary.png :alt: Threshold Binary - :align: center + :align: center Threshold Binary, Inverted ^^^^^^^^^^^^^^^^^^^^^^^^^^^ * This thresholding operation can be expressed as: - + .. math:: - - \texttt{dst} (x,y) = \fork{0}{if $\texttt{src}(x,y) > \texttt{thresh}$}{\texttt{maxVal}}{otherwise} + + \texttt{dst} (x,y) = \fork{0}{if $\texttt{src}(x,y) > \texttt{thresh}$}{\texttt{maxVal}}{otherwise} * If the intensity of the pixel :math:`src(x,y)` is higher than :math:`thresh`, then the new pixel intensity is set to a :math:`0`. Otherwise, it is set to :math:`MaxVal`. - + .. image:: images/Threshold_Tutorial_Theory_Binary_Inverted.png :alt: Threshold Binary Inverted - :align: center + :align: center Truncate ^^^^^^^^^ - + * This thresholding operation can be expressed as: - + .. math:: - - \texttt{dst} (x,y) = \fork{\texttt{threshold}}{if $\texttt{src}(x,y) > \texttt{thresh}$}{\texttt{src}(x,y)}{otherwise} - + + \texttt{dst} (x,y) = \fork{\texttt{threshold}}{if $\texttt{src}(x,y) > \texttt{thresh}$}{\texttt{src}(x,y)}{otherwise} + * The maximum intensity value for the pixels is :math:`thresh`, if :math:`src(x,y)` is greater, then its value is *truncated*. See figure below: - + .. image:: images/Threshold_Tutorial_Theory_Truncate.png :alt: Threshold Truncate - :align: center - + :align: center + Threshold to Zero ^^^^^^^^^^^^^^^^^^ * This operation can be expressed as: - + .. 
math:: - - \texttt{dst} (x,y) = \fork{\texttt{src}(x,y)}{if $\texttt{src}(x,y) > \texttt{thresh}$}{0}{otherwise} + + \texttt{dst} (x,y) = \fork{\texttt{src}(x,y)}{if $\texttt{src}(x,y) > \texttt{thresh}$}{0}{otherwise} * If :math:`src(x,y)` is lower than :math:`thresh`, the new pixel value will be set to :math:`0`. .. image:: images/Threshold_Tutorial_Theory_Zero.png :alt: Threshold Zero - :align: center + :align: center Threshold to Zero, Inverted ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * This operation can be expressed as: - + .. math:: - - \texttt{dst} (x,y) = \fork{0}{if $\texttt{src}(x,y) > \texttt{thresh}$}{\texttt{src}(x,y)}{otherwise} + + \texttt{dst} (x,y) = \fork{0}{if $\texttt{src}(x,y) > \texttt{thresh}$}{\texttt{src}(x,y)}{otherwise} * If :math:`src(x,y)` is greater than :math:`thresh`, the new pixel value will be set to :math:`0`. .. image:: images/Threshold_Tutorial_Theory_Zero_Inverted.png :alt: Threshold Zero Inverted - :align: center + :align: center Code ====== -The tutorial code's is shown lines below. You can also download it from `here `_ +The tutorial code's is shown lines below. You can also download it from `here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" @@ -173,8 +173,8 @@ The tutorial code's is shown lines below. You can also download it from `here `_ discussion group or OpenCV `Q&A forum `_ . We'll do our best to help you out. + +General info +============ +**TODO:** rewrite this section. + +OpenCV4Android SDK uses Android OpenCV Manager for library initialization. OpenCV Manager provides the following benefits: + +* Compact apk-size, since all applications use the same binaries from Manager and do not store native libs within themselves; + +* Hardware specific optimizations are automatically enabled on all supported platforms; + +* Regular updates and bug fixes; + +* Trusted OpenCV library source. All packages with OpenCV are published on Google Play; + + .. + + +For additional information on OpenCV Manager see the: + +* |OpenCV4Android_Tutorial|_ + +* |OpenCV4Android_Reference|_ + + .. + + +This package is quite close to the current OpenCV4Android distribution. If you're beginner with OpenCV, tutorial from above will help you to start. + +* Library Project for Java development with Eclipse. + +* C++ headers and libraries for native application development. + +* Java samples, javadoc documentation. + +* prebuilt binaries for ARM-v7a platform. + + .. + +.. |OpenCV4Android_Tutorial| replace:: Tutorial +.. _OpenCV4Android_Tutorial: http://docs.opencv.org/doc/tutorials/introduction/android_binary_package/android_binary_package.html#android-binary-package +.. |OpenCV4Android_Reference| replace:: Reference Manual +.. _OpenCV4Android_Reference: http://docs.opencv.org/android/refman.html + +Tegra Android Development Pack users +==================================== + +You may have used `Tegra Android Development Pack `_ +(**TADP**) released by **NVIDIA** for Android development environment setup. + +Beside Android development tools the TADP 2.0 includes OpenCV4Android SDK 2.4.2, so it can be already installed in your system and you can skip to running the ``face-detection`` sample. + +More details regarding TADP can be found in the :ref:`android_dev_intro` guide. + +Manual OpenCV4Android SDK setup +=============================== + +Get the OpenCV4Android SDK +-------------------------- + +#. Go to the `OpenCV dowload page on SourceForge `_ and download the latest available version. 
Currently it's |opencv_android_bin_pack_url|_ + +#. Create a new folder for development for Android with OpenCV development. For this tutorial I have unpacked OpenCV to the :file:`C:\\Work\\OpenCV4Android\\` directory. + + .. note:: Better to use a path without spaces in it. Otherwise you may have problems with :command:`ndk-build`. + +#. Unpack the OpenCV package into the chosen directory. + + You can unpack it using any popular archiver (e.g with |seven_zip|_): + + .. image:: images/android_package_7zip.png + :alt: Exploring OpenCV package with 7-Zip + :align: center + + On Unix you can use the following command: + + .. code-block:: bash + + unzip ~/Downloads/OpenCV-2.4.2-android-sdk.zip + +.. |opencv_android_bin_pack| replace:: OpenCV-2.4.2-android-sdk.zip +.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.2/OpenCV-2.4.2-android-sdk.zip/download +.. |opencv_android_bin_pack_url| replace:: |opencv_android_bin_pack| +.. |seven_zip| replace:: 7-Zip +.. _seven_zip: http://www.7-zip.org/ + +Open OpenCV library and samples in Eclipse +------------------------------------------ + +#. Start *Eclipse* and choose your workspace location. + + We recommend to start working with OpenCV for Android from a new clean workspace. A new Eclipse workspace can for example be created in the folder where you have unpacked OpenCV4Android SDK package: + + .. image:: images/eclipse_1_choose_workspace.png + :alt: Choosing C:\Work\android-opencv\ as workspace location + :align: center + +#. Import OpenCV library and samples into workspace. + + OpenCV library is packed as a ready-for-use `Android Library Project + `_. You can simply reference it in your projects. + + Each sample included into the |opencv_android_bin_pack| is a regular Android project that already references OpenCV library. + Follow the steps below to import OpenCV and samples into the workspace: + + * Right click on the :guilabel:`Package Explorer` window and choose :guilabel:`Import...` option from the context menu: + + .. image:: images/eclipse_5_import_command.png + :alt: Select Import... from context menu + :align: center + + * In the main panel select :menuselection:`General --> Existing Projects into Workspace` and press :guilabel:`Next` button: + + .. image:: images/eclipse_6_import_existing_projects.png + :alt: General > Existing Projects into Workspace + :align: center + + * In the :guilabel:`Select root directory` field locate your OpenCV package folder. Eclipse should automatically locate OpenCV library and samples: + + .. image:: images/eclipse_7_select_projects.png + :alt: Locate OpenCV library and samples + :align: center + + * Click :guilabel:`Finish` button to complete the import operation. + + After clicking :guilabel:`Finish` button Eclipse will load all selected projects into workspace. Numerous errors will be indicated: + + .. image:: images/eclipse_8_false_alarm.png + :alt: Confusing Eclipse screen with numerous errors + :align: center + + However, **all these errors are only false-alarms**! + + Just give a minute to Eclipse to complete initialization. + + In some cases these errors disappear after :menuselection:`Project --> Clean... --> Clean all --> OK` + or after pressing :kbd:`F5` (for Refresh action) when selecting error-label-marked projects in :guilabel:`Package Explorer`. 
+ + Sometimes more advanced manipulations are required: + + * The provided projects are configured for the ``API 11`` target (and ``API 9`` for the library), which may be a missing platform in your Android SDK. + Right-click on any project, select :guilabel:`Properties` and then :guilabel:`Android` on the left pane. + Select a target with `API Level` 11 or higher: + + .. image:: images/eclipse_8a_target.png + :alt: Updating target + :align: center + + Eclipse will rebuild your workspace and the error icons will disappear one by one: + + .. image:: images/eclipse_9_errors_dissapearing.png + :alt: After small help Eclipse removes error icons! + :align: center + + Once Eclipse completes the build, you will have a clean workspace without any build errors: + + .. image:: images/eclipse_10_crystal_clean.png + :alt: OpenCV package imported into Eclipse + :align: center + +.. _Running_OpenCV_Samples: + +Running OpenCV Samples +---------------------- + +At this point you should be able to build and run the samples. Keep in mind that the ``face-detection``, ``Tutorial 3`` and ``Tutorial 4`` samples include some native code and require the Android NDK and the CDT plugin for Eclipse to build working applications. +If you haven't installed these tools, see the corresponding section of :ref:`Android_Dev_Intro`. + +Also, please note that the ``Tutorial 0`` and ``Tutorial 1`` samples use the Java Camera API, which is definitely accessible on an emulator from the Android SDK. +Other samples use the OpenCV Native Camera, which may not work with an emulator. + +.. note:: Recent *Android SDK tools, revision 19+* can run ARM v7a OS images, but such images are not available for all Android versions. + +Running samples from Eclipse is simple: + +* Connect your device with the :command:`adb` tool from the Android SDK or create an emulator with camera support. + + * See the `Managing Virtual Devices + `_ document for help with the Android Emulator. + * See `Using Hardware Devices + `_ for help with real devices (not emulators). + + +* Select the project you want to start in :guilabel:`Package Explorer` and press :kbd:`Ctrl + F11`, select :menuselection:`Run --> Run` from the main menu, or click the :guilabel:`Run` button on the toolbar. + + .. note:: The Android Emulator can take several minutes to start, so please be patient. + +* On the first run Eclipse will ask you about the running mode for your application: + + .. image:: images/eclipse_11_run_as.png + :alt: Run sample as Android Application + :align: center + +* Select the :guilabel:`Android Application` option and click the :guilabel:`OK` button. Eclipse will install and run the sample. + + Chances are that on the first launch you will not have the `OpenCV Manager `_ package installed. + In this case you will see the following message: + + .. image:: images/android_emulator_opencv_manager_fail.png + :alt: You will see this message if you have no OpenCV Manager installed + :align: center + + To get rid of the message, you will need to install `OpenCV Manager` and the appropriate `OpenCV binary pack`. + Simply tap :menuselection:`Yes` if you have *Google Play Market* installed on your device/emulator. It will redirect you to the corresponding page on *Google Play Market*. + + If you have no access to the *Market*, which is often the case with emulators, you will need to install the packages from the OpenCV4Android SDK folder manually. Open the console/terminal and type in the following two commands: + + ..
code-block:: sh + :linenos: + + /platform-tools/adb install /apk/OpenCV_2.4.2_Manager.apk + /platform-tools/adb install /apk/OpenCV_2.4.2_binary_pack_armv7a.apk + + If you're running Windows, that will probably look like this: + + .. image:: images/install_opencv_manager_with_adb.png + :alt: Run these commands in the console to install OpenCV Manager + :align: center + + When done, you will be able to run OpenCV samples on your device/emulator seamlessly. + +* Here is the ``Tutorial 2 - Use OpenCV Camera`` sample, running on top of the stock camera preview of the emulator. + + .. image:: images/emulator_canny.png + :height: 600px + :alt: Tutorial 1 Basic - 1. Add OpenCV - running Canny + :align: center + +What's next +=========== + +Now that you have your instance of the OpenCV4Android SDK set up and configured, you may want to proceed to using OpenCV in your own application. You can learn how to do that in a separate *Development with OpenCV* tutorial. \ No newline at end of file diff --git a/doc/tutorials/introduction/android_binary_package/android_binary_package_using_with_NDK.rst b/doc/tutorials/introduction/android_binary_package/android_binary_package_using_with_NDK.rst index 2d4744c489..75dbf80950 100644 --- a/doc/tutorials/introduction/android_binary_package/android_binary_package_using_with_NDK.rst +++ b/doc/tutorials/introduction/android_binary_package/android_binary_package_using_with_NDK.rst @@ -4,7 +4,7 @@ Using OpenCV in C++ code with OpenCV4Android SDK -********************************************* +************************************************ The Android way is writing all your code in Java. But sometimes, it is not enough and you need to go to the native level and write some parts of your application in C/C++. This is especially important when you already have some computer vision code which is written in C++ and uses OpenCV, and you want to reuse it in your Android application. diff --git a/doc/tutorials/introduction/android_binary_package/android_dev_intro.rst b/doc/tutorials/introduction/android_binary_package/android_dev_intro.rst new file mode 100644 index 0000000000..91948afbc2 --- /dev/null +++ b/doc/tutorials/introduction/android_binary_package/android_dev_intro.rst @@ -0,0 +1,340 @@ + +.. _Android_Dev_Intro: + + +Introduction to Android Development +************************************* + +This guide is designed to help you learn the basics of Android development and quickly set up your working environment. + +This guide was written with Windows 7 in mind, though it works with Linux (Ubuntu), Mac OS X and any other OS supported by the Android SDK. + +If you encounter any error after thoroughly following these steps, feel free to contact us via `OpenCV4Android `_ discussion group or OpenCV `Q&A forum `_. We'll do our best to help you out. + +Quick environment setup for Android development +=============================================== + +If you are setting up a clean environment, you can try `Tegra Android Development Pack `_ +(**TADP**) released by **NVIDIA**. + +When unpacked, TADP will cover all of the environment setup automatically and you can skip the rest of the guide. + +If you are a beginner in Android development, we also recommend that you start with TADP. + +.. note:: *NVIDIA*\ 's Tegra Android Development Pack includes some special features for |Nvidia_Tegra_Platform|_ but its use is not limited to *Tegra* devices only. + + * You need at least *1.6 Gb* free disk space for the install.
+ + * TADP will download Android SDK platforms and Android NDK from Google's server, so Internet connection is required for the installation. + + * TADP may ask you to flash your development kit at the end of installation process. Just skip this step if you have no |Tegra_Development_Kit|_\ . + + * (``UNIX``) TADP will ask you for *root* in the middle of installation, so you need to be a member of *sudo* group. + + .. + + +.. |Nvidia_Tegra_Platform| replace:: *NVIDIA*\ ’s Tegra platform +.. _Nvidia_Tegra_Platform: http://developer.nvidia.com/node/19071 +.. |Tegra_Development_Kit| replace:: Tegra Development Kit +.. _Tegra_Development_Kit: http://developer.nvidia.com/mobile/tegra-hardware-sales-inquiries + +.. _Android_Environment_Setup_Lite: + +Manual environment setup for Android development +================================================ + +Development in Java +------------------- + +You need the following software to be installed in order to develop for Android in Java: + +#. **Sun JDK 6** + + Visit `Java SE Downloads page `_ and download an installer for your OS. + + Here is a detailed :abbr:`JDK (Java Development Kit)` `installation guide `_ + for Ubuntu and Mac OS (only JDK sections are applicable for OpenCV) + + .. note:: OpenJDK is not suitable for Android development, since Android SDK supports only Sun JDK. + If you use Ubuntu, after installation of Sun JDK you should run the following command to set Sun java environment: + + .. code-block:: bash + + sudo update-java-alternatives --set java-6-sun + + **TODO:** add a note on Sun/Oracle Java installation on Ubuntu 12. + +#. **Android SDK** + + Get the latest ``Android SDK`` from http://developer.android.com/sdk/index.html + + Here is Google's `install guide `_ for the SDK. + + .. note:: If you choose SDK packed into a Windows installer, then you should have 32-bit JRE installed. It is not a prerequisite for Android development, but installer is a x86 application and requires 32-bit Java runtime. + + .. note:: If you are running x64 version of Ubuntu Linux, then you need ia32 shared libraries for use on amd64 and ia64 systems to be installed. You can install them with the following command: + + .. code-block:: bash + + sudo apt-get install ia32-libs + + For Red Hat based systems the following command might be helpful: + + .. code-block:: bash + + sudo yum install libXtst.i386 + +#. **Android SDK components** + + You need the following SDK components to be installed: + + * *Android SDK Tools, revision14* or newer. + + Older revisions should also work, but they are not recommended. + + * *SDK Platform Android 3.0*, ``API 11`` and *Android 2.3.1*, ``API 9``. + + The minimal platform supported by OpenCV Java API is **Android 2.2** (``API 8``). This is also the minimum API Level required for the provided samples to run. + See the ```` tag in their **AndroidManifest.xml** files. + But for successful compilation of some samples the **target** platform should be set to Android 3.0 (API 11) or higher. It will not prevent them from running on Android 2.2. + + .. image:: images/android_sdk_and_avd_manager.png + :height: 500px + :alt: Android SDK Manager + :align: center + + See `Adding Platforms and Packages `_ for help with installing/updating SDK components. + +#. **Eclipse IDE** + + Check the `Android SDK System Requirements `_ document for a list of Eclipse versions that are compatible with the Android SDK. + For OpenCV 2.4.x we recommend **Eclipse 3.7 (Indigo)** or later versions. They work well for OpenCV under both Windows and Linux. 
+ + If you have no Eclipse installed, you can get it from the `official site `_. + +#. **ADT plugin for Eclipse** + + These instructions are copied from `Android Developers site `_, check it out in case of any ADT-related problem. + + Assuming that you have Eclipse IDE installed, as described above, follow these steps to download and install the ADT plugin: + + #. Start Eclipse, then select :menuselection:`Help --> Install New Software...` + #. Click :guilabel:`Add` (in the top-right corner). + #. In the :guilabel:`Add Repository` dialog that appears, enter "ADT Plugin" for the Name and the following URL for the Location: + + https://dl-ssl.google.com/android/eclipse/ + + #. Click :guilabel:`OK` + + .. note:: If you have trouble acquiring the plugin, try using "http" in the Location URL, instead of "https" (https is preferred for security reasons). + + #. In the :guilabel:`Available Software` dialog, select the checkbox next to :guilabel:`Developer Tools` and click :guilabel:`Next`. + #. In the next window, you'll see a list of the tools to be downloaded. Click :guilabel:`Next`. + #. Read and accept the license agreements, then click :guilabel:`Finish`. + + .. note:: If you get a security warning saying that the authenticity or validity of the software can't be established, click :guilabel:`OK`. + + #. When the installation completes, restart Eclipse. + +Native development in C++ +------------------------- + +You need the following software to be installed in order to develop for Android in C++: + +#. **Android NDK** + + To compile C++ code for Android platform you need ``Android Native Development Kit`` (*NDK*). + + You can get the latest version of NDK from the `download page `_. To install Android NDK just extract the archive to some folder on your computer. Here are `installation instructions `_. + + .. note:: Before start you can read official Android NDK documentation which is in the Android NDK archive, in the folder :file:`docs/`. + + The main article about using Android NDK build system is in the :file:`ANDROID-MK.html` file. + + Some additional information you can find in the :file:`APPLICATION-MK.html`, :file:`NDK-BUILD.html` files, and :file:`CPU-ARM-NEON.html`, :file:`CPLUSPLUS-SUPPORT.html`, :file:`PREBUILTS.html`. + +#. **CDT plugin for Eclipse** + + There are several possible ways to integrate compilation of C++ code by Android NDK into Eclipse compilation process. + We recommend the approach based on Eclipse :abbr:`CDT(C/C++ Development Tooling)` Builder. + + .. important:: Make sure your Eclipse IDE has the :abbr:`CDT(C/C++ Development Tooling)` plugin installed. Menu :guilabel:`Help -> About Eclipse SDK` and push :guilabel:`Installation Details` button. + + .. image:: images/eclipse_inst_details.png + :alt: Configure builders + :align: center + + To install the `CDT plugin `_ use menu :guilabel:`Help -> Install New Software...`, + then paste the CDT 8.0 repository URL http://download.eclipse.org/tools/cdt/releases/indigo as shown in the picture below and click :guilabel:`Add...`, name it *CDT* and click :guilabel:`OK`. + + .. image:: images/eclipse_inst_cdt.png + :alt: Configure builders + :align: center + + ``CDT Main Features`` should be enough: + + .. image:: images/eclipse_inst_cdt_2.png + :alt: Configure builders + :align: center + + That's it. Compilation of C++ code is fully integrated into Eclipse building process now. 
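To give a rough idea of what the NDK toolchain described above actually compiles, here is a minimal sketch of a JNI source file that could live in a project's :file:`jni/` folder. The file name, the ``com.example.hello.HelloActivity`` class and the ``matRows`` method are hypothetical and used only for illustration; only the ``Java_<package>_<class>_<method>`` naming convention and the ``Mat`` address-passing idiom are standard.

.. code-block:: cpp

   // jni/hello_opencv.cpp -- hypothetical file name, not part of the SDK
   #include <jni.h>
   #include <opencv2/core/core.hpp>

   // Exported with C linkage so the JVM can resolve the symbol by name.
   extern "C"
   JNIEXPORT jint JNICALL Java_com_example_hello_HelloActivity_matRows(JNIEnv* env, jobject, jlong matAddr)
   {
       // The Java side would pass Mat.getNativeObjAddr(); here we just read a field of the native cv::Mat.
       cv::Mat& mat = *(cv::Mat*)matAddr;
       return mat.rows;
   }

A file like this is what ``ndk-build`` (or the Eclipse CDT Builder configured above) turns into a shared library under :file:`libs/<abi>/`.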
+ +Android application structure +============================= + +The source code of an Android application usually has the following structure: + ++ :file:`root folder of the project/` + + - :file:`jni/` + + - :file:`libs/` + + - :file:`res/` + + - :file:`src/` + + - :file:`AndroidManifest.xml` + + - :file:`project.properties` + + - :file:`... other files ...` + +where: + +* the :file:`src` folder contains the Java code of the application, + +* the :file:`res` folder contains the resources of the application (images, XML files describing the UI layout, etc.), + +* the :file:`libs` folder will contain native libraries after a successful build, + +* and the :file:`jni` folder contains the C/C++ application source code and the NDK's build scripts :file:`Android.mk` and :file:`Application.mk` + producing the native libraries, + +* the :file:`AndroidManifest.xml` file presents essential information about the application to the Android system + (name of the application, name of the main application package, components of the application, required permissions, etc.). + + It can be created using the Eclipse wizard or the :command:`android` tool from the Android SDK. + +* :file:`project.properties` is a text file containing information about the target Android platform and other build details. + This file is generated by Eclipse or can be created with the :command:`android` tool included in the Android SDK. + +.. note:: Both files (:file:`AndroidManifest.xml` and :file:`project.properties`) are required to compile the C++ part of the application, + since the Android NDK build system relies on them. If any of these files does not exist, compile the Java part of the project before the C++ part. + +:file:`Android.mk` and :file:`Application.mk` scripts +================================================================== + +The script :file:`Android.mk` usually has the following structure: + +.. code-block:: make + + LOCAL_PATH := $(call my-dir) + + include $(CLEAR_VARS) + LOCAL_MODULE := <module_name> + LOCAL_SRC_FILES := <list of source files> + <other variable> := <its value> + ... + <other variable> := <its value> + + include $(BUILD_SHARED_LIBRARY) + +This is the minimal :file:`Android.mk` file, which builds the C++ source code of an Android application. Note that the first two lines and the last line are mandatory for any :file:`Android.mk`. + +The file :file:`Application.mk` is usually optional, but for a project using OpenCV, where the STL and exceptions are used in C++, it should also be created. Example of the file :file:`Application.mk`: + +.. code-block:: make + + APP_STL := gnustl_static + APP_CPPFLAGS := -frtti -fexceptions + APP_ABI := armeabi-v7a + +Debugging and Testing +===================== +In this section we will give you some easy-to-follow instructions on how to set up an emulator or hardware device for testing and debugging an Android project. + +AVD +--- +An AVD (*Android Virtual Device*) is probably not the most convenient way to test an OpenCV-dependent application, but it is surely the simplest one to configure. + +#. Assuming you already have *Android SDK* and *Eclipse IDE* installed, in Eclipse go to :guilabel:`Window -> AVD Manager`. + **TBD:** how to start AVD Manager without Eclipse... +#. Press the :guilabel:`New` button in the :guilabel:`AVD Manager` window. +#. The :guilabel:`Create new Android Virtual Device` window will let you select some properties for your new device, such as the target API level, the size of the SD card and others. + .. image:: images/AVD_create.png + :alt: Configure builders + :align: center +#. When you click the :guilabel:`Create AVD` button, your new AVD will be available in the :guilabel:`AVD Manager`. +#. 
Press :guilabel:`Start` to launch the device. Be aware that any AVD (aka Emulator) is usually much slower than a hardware Android device, so it may take up to several minutes to start. +#. Go to :guilabel:`Run -> Run/Debug` in the Eclipse IDE to run your application in regular or debugging mode. The :guilabel:`Device Chooser` will let you choose among the running devices or start a new one. + +Hardware Device +--------------- +If you have an Android device, you can use it to test and debug your applications. This way is more authentic, though a little bit harder to set up. + +Windows host computer +^^^^^^^^^^^^^^^^^^^^^ + +#. Enable USB debugging on the Android device (settings menu). +#. Attach the Android device to your PC with a USB cable. +#. Go to :guilabel:`Start Menu` and **right-click** on :guilabel:`Computer`. Select :guilabel:`Manage` in the context menu. You may be asked for administrative permissions. +#. Select :guilabel:`Device Manager` in the left pane and find an unknown device in the list. You may try unplugging it and then plugging it back in to check that it is your device that appears in the list. + .. image:: images/usb_device_connect_01.png :alt: Unknown device :align: center +#. Right-click on the unknown device, select :guilabel:`Properties`, then select the :guilabel:`Details` tab. Select :guilabel:`Hardware Ids` and copy the line that looks like ``USB\VID_XXXX&PID_XXXX&MI_XX``. + .. image:: images/usb_device_connect_02.png :alt: Device properties details :align: center +#. Now open the file :file:`/extras/google/usb_driver/android_winusb.inf`. Select either the ``Google.NTx86`` or the ``Google.NTamd64`` section, depending on your host system architecture. + .. image:: images/usb_device_connect_03.png :alt: Device properties details :align: center +#. There should be a record for your device similar to the existing ones, and you need to add one manually. + .. image:: images/usb_device_connect_04.png :alt: Device properties details :align: center +#. TBD. + .. image:: images/usb_device_connect_05.png :alt: Device properties details :align: center +#. TBD. + .. image:: images/usb_device_connect_06.png :alt: Device properties details :align: center +#. TBD. + .. image:: images/usb_device_connect_07.png :alt: Device properties details :align: center +#. TBD. + .. image:: images/usb_device_connect_08.png :alt: Device properties details :align: center +#. TBD. + .. image:: images/usb_device_connect_09.png :alt: Device properties details :align: center +#. TBD. + .. image:: images/usb_device_connect_10.png :alt: Device properties details :align: center +#. TBD. + .. image:: images/usb_device_connect_11.png :alt: Device properties details :align: center + +#. Now, in Eclipse go to :guilabel:`Run -> Run/Debug` to run your application in regular or debugging mode. The :guilabel:`Device Chooser` will let you choose among the devices. + +Consult the official `Android Developers site `_ for more information on configuring hardware devices to work with other operating systems. + + +Linux & MacOS host computer +^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**TODO:** Describe device setup. + +What's next +=========== + +Now that you have your development environment set up and configured, you may want to proceed to installing the OpenCV4Android SDK. You can learn how to do that in a separate :ref:`O4A_SDK` tutorial. 
\ No newline at end of file diff --git a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst new file mode 100644 index 0000000000..d1eb26e072 --- /dev/null +++ b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst @@ -0,0 +1,471 @@ + +.. _dev_with_OCV_on_Android: + + +Android development with OpenCV +******************************* + +This tutorial is created to help you use OpenCV library within your Android project. + +This guide was written with Windows 7 in mind, though it should work with any other OS supported by OpenCV4Android SDK. + +This tutorial assumes you have the following installed and configured: + +* JDK + +* Android SDK and NDK + +* Eclipse IDE + +* ADT and CDT plugins for Eclipse + + .. + +If you need help with anything of the above, you may refer to our :ref:`android_dev_intro` guide. + +This tutorial also assumes you have OpenCV4Android SDK already installed on your development machine and OpenCV Manager on your testing device correspondingly. If you need help with any of these, you may consult our :ref:`O4A_SDK` tutorial. + +If you encounter any error after thoroughly following these steps, feel free to contact us via `OpenCV4Android `_ discussion group or OpenCV `Q&A forum `_ . We'll do our best to help you out. + +Using OpenCV library within your Android project +================================================ + +In this section we will explain how to make some existing project to use OpenCV. +Starting with 2.4.2 release for Android, *OpenCV Manager* is used to provide apps with the best available version of OpenCV. +You can get more information here: :ref:`Android_OpenCV_Manager` and in these `slides `_. + +Java +---- +Application development with async initialization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using async initialization is a **recommended** way for application development. It uses the OpenCV Manager to access OpenCV libraries externally installed in the target system. + +#. Add OpenCV library project to your workspace. Use menu :guilabel:`File -> Import -> Existing project in your workspace`, + press :guilabel:`Browse` button and locate OpenCV4Android SDK (:file:`OpenCV-2.4.2-android-sdk/sdk`). + + .. image:: images/eclipse_opencv_dependency0.png + :alt: Add dependency from OpenCV library + :align: center + +#. In application project add a reference to the OpenCV Java SDK in :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.2``. + + .. image:: images/eclipse_opencv_dependency1.png + :alt: Add dependency from OpenCV library + :align: center + +To run OpenCV Manager-based application the first time you need to install packages with the `OpenCV Manager` and `OpenCV binary pack` for you platform. +You can do it using Google Play Market or manually with ``adb`` tool: + + .. code-block:: sh + :linenos: + + /platform-tools/adb install /apk/OpenCV_2.4.2_Manager.apk + /platform-tools/adb install /apk/OpenCV_2.4.2_binary_pack_armv7a.apk + +There is a very base code snippet implementing the async initialization. It shows basic principles. See the "15-puzzle" OpenCV sample for details. + +.. 
code-block:: java + :linenos: + + public class MyActivity extends Activity implements HelperCallbackInterface + { + private BaseLoaderCallback mOpenCVCallBack = new BaseLoaderCallback(this) { + @Override + public void onManagerConnected(int status) { + switch (status) { + case LoaderCallbackInterface.SUCCESS: + { + Log.i(TAG, "OpenCV loaded successfully"); + // Create and set View + mView = new puzzle15View(mAppContext); + setContentView(mView); + } break; + default: + { + super.onManagerConnected(status); + } break; + } + } + }; + + /** Called when the activity is first created. */ + @Override + public void onCreate(Bundle savedInstanceState) + { + Log.i(TAG, "onCreate"); + super.onCreate(savedInstanceState); + + Log.i(TAG, "Trying to load OpenCV library"); + if (!OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_2, this, mOpenCVCallBack)) + { + Log.e(TAG, "Cannot connect to OpenCV Manager"); + } + } + + // ... + } + +It this case application works with OpenCV Manager in asynchronous fashion. ``OnManagerConnected`` callback will be called in UI thread, when initialization finishes. +Please note, that it is not allowed to use OpenCV calls or load OpenCV-dependent native libs before invoking this callback. +Load your own native libraries that depend on OpenCV after the successful OpenCV initialization. + +Application development with static initialization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +According to this approach all OpenCV binaries are included into your application package. It is designed mostly for development purposes. +This approach is deprecated for the production code, release package is recommended to communicate with OpenCV Manager via the async initialization described above. + +#. Add the OpenCV library project to your workspace the same way as for the async initialization above. + Use menu :guilabel:`File -> Import -> Existing project in your workspace`, push :guilabel:`Browse` button and select OpenCV SDK path (:file:`OpenCV-2.4.2-android-sdk/sdk`). + + .. image:: images/eclipse_opencv_dependency0.png + :alt: Add dependency from OpenCV library + :align: center + +#. In the application project add a reference to the OpenCV4Android SDK in :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.2``; + + .. image:: images/eclipse_opencv_dependency1.png + :alt: Add dependency from OpenCV library + :align: center + +#. If your application project **doesn't have a JNI part**, just copy the corresponding OpenCV native libs from :file:`/sdk/native/libs/` to your project directory to folder :file:`libs/`. + + In case of the application project **with a JNI part**, instead of manual libraries copying you need to modify your ``Android.mk`` file: + add the following two code lines after the ``"include $(CLEAR_VARS)"`` and before ``"include path_to_OpenCV-2.4.2-android-sdk/sdk/native/jni/OpenCV.mk"`` + + .. code-block:: make + :linenos: + + OPENCV_CAMERA_MODULES:=on + OPENCV_INSTALL_MODULES:=on + + The result should look like the following: + + .. code-block:: make + :linenos: + + include $(CLEAR_VARS) + + # OpenCV + OPENCV_CAMERA_MODULES:=on + OPENCV_INSTALL_MODULES:=on + include ../../sdk/native/jni/OpenCV.mk + + After that the OpenCV libraries will be copied to your application :file:`libs` folder during the JNI part build. + + Eclipse will automatically include all the libraries from the :file:`libs` folder to the application package (APK). + +#. 
The last step of enabling OpenCV in your application is the Java initialization code that runs before any call to the OpenCV API. + It can be done, for example, in the static section of the ``Activity`` class: + + .. code-block:: java + :linenos: + + static { + if (!OpenCVLoader.initDebug()) { + // Handle initialization error + } + } + + If your application includes other OpenCV-dependent native libraries, you should load them **after** the OpenCV initialization: + + .. code-block:: java + :linenos: + + static { + if (!OpenCVLoader.initDebug()) { + // Handle initialization error + } else { + System.loadLibrary("my_jni_lib1"); + System.loadLibrary("my_jni_lib2"); + } + } + +Native/C++ +---------- + +To build your own Android application that uses OpenCV from the native part, the following steps should be done: + +#. You can use an environment variable to specify the location of the OpenCV package or just hardcode an absolute or relative path in the :file:`jni/Android.mk` of your projects. + +#. The file :file:`jni/Android.mk` should be written for the current application using the common rules for this file. + + For detailed information, see the Android NDK documentation from the Android NDK archive, in the file + :file:`/docs/ANDROID-MK.html` + +#. The line + + .. code-block:: make + + include C:\Work\OpenCV4Android\OpenCV-2.4.2-android-sdk\sdk\native\jni\OpenCV.mk + + should be inserted into the :file:`jni/Android.mk` file **after** the line + + .. code-block:: make + + include $(CLEAR_VARS) + +#. Several variables can be used to customize the OpenCV integration, but you **don't need** to use them when your application uses the `async initialization` via the `OpenCV Manager` API. + + Note: these variables should be set **before** the ``"include .../OpenCV.mk"`` line: + + .. code-block:: make + + OPENCV_INSTALL_MODULES:=on + + Copies the necessary OpenCV dynamic libs to the project ``libs`` folder in order to include them in the APK. + + .. code-block:: make + + OPENCV_CAMERA_MODULES:=off + + Skips copying the native OpenCV camera-related libs to the project ``libs`` folder. + + .. code-block:: make + + OPENCV_LIB_TYPE:=STATIC + + Performs a static link with OpenCV. By default a dynamic link is used and the project JNI lib depends on ``libopencv_java.so``. + +#. The file :file:`Application.mk` should exist and should contain the lines: + + .. code-block:: make + + APP_STL := gnustl_static + APP_CPPFLAGS := -frtti -fexceptions + + Also, a line like this one: + + .. code-block:: make + + APP_ABI := armeabi-v7a + + should specify the application target platforms. + + In some cases a linkage error (like ``"In function 'cv::toUtf16(std::basic_string<...>... undefined reference to 'mbstowcs'"``) happens + when building an application JNI library that depends on OpenCV. + The following line in the :file:`Application.mk` usually fixes it: + + .. code-block:: make + + APP_PLATFORM := android-9 + + +#. Either use :ref:`manual ` ``ndk-build`` invocation or :ref:`set up Eclipse CDT Builder ` to build the native JNI lib before the Java part is [re]built and the APK is created. + + **TBD:** move this info from tutorial v1 to part 1 of tutorial v2. + + +Hello OpenCV Sample +=================== + +Here are the basic steps to guide you through the process of creating a simple OpenCV-centric application. +It will be capable of accessing camera output, processing it and displaying the result. + +#. Open the Eclipse IDE, create a new clean workspace, and create a new Android project (*File -> New -> Android Project*). + +#. Set name, target, package and minSDKVersion accordingly. + +#. 
Create a new class (*File -> New -> Class*). Name it for example: *HelloOpenCVView*. + .. image:: images/dev_OCV_new_class.png + :alt: Add a new class. + :align: center + + * It should extend *SurfaceView* class. + + * It also should implement *SurfaceHolder.Callback*, *Runnable*. + +#. Edit *HelloOpenCVView* class. + + * Add an *import* line for *android.content.context*. + + * Modify autogenerated stubs: *HelloOpenCVView*, *surfaceCreated*, *surfaceDestroyed* and *surfaceChanged*. + .. code-block:: java + + package com.hello.opencv.test; + + import android.content.Context; + + public class HelloOpenCVView extends SurfaceView implements Callback, Runnable { + + public HelloOpenCVView(Context context) { + super(context); + getHolder().addCallback(this); + } + + public void surfaceCreated(SurfaceHolder holder) { + (new Thread(this)).start(); + } + + public void surfaceDestroyed(SurfaceHolder holder) { + cameraRelease(); + } + + public void surfaceChanged(SurfaceHolder holder, int format, int width, + int height) { + cameraSetup(width, height); + } + + * Add *cameraOpen*, *cameraRelease* and *cameraSetup* voids as shown below. + + * Also, don't forget to add the public void *run()* as follows: + + .. code-block:: java + + public void run() { + // TODO: loop { getFrame(), processFrame(), drawFrame() } + } + + public boolean cameraOpen() { + return false; //TODO: open camera + } + + private void cameraRelease() { + // TODO release camera + } + + private void cameraSetup(int width, int height) { + // TODO setup camera + } + + + .. + +#. Create a new *Activity* (*New -> Other -> Android -> Android Activity*) and name it, for example: *HelloOpenCVActivity*. For this activity define *onCreate*, *onResume* and *onPause* voids. + .. code-block:: java + + public void onCreate (Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + mView = new HelloOpenCVView(this); + setContentView (mView); + } + + protected void onPause() { + super.onPause(); + mView.cameraRelease(); + } + + protected void onResume() { + super.onResume(); + if( !mView.cameraOpen() ) { + // MessageBox and exit app + AlertDialog ad = new AlertDialog.Builder(this).create(); + ad.setCancelable(false); // This blocks the "BACK" button + ad.setMessage("Fatal error: can't open camera!"); + ad.setButton("OK", new DialogInterface.OnClickListener() { + public void onClick(DialogInterface dialog, int which) { + dialog.dismiss(); + finish(); + } + }); + ad.show(); + } + + } + +#. Add the following permissions to the AndroidManifest.xml file: + .. code-block:: xml + + + + + + + +#. Reference OpenCV library within your project properties. + .. image:: images/dev_OCV_reference.png + :alt: Reference OpenCV library. + :align: center + +#. We now need some code to handle the camera. Update the *HelloOpenCVView* class as follows: + .. 
code-block:: java + + private VideoCapture mCamera; + + public boolean cameraOpen() { + synchronized (this) { + cameraRelease(); + mCamera = new VideoCapture(Highgui.CV_CAP_ANDROID); + if (!mCamera.isOpened()) { + mCamera.release(); + mCamera = null; + Log.e("HelloOpenCVView", "Failed to open native camera"); + return false; + } + } + return true; + } + public void cameraRelease() { + synchronized(this) { + if (mCamera != null) { + mCamera.release(); + mCamera = null; + } + } + } + private void cameraSetup(int width, int height) { + synchronized (this) { + if (mCamera != null && mCamera.isOpened()) { + List sizes = mCamera.getSupportedPreviewSizes(); + int mFrameWidth = width; + int mFrameHeight = height; + { // selecting optimal camera preview size + double minDiff = Double.MAX_VALUE; + for (Size size : sizes) { + if (Math.abs(size.height - height) < minDiff) { + mFrameWidth = (int) size.width; + mFrameHeight = (int) size.height; + minDiff = Math.abs(size.height - height); + } + } + } + mCamera.set(Highgui.CV_CAP_PROP_FRAME_WIDTH, mFrameWidth); + mCamera.set(Highgui.CV_CAP_PROP_FRAME_HEIGHT, mFrameHeight); + } + } + } + +#. The last step would be to update the *run()* void in *HelloOpenCVView* class as follows: + .. code-block:: java + + public void run() { + while (true) { + Bitmap bmp = null; + synchronized (this) { + if (mCamera == null) + break; + if (!mCamera.grab()) + break; + + bmp = processFrame(mCamera); + } + if (bmp != null) { + Canvas canvas = getHolder().lockCanvas(); + if (canvas != null) { + canvas.drawBitmap(bmp, (canvas.getWidth() - bmp.getWidth()) / 2, + (canvas.getHeight() - bmp.getHeight()) / 2, null); + getHolder().unlockCanvasAndPost(canvas); + + } + bmp.recycle(); + } + } + } + + protected Bitmap processFrame(VideoCapture capture) { + Mat mRgba = new Mat(); + capture.retrieve(mRgba, Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA); + //process mRgba + Bitmap bmp = Bitmap.createBitmap(mRgba.cols(), mRgba.rows(), Bitmap.Config.ARGB_8888); + try { + Utils.matToBitmap(mRgba, bmp); + } catch(Exception e) { + Log.e("processFrame", "Utils.matToBitmap() throws an exception: " + e.getMessage()); + bmp.recycle(); + bmp = null; + } + return bmp; + } + + diff --git a/doc/tutorials/introduction/android_binary_package/images/AVD_create.png b/doc/tutorials/introduction/android_binary_package/images/AVD_create.png new file mode 100644 index 0000000000..f55ea51d73 Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/AVD_create.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/AVD_empty.png b/doc/tutorials/introduction/android_binary_package/images/AVD_empty.png new file mode 100644 index 0000000000..6989f7e167 Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/AVD_empty.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/cmd_adb_devices.png b/doc/tutorials/introduction/android_binary_package/images/cmd_adb_devices.png new file mode 100644 index 0000000000..e0e4853dec Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/cmd_adb_devices.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/dev_OCV_new_class.png b/doc/tutorials/introduction/android_binary_package/images/dev_OCV_new_class.png new file mode 100644 index 0000000000..3a75b11081 Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/dev_OCV_new_class.png differ diff --git 
a/doc/tutorials/introduction/android_binary_package/images/dev_OCV_reference.png b/doc/tutorials/introduction/android_binary_package/images/dev_OCV_reference.png new file mode 100644 index 0000000000..5179b23430 Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/dev_OCV_reference.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/device_details.png b/doc/tutorials/introduction/android_binary_package/images/device_details.png new file mode 100644 index 0000000000..9c0a94000b Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/device_details.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_01.png b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_01.png new file mode 100644 index 0000000000..62f8768a4a Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_01.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_02.png b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_02.png new file mode 100644 index 0000000000..9caf777001 Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_02.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_03.png b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_03.png new file mode 100644 index 0000000000..5f75f84d32 Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_03.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_04.png b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_04.png new file mode 100644 index 0000000000..d686d6d960 Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_04.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_05.png b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_05.png new file mode 100644 index 0000000000..1e5a37a313 Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_05.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_06.png b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_06.png new file mode 100644 index 0000000000..c6d275d8c6 Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_06.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_07.png b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_07.png new file mode 100644 index 0000000000..06ac71e486 Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_07.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_08.png b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_08.png new file mode 100644 index 0000000000..168d3ea232 Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_08.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_09.png 
b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_09.png new file mode 100644 index 0000000000..c8d30a0def Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_09.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_10.png b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_10.png new file mode 100644 index 0000000000..85b9a75b2a Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_10.png differ diff --git a/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_11.png b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_11.png new file mode 100644 index 0000000000..87f05728f5 Binary files /dev/null and b/doc/tutorials/introduction/android_binary_package/images/usb_device_connect_11.png differ diff --git a/doc/tutorials/introduction/display_image/display_image.rst b/doc/tutorials/introduction/display_image/display_image.rst index 915b2cce06..303960e5e6 100644 --- a/doc/tutorials/introduction/display_image/display_image.rst +++ b/doc/tutorials/introduction/display_image/display_image.rst @@ -9,7 +9,7 @@ Goal In this tutorial you will learn how to: .. container:: enumeratevisibleitemswithsquare - + * Load an image (using :imread:`imread <>`) * Create a named OpenCV window (using :named_window:`namedWindow <>`) * Display an image in an OpenCV window (using :imshow:`imshow <>`) @@ -17,7 +17,7 @@ In this tutorial you will learn how to: Source Code =========== -Download the source code from `here `_. +Download the source code from `here `_. .. literalinclude:: ../../../../samples/cpp/tutorial_code/introduction/display_image/display_image.cpp :language: cpp @@ -29,7 +29,7 @@ Explanation In OpenCV 2 we have multiple modules. Each one takes care of a different area or approach towards image processing. You could already observe this in the structure of the user guide of these tutorials itself. Before you use any of them you first need to include the header files where the content of each individual module is declared. -You'll almost always end up using the: +You'll almost always end up using the: .. container:: enumeratevisibleitemswithsquare @@ -75,23 +75,23 @@ Now we call the :imread:`imread <>` function which loads the image name specifie :tab-width: 4 :lines: 17 -.. note:: +.. note:: - OpenCV offers support for the image formats Windows bitmap (bmp), portable image formats (pbm, pgm, ppm) and Sun raster (sr, ras). With help of plugins (you need to specify to use them if you build yourself the library, nevertheless in the packages we ship present by default) you may also load image formats like JPEG (jpeg, jpg, jpe), JPEG 2000 (jp2 - codenamed in the CMake as Jasper), TIFF files (tiff, tif) and portable network graphics (png). Furthermore, OpenEXR is also a possibility. + OpenCV offers support for the image formats Windows bitmap (bmp), portable image formats (pbm, pgm, ppm) and Sun raster (sr, ras). With help of plugins (you need to specify to use them if you build yourself the library, nevertheless in the packages we ship present by default) you may also load image formats like JPEG (jpeg, jpg, jpe), JPEG 2000 (jp2 - codenamed in the CMake as Jasper), TIFF files (tiff, tif) and portable network graphics (png). Furthermore, OpenEXR is also a possibility. 
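As a compact illustration of the loading step discussed above, the following sketch shows ``imread`` together with the usual validity check; the file name ``"lena.jpg"`` is an arbitrary placeholder rather than a file shipped with the tutorial.

.. code-block:: cpp

   #include <opencv2/core/core.hpp>
   #include <opencv2/highgui/highgui.hpp>
   #include <iostream>

   int main()
   {
       // CV_LOAD_IMAGE_COLOR forces a 3-channel color image regardless of the file's own format.
       cv::Mat image = cv::imread("lena.jpg", CV_LOAD_IMAGE_COLOR);

       if (!image.data)   // imread returns an empty Mat when the file cannot be read
       {
           std::cout << "Could not open or find the image" << std::endl;
           return -1;
       }
       // ... create a window and show the image, as described below ...
       return 0;
   }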
-After checking that the image data was loaded correctly, we want to display our image, so we create an OpenCV window using the :named_window:`namedWindow <>` function. These are automatically managed by OpenCV once you create them. For this you need to specify its name and how it should handle the change of the image it contains from a size point of view. It may be: +After checking that the image data was loaded correctly, we want to display our image, so we create an OpenCV window using the :named_window:`namedWindow <>` function. These are automatically managed by OpenCV once you create them. For this you need to specify its name and how it should handle the change of the image it contains from a size point of view. It may be: .. container:: enumeratevisibleitemswithsquare - + *CV_WINDOW_AUTOSIZE* is the only supported one if you do not use the Qt backend. In this case the window size will take up the size of the image it shows. No resize permitted! - + *CV_WINDOW_NORMAL* on Qt you may use this to allow window resize. The image will resize itself according to the current window size. By using the | operator you also need to specify if you would like the image to keep its aspect ratio (*CV_WINDOW_KEEPRATIO*) or not (*CV_WINDOW_FREERATIO*). + + *CV_WINDOW_AUTOSIZE* is the only supported one if you do not use the Qt backend. In this case the window size will take up the size of the image it shows. No resize permitted! + + *CV_WINDOW_NORMAL* on Qt you may use this to allow window resize. The image will resize itself according to the current window size. By using the | operator you also need to specify if you would like the image to keep its aspect ratio (*CV_WINDOW_KEEPRATIO*) or not (*CV_WINDOW_FREERATIO*). .. literalinclude:: ../../../../samples/cpp/tutorial_code/introduction/display_image/display_image.cpp :language: cpp :lines: 25 :tab-width: 4 -Finally, to update the content of the OpenCV window with a new image use the :imshow:`imshow <>` function. Specify the OpenCV window name to update and the image to use during this operation: +Finally, to update the content of the OpenCV window with a new image use the :imshow:`imshow <>` function. Specify the OpenCV window name to update and the image to use during this operation: .. literalinclude:: ../../../../samples/cpp/tutorial_code/introduction/display_image/display_image.cpp :language: cpp @@ -110,7 +110,7 @@ Result .. container:: enumeratevisibleitemswithsquare - * Compile your code and then run the executable giving an image path as argument. If you're on Windows the executable will of course contain an *exe* extension too. Of course assure the image file is near your program file. + * Compile your code and then run the executable giving an image path as argument. If you're on Windows the executable will of course contain an *exe* extension too. Of course assure the image file is near your program file. .. code-block:: bash @@ -120,7 +120,7 @@ Result .. image:: images/Display_Image_Tutorial_Result.jpg :alt: Display Image Tutorial - Final Result - :align: center + :align: center .. raw:: html diff --git a/doc/tutorials/introduction/how_to_write_a_tutorial/how_to_write_a_tutorial.rst b/doc/tutorials/introduction/how_to_write_a_tutorial/how_to_write_a_tutorial.rst index 96b415cad7..a37ef14904 100644 --- a/doc/tutorials/introduction/how_to_write_a_tutorial/how_to_write_a_tutorial.rst +++ b/doc/tutorials/introduction/how_to_write_a_tutorial/how_to_write_a_tutorial.rst @@ -1 +1 @@ -.. _howToWriteTutorial: How to write a tutorial for OpenCV? 
*********************************** Okay, so assume you have just finished a project of yours implementing something based on OpenCV and you want to present/share it with the community. Luckily, OpenCV is an *open source project*. This means that in theory anyone has access to the full source code and may extend it. While making a robust and practical library (like OpenCV) is great, the success of a library also depends on how user friendly it is. To improve on this aspect, the OpenCV team has already been listening to user feedback from its :opencv_group:`Yahoo user group <>` and by making samples you can find in the source directories sample folder. The addition of the tutorials (in both online and PDF format) is an extension of these efforts. Goal ==== .. _reST: http://docutils.sourceforge.net/rst.html .. |reST| replace:: reStructuredText .. |Sphinx| replace:: Sphinx .. _Sphinx: http://sphinx.pocoo.org/ The tutorials are just as an important part of the library as the implementation of those crafty data structures and algorithms you can find in OpenCV. Therefore, the source codes for the tutorials are part of the library. And yes, I meant source codes. The reason for this formulation is that the tutorials are written by using the |Sphinx|_ documentation generation system. This is based on the popular python documentation system called |reST|_ (reST). ReStructuredText is a really neat language that by using a few simple conventions (indentation, directives) and emulating old school e-mail writing techniques (text only) tries to offer a simple way to create and edit documents. Sphinx extends this with some new features and creates the resulting document in both HTML (for web) and PDF (for offline usage) format. Usually, an OpenCV tutorial has the following parts: 1. A source code demonstration of an OpenCV feature: a. One or more CPP, Python, Java or other type of files depending for what OpenCV offers support and for what language you make the tutorial. #. Occasionaly, input resource files required for running your tutorials application. #. A table of content entry (so people may easily find the tutorial): a. Adding your stuff to the tutorials table of content (**reST** file). #. Add an image file near the TOC entry. #. The content of the tutorial itself: a. The **reST** text of the tutorial #. Images following the idea that "*A picture is worth a thousand words*". #. For more complex demonstrations you may create a video. As you can see you will need at least some basic knowledge of the *reST* system in order to complete the task at hand with success. However, don't worry *reST* (and *Sphinx*) was made with simplicity in mind. It is easy to grasp its basics. I found that the `OpenAlea documentations introduction on this subject `_ (or the `Thomas Cokelaer one `_ ) should enough for this. If for some directive or feature you need a more in-depth description look it up in the official |reST|_ help files or at the |Sphinx|_ documentation. In our world achieving some tasks is possible in multiple ways. However, some of the roads to take may have obvious or hidden advantages over others. Then again, in some other cases it may come down to just simple user preference. Here, I'll present how I decided to write the tutorials, based on my personal experience. If for some of them you know a better solution and you can back it up feel free to use that. I've nothing against it, as long as it gets the job done in an elegant fashion. Now the best would be if you could make the integration yourself. 
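Before going further, here is a minimal sketch of what such a tutorial source file can look like, using only the conventions mentioned above (a reference label, a title underline, a section and a bullet list). The label, titles and list items are made-up placeholders, not an existing tutorial:

.. code-block:: rst

   .. the label and titles below are placeholders for illustration only

   .. _myFirstTutorial:

   My First Tutorial
   *****************

   Goal
   ====

   .. container:: enumeratevisibleitemswithsquare

      + Load an image from disk
      + Show it in a window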
For this you need first to have the source code. I recommend following the guides for your operating system on acquiring OpenCV sources. For Linux users look :ref:`here ` and for :ref:`Windows here `. You must also install python and sphinx with its dependencies in order to be able to build the documentation. Once you have downloaded the repository to your hard drive you can take a look in the OpenCV directory to make sure you have both the samples and doc folder present. Anyone may download the trunk source files from :file:`/svn/opencv/trunk/` . Nevertheless, not everyone has upload (commit/submit) rights. This is to protect the integrity of the library. If you plan doing more than one tutorial, and would like to have an account with commit user rights you should first register an account at http://code.opencv.org/ and then contact dr. Gary Bradski at -delete-bradski@-delete-willowgarage.com. Otherwise, you can just send the resulting files to us via the :opencv_group:`Yahoo user group <>` or to me at -delete-bernat@-delete-primeranks.net and I'll add it. If you have questions, suggestions or constructive critics I will gladly listen to them. If you send it to the OpenCV group please tag its subject with a **[Tutorial]** entry. Format the Source Code ====================== Before I start this let it be clear: the main goal is to have a working sample code. However, for your tutorial to be of a top notch quality you should follow a few guide lines I am going to present here. In case you have an application by using the older interface (with *IplImage*, *CVMat*, *cvLoadImage* and such) consider migrating it to the new C++ interface. The tutorials are intended to be an up to date help for our users. And as of OpenCV 2 the OpenCV emphasis on using the less error prone and clearer C++ interface. Therefore, if possible please convert your code to the C++ interface. For this it may help to read the :ref:`InteroperabilityWithOpenCV1` tutorial. However, once you have an OpenCV 2 working code, then you should make your source code snippet as easy to read as possible. Here're a couple of advices for this: .. container:: enumeratevisibleitemswithsquare + Add a standard output with the description of what your program does. Keep it short and yet, descriptive. This output is at the start of the program. In my example files this usually takes the form of a *help* function containing the output. This way both the source file viewer and application runner can see what all is about in your sample. Here's an instance of this: .. code-block:: cpp void help() { cout << "--------------------------------------------------------------------------" << endl << "This program shows how to write video files. You can extract the R or G or B color channel " << " of the input video. You can choose to use the source codec (Y) or select a custom one. (N)"<< endl << "Usage:" << endl << "./video-write inputvideoName [ R | G | B] [Y | N]" << endl << "--------------------------------------------------------------------------" << endl << endl; } // ... int main(int argc, char *argv[], char *window_name) { help(); // here comes the actual source code } Additionally, finalize the description with a short usage guide. This way the user will know how to call your programs, what leads us to the next point. + Prefer command line argument controlling instead of hard coded one. If your program has some variables that may be changed use command line arguments for this. The tutorials, can be a simple try-out ground for the user. 
If you offer command line controlling for the input image (for example), then you offer the possibility for the user to try it out with his/her own images, without the need to mess in the source code. In the upper example you can see that the input image, channel and codec selection may all be changed from the command line. Just compile the program and run it with your own input arguments. + Be as verbose as possible. There is no shame in filling the source code with comments. This way the more advanced user may figure out what's happening right from the sample code. This advice goes for the output console too. Specify to the user what's happening. Never leave the user hanging there and thinking on: "Is this program now crashing or just doing some computationally intensive task?." So, if you do a training task that may take some time, make sure you print out a message about this before starting and after finishing it. + Throw out unnecessary stuff from your source code. This is a warning to not take the previous point too seriously. Balance is the key. If it's something that can be done in a fewer lines or simpler than that's the way you should do it. Nevertheless, if for some reason you have such sections notify the user why you have chosen to do so. Keep the amount of information as low as possible, while still getting the job done in an elegant way. + Put your sample file into the :file:`opencv/samples/cpp/tutorial_code/sectionName` folder. If you write a tutorial for other languages than cpp, then change that part of the path. Before completing this you need to decide that to what section (module) does your tutorial goes. Think about on what module relies most heavily your code and that is the one to use. If the answer to this question is more than one modules then the *general* section is the one to use. For finding the *opencv* directory open up your file system and navigate where you downloaded our repository. + If the input resources are hard to acquire for the end user consider adding a few of them to the :file:`opencv/samples/cpp/tutorial_code/images`. Make sure that who reads your code can try it out! Add the TOC entry ================= For this you will need to know some |reST|_. There is no going around this. |reST|_ files have **rst** extensions. However, these are simple text files. Use any text editor you like. Finding a text editor that offers syntax highlighting for |reST|_ was quite a challenge at the time of writing this tutorial. In my experience, `Intype `_ is a solid option on Windows, although there is still place for improvement. Adding your source code to a table of content is important for multiple reasons. First and foremost this will allow for the user base to find your tutorial from our websites tutorial table of content. Secondly, if you omit this *Sphinx* will throw a warning that your tutorial file isn't part of any TOC tree entry. And there is nothing more than the developer team hates than an ever increasing warning/error list for their builds. *Sphinx* also uses this to build up the previous-back-up buttons on the website. Finally, omitting this step will lead to that your tutorial will **not** be added to the PDF version of the tutorials. Navigate to the :file:`opencv/doc/tutorials/section/table_of_content_section` folder (where the section is the module to which you're adding the tutorial). Open the *table_of_content_section* file. Now this may have two forms. 
If no prior tutorials are present in this section that there is a template message about this and has the following form: .. code-block:: rst .. _Table-Of-Content-Section: Section title ----------------------------------------------------------- Description about the section. .. include:: ../../definitions/noContent.rst .. raw:: latex \pagebreak The first line is a reference to the section title in the reST system. The section title will be a link and you may refer to it via the ``:ref:`` directive. The *include* directive imports the template text from the definitions directories *noContent.rst* file. *Sphinx* does not creates the PDF from scratch. It does this by first creating a latex file. Then creates the PDF from the latex file. With the *raw* directive you can directly add to this output commands. Its unique argument is for what kind of output to add the content of the directive. For the PDFs it may happen that multiple sections will overlap on a single page. To avoid this at the end of the TOC we add a *pagebreak* latex command, that hints to the LATEX system that the next line should be on a new page. If you have one of this, try to transform it to the following form: .. include:: ../../definitions/tocDefinitions.rst .. code-block:: rst .. _Table-Of-Content-Section: Section title ----------------------------------------------------------- .. include:: ../../definitions/tocDefinitions.rst + .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv =============== ====================================================== |MatBasicIma| **Title:** :ref:`matTheBasicImageContainer` *Compatibility:* > OpenCV 2.0 *Author:* |Author_BernatG| You will learn how to store images in the memory and how to print out their content to the console. =============== ===================================================== .. |MatBasicIma| image:: images/matTheBasicImageStructure.jpg :height: 90pt :width: 90pt .. raw:: latex \pagebreak .. toctree:: :hidden: ../mat - the basic image container/mat - the basic image container If this is already present just add a new section of the content between the include and the raw directives (excluding those lines). Here you'll see a new include directive. This should be present only once in a TOC tree and the reST file contains the definitions of all the authors contributing to the OpenCV tutorials. We are a multicultural community and some of our name may contain some funky characters. However, reST **only supports** ANSI characters. Luckily we can specify Unicode characters with the *unicode* directive. Doing this for all of your tutorials is a troublesome procedure. Therefore, the tocDefinitions file contains the definition of your author name. Add it here once and afterwards just use the replace construction. For example here's the definition for my name: .. code-block:: rst .. |Author_BernatG| unicode:: Bern U+00E1 t U+0020 G U+00E1 bor The ``|Author_BernatG|`` is the text definitions alias. I can use later this to add the definition, like I've done in the TOCs *Author* part. After the ``::`` and a space you start the definition. If you want to add an UNICODE character (non-ASCI) leave an empty space and specify it in the format U+(UNICODE code). To find the UNICODE code of a character I recommend using the `FileFormat `_ websites service. Spaces are trimmed from the definition, therefore we add a space by its UNICODE character (U+0020). Until the *raw* directive what you can see is a TOC tree entry. Here's how a TOC entry will look like: + .. 
tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv =============== ====================================================== |MatBasicIma| **Title:** :ref:`matTheBasicImageContainer` *Compatibility:* > OpenCV 2.0 *Author:* |Author_BernatG| You will learn how to store images in the memory and how to print out their content to the console. =============== ====================================================== .. |MatBasicIma| image:: images/matTheBasicImageStructure.jpg :height: 90pt :width: 90pt As you can see we have an image to the left and a description box to the right. To create two boxes we use a table with two columns and a single row. In the left column is the image and in the right one the description. However, the image directive is way too long to fit in a column. Therefore, we need to use the substitution definition system. We add this definition after the TOC tree. All images for the TOC tree are to be put in the images folder near its |reST|_ file. We use the point measurement system because we are also creating PDFs. PDFs are printable documents, where there is no such thing that pixels (px), just points (pt). And while generally space is no problem for web pages (we have monitors with **huge** resolutions) the size of the paper (A4 or letter) is constant and will be for a long time in the future. Therefore, size constrains come in play more like for the PDF, than the generated HTML code. Now your images should be as small as possible, while still offering the intended information for the user. Remember that the tutorial will become part of the OpenCV source code. If you add large images (that manifest in form of large image size) it will just increase the size of the repository pointlessly. If someone wants to download it later, its download time will be that much longer. Not to mention the larger PDF size for the tutorials and the longer load time for the web pages. In terms of pixels a TOC image should not be larger than 120 X 120 pixels. Resize your images if they are larger! .. note:: If you add a larger image and specify a smaller image size, *Sphinx* will not resize that. At build time will add the full size image and the resize will be done by your browser after the image is loaded. A 120 X 120 image is somewhere below 10KB. If you add a 110KB image, you have just pointlessly added a 100KB extra data to transfer over the internet for every user! Generally speaking you shouldn't need to specify your images size (excluding the TOC entries). If no such is found *Sphinx* will use the size of the image itself (so no resize occurs). Then again if for some reason you decide to specify a size that should be the **width** of the image rather than its height. The reason for this again goes back to the PDFs. On a PDF page the height is larger than the width. In the PDF the images will not be resized. If you specify a size that does not fit in the page, then what does not fits in **will be cut off**. When creating your images for your tutorial you should try to keep the image widths below 500 pixels, and calculate with around 400 point page width when specifying image widths. The image format depends on the content of the image. If you have some complex scene (many random like colors) then use *jpg*. Otherwise, prefer using *png*. They are even some tools out there that optimize the size of *PNG* images, such as `PNGGauntlet `_. Use them to make your images as small as possible in size. 
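To make the sizing advice concrete, here is a sketch of how a content image can be declared with its width given in points; the file name and alt text are placeholders, and the width is simply chosen to stay within the roughly 400 point page width mentioned above:

.. code-block:: rst

   .. the image file name below is a placeholder

   .. image:: images/Tutorial_Result_Overview.png
      :alt: Overview of the tutorial result
      :width: 350pt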
Now on the right side column of the table we add the information about the tutorial: .. container:: enumeratevisibleitemswithsquare + In the first line it is the title of the tutorial. However, there is no need to specify it explicitly. We use the reference system. We'll start up our tutorial with a reference specification, just like in case of this TOC entry with its `` .. _Table-Of-Content-Section:`` . If after this you have a title (pointed out by the following line of -), then Sphinx will replace the ``:ref:`Table-Of-Content-Section``` directive with the tile of the section in reference form (creates a link in web page). Here's how the definition looks in my case: .. code-block:: rst .. _matTheBasicImageContainer: Mat - The Basic Image Container ******************************* Note, that according to the |reST|_ rules the * should be as long as your title. + Compatibility. What version of OpenCV is required to run your sample code. + Author. Use the substitution markup of |reST|_. + A short sentence describing the essence of your tutorial. Now before each TOC entry you need to add the three lines of: .. code-block:: cpp + .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv The plus sign (+) is to enumerate tutorials by using bullet points. So for every TOC entry we have a corresponding bullet point represented by the +. Sphinx is highly indenting sensitive. Indentation is used to express from which point until to which point does a construction last. Un-indentation means end of that construction. So to keep all the bullet points to the same group the following TOC entries (until the next +) should be indented by two spaces. Here, I should also mention that **always** prefer using spaces instead of tabs. Working with only spaces makes possible that if we both use monotype fonts we will see the same thing. Tab size is text editor dependent and as should be avoided. *Sphinx* translates all tabs into 8 spaces before interpreting it. It turns out that the automatic formatting of both the HTML and PDF(LATEX) system messes up our tables. Therefore, we need to help them out a little. For the PDF generation we add the ``.. tabularcolumns:: m{100pt} m{300pt}`` directive. This means that the first column should be 100 points wide and middle aligned. For the HTML look we simply name the following table of a *toctableopencv* class type. Then, we can modify the look of the table by modifying the CSS of our web page. The CSS definitions go into the :file:`opencv/doc/_themes/blue/static/default.css_t` file. .. code-block:: css .toctableopencv { width: 100% ; table-layout: fixed; } .toctableopencv colgroup col:first-child { width: 100pt !important; max-width: 100pt !important; min-width: 100pt !important; } .toctableopencv colgroup col:nth-child(2) { width: 100% !important; } However, you should not need to modify this. Just add these three lines (plus keep the two space indentation) for all TOC entries you add. At the end of the TOC file you'll find: .. code-block:: rst .. raw:: latex \pagebreak .. toctree:: :hidden: ../mat - the basic image container/mat - the basic image container The page break entry comes for separating sections and should be only one in a TOC tree |reST|_ file. Finally, at the end of the TOC tree we need to add our tutorial to the *Sphinx* TOC tree system. *Sphinx* will generate from this the previous-next-up information for the HTML file and add items to the PDF according to the order here. By default this TOC tree directive generates a simple table of contents. 
However, we already created a fancy looking one so we no longer need this basic one. Therefore, we add the *hidden* option to do not show it. The path is of a relative type. We step back in the file system and then go into the :file:`mat - the basic image container` directory for the :file:`mat - the basic image container.rst` file. Putting out the *rst* extension for the file is optional. Write the tutorial ================== Create a folder with the name of your tutorial. Preferably, use small letters only. Then create a text file in this folder with *rst* extension and the same name. If you have images for the tutorial create an :file:`images` folder and add your images there. When creating your images follow the guidelines described in the previous part! Now here's our recommendation for the structure of the tutorial (although, remember that this is not carved in the stone; if you have a better idea, use it!): .. container:: enumeratevisibleitemswithsquare + Create the reference point and the title. .. code-block:: rst .. _matTheBasicImageContainer: Mat - The Basic Image Container ******************************* You start the tutorial by specifying a reference point by the ``.. _matTheBasicImageContainer:`` and then its title. The name of the reference point should be a unique one over the whole documentation. Therefore, do not use general names like *tutorial1*. Use the * character to underline the title for its full width. The subtitles of the tutorial should be underlined with = charachter. + Goals. You start your tutorial by specifying what you will present. You can also enumerate the sub jobs to be done. For this you can use a bullet point construction. There is a single configuration file for both the reference manual and the tutorial documentation. In the reference manuals at the argument enumeration we do not want any kind of bullet point style enumeration. Therefore, by default all the bullet points at this level are set to do not show the dot before the entries in the HTML. You can override this by putting the bullet point in a container. I've defined a square type bullet point view under the name *enumeratevisibleitemswithsquare*. The CSS style definition for this is again in the :file:`opencv\doc\_themes\blue\static\default.css_t` file. Here's a quick example of using it: .. code-block:: rst .. container:: enumeratevisibleitemswithsquare + Create the reference point and the title. + Second entry + Third entry Note that you need the keep the indentation of the container directive. Directive indentations are always three (3) spaces. Here you may even give usage tips for your sample code. + Source code. Present your samples code to the user. It's a good idea to offer a quick download link for the HTML page by using the *download* directive and pointing out where the user may find your source code in the file system by using the *file* directive: .. code-block:: rst Text :file:`samples/cpp/tutorial_code/highgui/video-write/` folder of the OpenCV source library or :download:`text to appear in the webpage <../../../../samples/cpp/tutorial_code/HighGUI/video-write/video-write.cpp>`. For the download link the path is a relative one, hence the multiple back stepping operations (..). Then you can add the source code either by using the *code block* directive or the *literal include* one. In case of the code block you will need to actually add all the source code text into your |reST|_ text and also apply the required indentation: .. code-block:: rst .. 
code-block:: cpp int i = 0; l = ++j; The only argument of the directive is the language used (here CPP). Then you add the source code into its content (meaning one empty line after the directive) by keeping the indentation of the directive (3 spaces). With the *literal include* directive you do not need to add the source code of the sample. You just specify the sample and *Sphinx* will load it for you, during build time. Here's an example usage: .. code-block:: rst .. literalinclude:: ../../../../samples/cpp/tutorial_code/HighGUI/video-write/video-write.cpp :language: cpp :linenos: :tab-width: 4 :lines: 1-8, 21-22, 24- After the directive you specify a relative path to the file from what to import. It has four options: the language to use, if you add the ``:linenos:`` the line numbers will be shown, you can specify the tab size with the ``:tab-width:`` and you do not need to load the whole file, you can show just the important lines. Use the *lines* option to do not show redundant information (such as the *help* function). Here basically you specify ranges, if the second range line number is missing than that means that until the end of the file. The ranges specified here do no need to be in an ascending order, you may even reorganize the structure of how you want to show your sample inside the tutorial. + The tutorial. Well here goes the explanation for why and what have you used. Try to be short, clear, concise and yet a thorough one. There's no magic formula. Look into a few already made tutorials and start out from there. Try to mix sample OpenCV code with your explanations. If with words is hard to describe something do not hesitate to add in a reasonable size image, to overcome this issue. When you present OpenCV functionality it's a good idea to give a link to the used OpenCV data structure or function. Because the OpenCV tutorials and reference manual are in separate PDF files it is not possible to make this link work for the PDF format. Therefore, we use here only web page links to the **opencv.itseez.com** website. The OpenCV functions and data structures may be used for multiple tasks. Nevertheless, we want to avoid that every users creates its own reference to a commonly used function. So for this we use the global link collection of *Sphinx*. This is defined in the file:`opencv/doc/conf.py` configuration file. Open it and go all the way down to the last entry: .. code-block:: py # ---- External links for tutorials ----------------- extlinks = { 'huivideo' : ('http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html#%s', None) } In short here we defined a new **huivideo** directive that refers to an external webpage link. Its usage is: .. code-block:: rst A sample function of the highgui modules image write and read page is the :huivideo:`imread() function `. Which turns to: A sample function of the highgui modules image write and read page is the :huivideo:`imread() function `. The argument you give between the <> will be put in place of the ``%s`` in the upper definition, and as the link will anchor to the correct function. To find out the anchor of a given function just open up a web page, search for the function and click on it. In the address bar it should appear like: ``http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html#imread`` . Look here for the name of the directives for each page of the OpenCV reference manual. If none present for one of them feel free to add one for it. 
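As a concrete illustration of such a link, the usage below is a sketch that borrows the *imread* anchor from the address-bar example above; substitute the anchor of the function you actually want to reference:

.. code-block:: rst

   .. the anchor (imread) is taken from the reference manual page URL;
      replace it with the anchor of the function you link to

   A sample function of the highgui modules image write and read page is the
   :huivideo:`imread() function <imread>`.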
For formulas you can add LATEX code that will translate in the web pages into images. You do this by using the *math* directive. A usage tip: .. code-block:: latex .. math:: MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2} That after build turns into: .. math:: MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2} You can even use it inline as ``:math:` MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}``` that turns into :math:`MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}`. If you use some crazy LATEX library extension you need to add those to the ones to use at build time. Look into the file:`opencv/doc/conf.py` configuration file for more information on this. + Results. Well, here depending on your program show one of more of the following: - Console outputs by using the code block directive. - Output images. - Runtime videos, visualization. For this use your favorite screens capture software. `Camtasia Studio `_ certainly is one of the better choices, however their prices are out of this world. `CamStudio `_ is a free alternative, but less powerful. If you do a video you can upload it to YouTube and then use the raw directive with HTML option to embed it into the generated web page: .. code-block:: rst You may observe a runtime instance of this on the `YouTube here `_. .. raw:: html
This results in the text and video: You may observe a runtime instance of this on the `YouTube here `_. .. raw:: html
When these aren't self-explanatory make sure to throw in a few guiding lines about what and why we can see. + Build the documentation and check for errors or warnings. In the CMake make sure you check or pass the option for building documentation. Then simply build the **docs** project for the PDF file and the **docs_html** project for the web page. Read the output of the build and check for errors/warnings for what you have added. This is also the time to observe and correct any kind of *not so good looking* parts. Remember to keep clean our build logs. + Read again your tutorial and check for both programming and spelling errors. If found any, please correct them. Take home the pride and joy of a job well done! =============================================== Once you are done contact me or dr. Gary Bradski with the tutorial. We may submit the tutorial ourselves to the trunk branch of our repository or ask you to do so. Now, to see your work **live** you may need to wait some time. The PDFs are updated usually at the launch of a new OpenCV version. The web pages are a little more diverse. They are automatically rebuilt in each evening. However, the **opencv.itseez.com** website contains only the most recent **stable branch** of OpenCV. Currently this is 2.3. When we add something new (like a tutorial) that first goes to the **trunk branch** of our repository. A build of this you may find on the **opencv.itseez.com/trunk** website. Although, we try to make a build every night occasionally we might freeze any of the branches to fix upcoming issues. During this it may take a little longer to see your work *live*, however if you submited it, be sure that eventually it will show up. If you have any questions or advices relating to this tutorial you can contact me at -delete-bernat@-delete-primeranks.net. Of course, delete the -delete- parts of that e-mail address. \ No newline at end of file +.. _howToWriteTutorial: How to write a tutorial for OpenCV? *********************************** Okay, so assume you have just finished a project of yours implementing something based on OpenCV and you want to present/share it with the community. Luckily, OpenCV is an *open source project*. This means that in theory anyone has access to the full source code and may extend it. While making a robust and practical library (like OpenCV) is great, the success of a library also depends on how user friendly it is. To improve on this aspect, the OpenCV team has already been listening to user feedback from its :opencv_group:`Yahoo user group <>` and by making samples you can find in the source directories sample folder. The addition of the tutorials (in both online and PDF format) is an extension of these efforts. Goal ==== .. _reST: http://docutils.sourceforge.net/rst.html .. |reST| replace:: reStructuredText .. |Sphinx| replace:: Sphinx .. _Sphinx: http://sphinx.pocoo.org/ The tutorials are just as an important part of the library as the implementation of those crafty data structures and algorithms you can find in OpenCV. Therefore, the source codes for the tutorials are part of the library. And yes, I meant source codes. The reason for this formulation is that the tutorials are written by using the |Sphinx|_ documentation generation system. This is based on the popular python documentation system called |reST|_ (reST). 
ReStructuredText is a really neat language that, by using a few simple conventions (indentation, directives) and emulating old-school e-mail writing techniques (text only), tries to offer a simple way to create and edit documents. Sphinx extends this with some new features and creates the resulting document in both HTML (for the web) and PDF (for offline usage) format. Usually, an OpenCV tutorial has the following parts:

1. A source code demonstration of an OpenCV feature:

   a. One or more CPP, Python, Java or other files, depending on what OpenCV offers support for and in what language you write the tutorial.
   #. Occasionally, input resource files required for running your tutorial's application.

#. A table of content entry (so people may easily find the tutorial):

   a. Adding your entry to the tutorials' table of content (**reST** file).
   #. Adding an image file near the TOC entry.

#. The content of the tutorial itself:

   a. The **reST** text of the tutorial.
   #. Images, following the idea that "*A picture is worth a thousand words*".
   #. For more complex demonstrations you may create a video.

As you can see, you will need at least some basic knowledge of the *reST* system in order to complete the task at hand successfully. However, don't worry: *reST* (and *Sphinx*) was made with simplicity in mind. It is easy to grasp its basics. I found that the `OpenAlea documentations introduction on this subject `_ (or the `Thomas Cokelaer one `_) should be enough for this. If you need a more in-depth description of some directive or feature, look it up in the official |reST|_ help files or in the |Sphinx|_ documentation.

In our world, achieving some tasks is possible in multiple ways. However, some of the roads to take may have obvious or hidden advantages over others. Then again, in some other cases it may come down to simple user preference. Here, I'll present how I decided to write the tutorials, based on my personal experience. If for some of them you know a better solution and you can back it up, feel free to use that. I have nothing against it, as long as it gets the job done in an elegant fashion.

Now the best would be if you could make the integration yourself. For this you first need to have the source code. I recommend following the guides for your operating system on acquiring OpenCV sources. For Linux users look :ref:`here ` and for :ref:`Windows here `. You must also install Python and Sphinx with its dependencies in order to be able to build the documentation.

Once you have downloaded the repository to your hard drive, take a look in the OpenCV directory to make sure you have both the samples and doc folders present. Anyone may download the trunk source files from :file:`git://code.opencv.org/opencv.git`. Nevertheless, not everyone has upload (commit/submit) rights. This is to protect the integrity of the library. If you plan on doing more than one tutorial and would like to have an account with commit rights, you should first register an account at http://code.opencv.org/ and then contact dr. Gary Bradski at -delete-bradski@-delete-willowgarage.com. Otherwise, you can just send the resulting files to us via the :opencv_group:`Yahoo user group <>` or to me at -delete-bernat@-delete-primeranks.net and I'll add them. If you have questions, suggestions or constructive criticism, I will gladly listen. If you send it to the OpenCV group, please tag its subject with a **[Tutorial]** entry.

Format the Source Code
======================

Before I start, let it be clear: the main goal is to have working sample code. However, for your tutorial to be of top-notch quality you should follow the few guidelines I am going to present here. In case you have an application using the older interface (with *IplImage*, *CVMat*, *cvLoadImage* and such), consider migrating it to the new C++ interface. The tutorials are intended to be up-to-date help for our users. And as of OpenCV 2 the emphasis is on using the less error-prone and clearer C++ interface. Therefore, if possible, please convert your code to the C++ interface. For this it may help to read the :ref:`InteroperabilityWithOpenCV1` tutorial. Once you have working OpenCV 2 code, you should make your source code snippet as easy to read as possible. Here are a couple of tips for this:

.. container:: enumeratevisibleitemswithsquare

   + Add a standard output with the description of what your program does. Keep it short and yet descriptive. This output is printed at the start of the program. In my example files this usually takes the form of a *help* function containing the output. This way both the source file viewer and the application runner can see what your sample is all about. Here's an instance of this:

     .. code-block:: cpp

        void help()
        {
            cout
            << "------------------------------------------------------------------------------" << endl
            << "This program shows how to write video files. You can extract the R or G or B color channel "
            << "of the input video. You can choose to use the source codec (Y) or select a custom one. (N)" << endl
            << "Usage:"                                              << endl
            << "./video-write inputvideoName [ R | G | B] [Y | N]"   << endl
            << "------------------------------------------------------------------------------" << endl
            << endl;
        }
        // ...
        int main(int argc, char *argv[], char *window_name)
        {
            help();
            // here comes the actual source code
        }

     Additionally, finalize the description with a short usage guide. This way the user will know how to call your program, which leads us to the next point.

   + Prefer command line arguments over hard-coded values. If your program has some variables that may be changed, use command line arguments for this. The tutorials can be a simple try-out ground for the user. If you offer command line control for the input image (for example), then you offer the user the possibility to try it out with his or her own images, without the need to mess with the source code. In the example above you can see that the input image, channel and codec selection may all be changed from the command line. Just compile the program and run it with your own input arguments.

   + Be as verbose as possible. There is no shame in filling the source code with comments. This way the more advanced user may figure out what's happening right from the sample code. This advice goes for the console output too. Tell the user what's happening. Never leave the user hanging there, wondering "Is this program now crashing or just doing some computationally intensive task?". So, if you do a training task that may take some time, make sure you print out a message about this before starting it and after finishing it.

   + Throw out unnecessary stuff from your source code. This is a warning not to take the previous point too seriously. Balance is the key. If something can be done in fewer lines, or more simply, then that's the way you should do it. Nevertheless, if for some reason you have such sections, tell the user why you have chosen to do so. Keep the amount of information as low as possible, while still getting the job done in an elegant way.

   + Put your sample file into the :file:`opencv/samples/cpp/tutorial_code/sectionName` folder. If you write a tutorial for a language other than cpp, change that part of the path. Before completing this you need to decide to which section (module) your tutorial belongs. Think about which module your code relies on most heavily, and that is the one to use. If the answer is more than one module, then the *general* section is the one to use. To find the *opencv* directory, open up your file system and navigate to where you downloaded our repository.

   + If the input resources are hard to acquire for the end user, consider adding a few of them to :file:`opencv/samples/cpp/tutorial_code/images`. Make sure that whoever reads your code can try it out!

Add the TOC entry
=================

For this you will need to know some |reST|_. There is no going around this. |reST|_ files have the **rst** extension. However, these are simple text files. Use any text editor you like. Finding a text editor that offers syntax highlighting for |reST|_ was quite a challenge at the time of writing this tutorial. In my experience, `Intype `_ is a solid option on Windows, although there is still room for improvement.

Adding your source code to a table of content is important for multiple reasons. First and foremost, this will allow the user base to find your tutorial from our website's tutorial table of content. Secondly, if you omit this, *Sphinx* will throw a warning that your tutorial file isn't part of any TOC tree entry. And there is nothing the developer team hates more than an ever increasing warning/error list for their builds. *Sphinx* also uses this to build up the previous-next-up buttons on the website. Finally, omitting this step means your tutorial will **not** be added to the PDF version of the tutorials.

Navigate to the :file:`opencv/doc/tutorials/section/table_of_content_section` folder (where the section is the module to which you're adding the tutorial). Open the *table_of_content_section* file. Now this may have two forms. If no prior tutorials are present in this section, there is a template message about this with the following form:

.. code-block:: rst

   .. _Table-Of-Content-Section:

   Section title
   -----------------------------------------------------------

   Description about the section.

   .. include:: ../../definitions/noContent.rst

   .. raw:: latex

      \pagebreak

The first line is a reference to the section title in the reST system. The section title will be a link and you may refer to it via the ``:ref:`` directive. The *include* directive imports the template text from the definitions directory's *noContent.rst* file. *Sphinx* does not create the PDF from scratch. It does this by first creating a latex file and then creating the PDF from the latex file. With the *raw* directive you can add commands directly to this output. Its single argument states which kind of output the content of the directive should be added to. For the PDFs it may happen that multiple sections overlap on a single page. To avoid this, at the end of the TOC we add a *pagebreak* latex command that hints to the LATEX system that the next line should go on a new page. If you have one of these, try to transform it to the following form:

.. include:: ../../definitions/tocDefinitions.rst
.. code-block:: rst

   .. _Table-Of-Content-Section:

   Section title
   -----------------------------------------------------------

   .. include:: ../../definitions/tocDefinitions.rst

   + .. tabularcolumns:: m{100pt} m{300pt}

     .. cssclass:: toctableopencv

     =============== ======================================================
     |MatBasicIma|   **Title:** :ref:`matTheBasicImageContainer`

                     *Compatibility:* > OpenCV 2.0

                     *Author:* |Author_BernatG|

                     You will learn how to store images in the memory and how to print out their content to the console.
     =============== ======================================================

     .. |MatBasicIma| image:: images/matTheBasicImageStructure.jpg
        :height: 90pt
        :width:  90pt

   .. raw:: latex

      \pagebreak

   .. toctree::
      :hidden:

      ../mat - the basic image container/mat - the basic image container

If this is already present, just add a new section of content between the include and the raw directives (excluding those lines). Here you'll see a new include directive. This should be present only once in a TOC tree; the included reST file contains the definitions of all the authors contributing to the OpenCV tutorials. We are a multicultural community and some of our names may contain some funky characters. However, reST **only supports** ASCII characters. Luckily, we can specify Unicode characters with the *unicode* directive. Doing this in every one of your tutorials would be a troublesome procedure. Therefore, the tocDefinitions file contains the definition of your author name. Add it there once and afterwards just use the replace construction. For example, here's the definition for my name:

.. code-block:: rst

   .. |Author_BernatG| unicode:: Bern U+00E1 t U+0020 G U+00E1 bor

``|Author_BernatG|`` is the alias of the text definition. Later I can use it to insert the definition, as I've done in the TOC's *Author* part. After the ``::`` and a space you start the definition. If you want to add a Unicode (non-ASCII) character, leave an empty space and specify it in the U+(Unicode code) format. To find the Unicode code of a character I recommend using the `FileFormat `_ website's service. Spaces are trimmed from the definition, therefore we add a space by its Unicode code (U+0020).

Everything up to the *raw* directive is a TOC tree entry. Here's how a TOC entry looks:

+ .. tabularcolumns:: m{100pt} m{300pt}

  .. cssclass:: toctableopencv

  =============== ======================================================
  |MatBasicIma|   **Title:** :ref:`matTheBasicImageContainer`

                  *Compatibility:* > OpenCV 2.0

                  *Author:* |Author_BernatG|

                  You will learn how to store images in the memory and how to print out their content to the console.
  =============== ======================================================

  .. |MatBasicIma| image:: images/matTheBasicImageStructure.jpg
     :height: 90pt
     :width:  90pt

As you can see, we have an image to the left and a description box to the right. To create the two boxes we use a table with two columns and a single row. In the left column is the image and in the right one the description. However, the image directive is way too long to fit in a column. Therefore, we need to use the substitution definition system. We add this definition after the TOC tree. All images for the TOC tree are to be put in the images folder near its |reST|_ file. We use the point measurement system because we are also creating PDFs. PDFs are printable documents, where there is no such thing as pixels (px), just points (pt). And while space is generally no problem for web pages (we have monitors with **huge** resolutions), the size of the paper (A4 or letter) is constant and will be for a long time to come. Therefore, size constraints come into play more for the PDF than for the generated HTML code.

Now your images should be as small as possible, while still offering the intended information to the user. Remember that the tutorial will become part of the OpenCV source code. If you add large images (that manifest in the form of large file sizes) it will just increase the size of the repository pointlessly. If someone wants to download it later, the download time will be that much longer. Not to mention the larger PDF size for the tutorials and the longer load time for the web pages. In terms of pixels a TOC image should not be larger than 120 x 120 pixels. Resize your images if they are larger!

.. note:: If you add a larger image and specify a smaller image size, *Sphinx* will not resize it. At build time it will add the full size image and the resizing will be done by your browser after the image is loaded. A 120 x 120 image is somewhere below 10KB. If you add a 110KB image, you have just pointlessly added 100KB of extra data to transfer over the internet for every user!

Generally speaking, you shouldn't need to specify your image sizes (excluding the TOC entries). If no size is given, *Sphinx* will use the size of the image itself (so no resize occurs). Then again, if for some reason you decide to specify a size, it should be the **width** of the image rather than its height. The reason for this again goes back to the PDFs. On a PDF page the height is larger than the width. In the PDF the images will not be resized. If you specify a size that does not fit on the page, then whatever does not fit **will be cut off**. When creating the images for your tutorial you should try to keep the image widths below 500 pixels, and calculate with around 400 points of page width when specifying image widths.

The image format depends on the content of the image. If you have some complex scene (many random-like colors) then use *jpg*. Otherwise, prefer *png*. There are even some tools out there that optimize the size of *PNG* images, such as `PNGGauntlet `_. Use them to make your images as small as possible in size.

Now in the right side column of the table we add the information about the tutorial:

.. container:: enumeratevisibleitemswithsquare

   + In the first line there is the title of the tutorial. However, there is no need to specify it explicitly. We use the reference system: we'll start our tutorial with a reference specification, just like in the case of this TOC entry with its ``.. _Table-Of-Content-Section:``. If after this you have a title (pointed out by the following line of -), then Sphinx will replace the ``:ref:`Table-Of-Content-Section``` directive with the title of the section in reference form (it creates a link on the web page). Here's how the definition looks in my case:

     .. code-block:: rst

        .. _matTheBasicImageContainer:

        Mat - The Basic Image Container
        *******************************

     Note that, according to the |reST|_ rules, the * should be as long as your title.

   + Compatibility. What version of OpenCV is required to run your sample code.

   + Author. Use the substitution markup of |reST|_.

   + A short sentence describing the essence of your tutorial.

Now before each TOC entry you need to add these three lines:

.. code-block:: rst

   + .. tabularcolumns:: m{100pt} m{300pt}

     .. cssclass:: toctableopencv

The plus sign (+) enumerates the tutorials using bullet points. So for every TOC entry we have a corresponding bullet point, represented by the +. Sphinx is highly sensitive to indentation. Indentation is used to express from which point to which point a construction lasts. Un-indentation means the end of that construction. So, to keep all the bullet points in the same group, the following TOC entries (until the next +) should be indented by two spaces.

Here I should also mention that you should **always** prefer spaces over tabs. Working with only spaces makes it possible that, if we both use monotype fonts, we will see the same thing. Tab size is text editor dependent and should therefore be avoided. *Sphinx* translates all tabs into 8 spaces before interpreting them.

It turns out that the automatic formatting of both the HTML and the PDF (LATEX) systems messes up our tables. Therefore, we need to help them out a little. For the PDF generation we add the ``.. tabularcolumns:: m{100pt} m{300pt}`` directive. This means that the first column should be 100 points wide and middle aligned. For the HTML look we simply give the following table the *toctableopencv* class type. Then we can modify the look of the table by modifying the CSS of our web page. The CSS definitions go into the :file:`opencv/doc/_themes/blue/static/default.css_t` file.

.. code-block:: css

   .toctableopencv
   {
      width: 100% ;
      table-layout: fixed;
   }

   .toctableopencv colgroup col:first-child
   {
      width: 100pt !important;
      max-width: 100pt !important;
      min-width: 100pt !important;
   }

   .toctableopencv colgroup col:nth-child(2)
   {
      width: 100% !important;
   }

However, you should not need to modify this. Just add these three lines (and keep the two space indentation) for all the TOC entries you add. At the end of the TOC file you'll find:

.. code-block:: rst

   .. raw:: latex

      \pagebreak

   .. toctree::
      :hidden:

      ../mat - the basic image container/mat - the basic image container

The page break entry is there to separate sections, and there should be only one of it in a TOC tree |reST|_ file. Finally, at the end of the TOC tree we need to add our tutorial to the *Sphinx* TOC tree system. *Sphinx* will generate from this the previous-next-up information for the HTML file and add items to the PDF according to the order here. By default this TOC tree directive generates a simple table of contents. However, we have already created a fancy looking one, so we no longer need this basic one. Therefore, we add the *hidden* option so it is not shown. The path is a relative one: we step back in the file system and then go into the :file:`mat - the basic image container` directory for the :file:`mat - the basic image container.rst` file. Leaving out the *rst* extension of the file is optional.

Write the tutorial
==================

Create a folder with the name of your tutorial. Preferably, use lowercase letters only. Then create a text file in this folder with the *rst* extension and the same name. If you have images for the tutorial, create an :file:`images` folder and add your images there. When creating your images, follow the guidelines described in the previous part!

Now here's our recommendation for the structure of the tutorial (although, remember that this is not carved in stone; if you have a better idea, use it!):

.. container:: enumeratevisibleitemswithsquare

   + Create the reference point and the title.

     .. code-block:: rst
_matTheBasicImageContainer: Mat - The Basic Image Container ******************************* You start the tutorial by specifying a reference point by the ``.. _matTheBasicImageContainer:`` and then its title. The name of the reference point should be a unique one over the whole documentation. Therefore, do not use general names like *tutorial1*. Use the * character to underline the title for its full width. The subtitles of the tutorial should be underlined with = charachter. + Goals. You start your tutorial by specifying what you will present. You can also enumerate the sub jobs to be done. For this you can use a bullet point construction. There is a single configuration file for both the reference manual and the tutorial documentation. In the reference manuals at the argument enumeration we do not want any kind of bullet point style enumeration. Therefore, by default all the bullet points at this level are set to do not show the dot before the entries in the HTML. You can override this by putting the bullet point in a container. I've defined a square type bullet point view under the name *enumeratevisibleitemswithsquare*. The CSS style definition for this is again in the :file:`opencv\doc\_themes\blue\static\default.css_t` file. Here's a quick example of using it: .. code-block:: rst .. container:: enumeratevisibleitemswithsquare + Create the reference point and the title. + Second entry + Third entry Note that you need the keep the indentation of the container directive. Directive indentations are always three (3) spaces. Here you may even give usage tips for your sample code. + Source code. Present your samples code to the user. It's a good idea to offer a quick download link for the HTML page by using the *download* directive and pointing out where the user may find your source code in the file system by using the *file* directive: .. code-block:: rst Text :file:`samples/cpp/tutorial_code/highgui/video-write/` folder of the OpenCV source library or :download:`text to appear in the webpage <../../../../samples/cpp/tutorial_code/HighGUI/video-write/video-write.cpp>`. For the download link the path is a relative one, hence the multiple back stepping operations (..). Then you can add the source code either by using the *code block* directive or the *literal include* one. In case of the code block you will need to actually add all the source code text into your |reST|_ text and also apply the required indentation: .. code-block:: rst .. code-block:: cpp int i = 0; l = ++j; The only argument of the directive is the language used (here CPP). Then you add the source code into its content (meaning one empty line after the directive) by keeping the indentation of the directive (3 spaces). With the *literal include* directive you do not need to add the source code of the sample. You just specify the sample and *Sphinx* will load it for you, during build time. Here's an example usage: .. code-block:: rst .. literalinclude:: ../../../../samples/cpp/tutorial_code/HighGUI/video-write/video-write.cpp :language: cpp :linenos: :tab-width: 4 :lines: 1-8, 21-22, 24- After the directive you specify a relative path to the file from what to import. It has four options: the language to use, if you add the ``:linenos:`` the line numbers will be shown, you can specify the tab size with the ``:tab-width:`` and you do not need to load the whole file, you can show just the important lines. Use the *lines* option to do not show redundant information (such as the *help* function). 
     Here you basically specify ranges; if the second line number of a range is missing, the range lasts until the end of the file. The ranges specified here do not need to be in ascending order; you may even reorganize the structure of how you want to show your sample inside the tutorial.

   + The tutorial. Here goes the explanation of why and what you have used. Try to be short, clear and concise, yet thorough. There's no magic formula. Look into a few already written tutorials and start out from there. Try to mix sample OpenCV code with your explanations. If something is hard to describe in words, do not hesitate to add a reasonably sized image to get the point across.

     When you present OpenCV functionality it's a good idea to give a link to the OpenCV data structure or function used. Because the OpenCV tutorials and the reference manual are in separate PDF files, it is not possible to make such a link work for the PDF format. Therefore, here we use only web page links to the **opencv.itseez.com** website. The OpenCV functions and data structures may be used for multiple tasks; nevertheless, we want to avoid every user creating their own reference to a commonly used function. For this we use the global link collection of *Sphinx*, defined in the :file:`opencv/doc/conf.py` configuration file. Open it and go all the way down to the last entry:

     .. code-block:: py

        # ---- External links for tutorials -----------------
        extlinks = {
            'huivideo' : ('http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html#%s', None)
            }

     In short, here we defined a new **huivideo** directive that refers to an external web page link. Its usage is:

     .. code-block:: rst

        A sample function of the highgui modules image write and read page is the :huivideo:`imread() function <imread>`.

     Which turns into: A sample function of the highgui modules image write and read page is the :huivideo:`imread() function <imread>`. The argument you give between the <> is substituted for the ``%s`` in the definition above, and the link will anchor to the correct function. To find out the anchor of a given function, just open a web page, search for the function and click on it; in the address bar it should appear like ``http://opencv.itseez.com/modules/highgui/doc/reading_and_writing_images_and_video.html#imread``. Look there for the names of the directives for each page of the OpenCV reference manual. If none is present for one of them, feel free to add one for it.

     For formulas you can add LATEX code that is translated into images on the web pages. You do this by using the *math* directive. A usage tip:

     .. code-block:: latex

        .. math::

           MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}

     That after the build turns into:

     .. math::

        MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}

     You can even use it inline, as ``:math:`MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}```, which turns into :math:`MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}`. If you use some exotic LATEX library extension you need to add it to the ones used at build time. Look into the :file:`opencv/doc/conf.py` configuration file for more information on this.

   + Results. Here, depending on your program, show one or more of the following (see the sketch after this list for the first two):

     - Console output, by using the code block directive.
     - Output images.
     - Runtime videos, visualization. For this use your favorite screen capture software. `Camtasia Studio `_ certainly is one of the better choices, however its prices are out of this world. `CamStudio `_ is a free alternative, but less powerful.
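     For the first two, a minimal sketch of how such a results block might look is shown below; the image file name and the console line are hypothetical placeholders:

     .. code-block:: rst

        The program prints the number of detected objects to the console:

        .. code-block:: text

           Detected objects: 3

        And produces the following output image:

        .. image:: images/MyTutorialResult.jpg
           :alt: Result of the tutorial program
           :align: center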
     If you do a video, you can upload it to YouTube and then use the raw directive with the HTML option to embed it into the generated web page:

     .. code-block:: rst

        You may observe a runtime instance of this on the `YouTube here `_.

        .. raw:: html
     This results in the text and video: You may observe a runtime instance of this on the `YouTube here `_.

     .. raw:: html
When these aren't self-explanatory make sure to throw in a few guiding lines about what and why we can see. + Build the documentation and check for errors or warnings. In the CMake make sure you check or pass the option for building documentation. Then simply build the **docs** project for the PDF file and the **docs_html** project for the web page. Read the output of the build and check for errors/warnings for what you have added. This is also the time to observe and correct any kind of *not so good looking* parts. Remember to keep clean our build logs. + Read again your tutorial and check for both programming and spelling errors. If found any, please correct them. Take home the pride and joy of a job well done! =============================================== Once you are done contact me or dr. Gary Bradski with the tutorial. We may submit the tutorial ourselves to the trunk branch of our repository or ask you to do so. Now, to see your work **live** you may need to wait some time. The PDFs are updated usually at the launch of a new OpenCV version. The web pages are a little more diverse. They are automatically rebuilt in each evening. However, the **opencv.itseez.com** website contains only the most recent **stable branch** of OpenCV. Currently this is 2.3. When we add something new (like a tutorial) that first goes to the **trunk branch** of our repository. A build of this you may find on the **opencv.itseez.com/trunk** website. Although, we try to make a build every night occasionally we might freeze any of the branches to fix upcoming issues. During this it may take a little longer to see your work *live*, however if you submited it, be sure that eventually it will show up. If you have any questions or advices relating to this tutorial you can contact me at -delete-bernat@-delete-primeranks.net. Of course, delete the -delete- parts of that e-mail address. \ No newline at end of file diff --git a/doc/tutorials/introduction/table_of_content_introduction/table_of_content_introduction.rst b/doc/tutorials/introduction/table_of_content_introduction/table_of_content_introduction.rst index a77612e955..f7e186dc31 100644 --- a/doc/tutorials/introduction/table_of_content_introduction/table_of_content_introduction.rst +++ b/doc/tutorials/introduction/table_of_content_introduction/table_of_content_introduction.rst @@ -29,7 +29,7 @@ Here you can read tutorials about how to set up your computer to work with the O .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ====================================================== |Usage_1| **Title:** :ref:`Linux_GCC_Usage` @@ -47,7 +47,7 @@ Here you can read tutorials about how to set up your computer to work with the O .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ====================================================== |Usage_2| **Title:** :ref:`Linux_Eclipse_Usage` @@ -67,7 +67,7 @@ Here you can read tutorials about how to set up your computer to work with the O .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ====================================================== |WinInstal| **Title:** :ref:`Windows_Installation` @@ -85,7 +85,7 @@ Here you can read tutorials about how to set up your computer to work with the O .. tabularcolumns:: m{100pt} m{300pt} .. 
cssclass:: toctableopencv - + =========== ====================================================== |WinVSHowT| **Title:** :ref:`Windows_Visual_Studio_How_To` @@ -93,7 +93,7 @@ Here you can read tutorials about how to set up your computer to work with the O *Author:* |Author_BernatG| - You will learn what steps you need to perform in order to use the OpenCV library inside a new Microsoft Visual Studio project. + You will learn what steps you need to perform in order to use the OpenCV library inside a new Microsoft Visual Studio project. =========== ====================================================== @@ -105,8 +105,8 @@ Here you can read tutorials about how to set up your computer to work with the O .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - - ================ ====================================================== + + ================ ================================================= |AndroidBinPack| **Title:** :ref:`Android_Binary_Package` *Compatibility:* > OpenCV 2.3.1 @@ -115,17 +115,13 @@ Here you can read tutorials about how to set up your computer to work with the O You will learn how to setup OpenCV for Android platform! - ================ ====================================================== - - .. |AndroidBinPack| image:: images/android_logo.png - :height: 90pt - :width: 90pt + ================ ================================================= .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - - ================ ====================================================== - |AndroidNDKPack| **Title:** :ref:`Android_Binary_Package_with_NDK` + + ================ ================================================= + |AndroidBinPack| **Title:** :ref:`Android_Binary_Package_with_NDK` *Compatibility:* > OpenCV 2.3.1 @@ -133,33 +129,83 @@ Here you can read tutorials about how to set up your computer to work with the O You will learn how to work with C++ OpenCV code for Android platform - ================ ====================================================== + ================ ================================================= - .. |AndroidNDKPack| image:: images/android_logo.png + .. |AndroidBinPack| image:: images/android_logo.png :height: 90pt :width: 90pt + +* **Android** tutorials v2 [in progress] + + .. tabularcolumns:: m{100pt} m{300pt} + .. cssclass:: toctableopencv + + ================ ================================================= + |AndroidLogo| **Title:** :ref:`Android_Dev_Intro` + + *Compatibility:* > OpenCV 2.4.2 + + *Author:* |Author_VsevolodG| + + Not a tutorial, but a guide introducing Android development basics and environment setup + + ================ ================================================= + + .. tabularcolumns:: m{100pt} m{300pt} + .. cssclass:: toctableopencv + + ================ ================================================= + |AndroidLogo| **Title:** :ref:`O4A_SDK` + + *Compatibility:* > OpenCV 2.4.2 + + *Author:* |Author_VsevolodG| + + OpenCV4Android SDK: general info, installation, running samples + + ================ ================================================= + + .. tabularcolumns:: m{100pt} m{300pt} + .. cssclass:: toctableopencv + + ================ ================================================= + |AndroidLogo| **Title:** :ref:`dev_with_OCV_on_Android` + + *Compatibility:* > OpenCV 2.4.2 + + *Author:* |Author_VsevolodG| + + Development with OpenCV4Android SDK + + ================ ================================================= + + .. 
|AndroidLogo| image:: images/android_logo.png + :height: 90pt + :width: 90pt + + * **iOS** -.. tabularcolumns:: m{100pt} m{300pt} -.. cssclass:: toctableopencv + .. tabularcolumns:: m{100pt} m{300pt} + .. cssclass:: toctableopencv -=========== ====================================================== -|Install_2| **Title:** :ref:`iOS-Installation` + =========== ====================================================== + |Install_2| **Title:** :ref:`iOS-Installation` - *Compatibility:* > OpenCV 2.3.1 + *Compatibility:* > OpenCV 2.3.1 - *Author:* |Author_ArtemM| + *Author:* |Author_ArtemM| - We will learn how to setup OpenCV for using it in iOS! + We will learn how to setup OpenCV for using it in iOS! -=========== ====================================================== + =========== ====================================================== -.. |Install_2| image:: images/ios4_logo.jpg - :width: 90pt + .. |Install_2| image:: images/ios4_logo.jpg + :width: 90pt -.. tabularcolumns:: m{100pt} m{300pt} -.. cssclass:: toctableopencv + .. tabularcolumns:: m{100pt} m{300pt} + .. cssclass:: toctableopencv ============= ====================================================== |Beginners_1| **Title:** :ref:`Display_Image` @@ -172,36 +218,39 @@ Here you can read tutorials about how to set up your computer to work with the O ============= ====================================================== - .. |Beginners_1| image:: images/Display_Image_Tutorial_Result.jpg - :height: 90pt - :width: 90pt + .. |Beginners_1| image:: images/Display_Image_Tutorial_Result.jpg + :height: 90pt + :width: 90pt .. tabularcolumns:: m{100pt} m{300pt} - .. cssclass:: toctableopencv - + .. cssclass:: toctableopencv + =============== ====================================================== |Beginners_2| **Title:** :ref:`Load_Save_Image` - *Compatibility:* > OpenCV 2.0 + *Compatibility:* > OpenCV 2.0 - *Author:* |Author_AnaH| + *Author:* |Author_AnaH| We will learn how to save an Image in OpenCV...plus a small conversion to grayscale =============== ====================================================== - .. |Beginners_2| image:: images/Load_Save_Image_Result_1.jpg - :height: 90pt - :width: 90pt + .. |Beginners_2| image:: images/Load_Save_Image_Result_1.jpg + :height: 90pt + :width: 90pt * **Want to contribute, and see your own work between the OpenCV tutorials?** + .. tabularcolumns:: m{100pt} m{300pt} + .. cssclass:: toctableopencv + =============== ====================================================== |HowToWriteT| **Title:** :ref:`howToWriteTutorial` *Compatibility:* > OpenCV 1.0 - *Author:* |Author_BernatG| + *Author:* |Author_BernatG| If you already have a good grasp on using OpenCV and have made some projects that would be perfect presenting an OpenCV feature not yet part of these tutorials, here it is what you need to know. @@ -217,7 +266,7 @@ Here you can read tutorials about how to set up your computer to work with the O .. We use a custom table of content format and as the table of content only imforms Sphinx about the hierarchy of the files, no need to show it. .. 
toctree:: - :hidden: + :hidden: ../linux_install/linux_install ../linux_gcc_cmake/linux_gcc_cmake @@ -226,6 +275,9 @@ Here you can read tutorials about how to set up your computer to work with the O ../windows_visual_studio_Opencv/windows_visual_studio_Opencv ../android_binary_package/android_binary_package ../android_binary_package/android_binary_package_using_with_NDK + ../android_binary_package/android_dev_intro + ../android_binary_package/O4A_SDK + ../android_binary_package/dev_with_OCV_on_Android ../ios_install/ios_install ../display_image/display_image ../load_save_image/load_save_image diff --git a/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst b/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst index 4fbf187444..cabb81c010 100644 --- a/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst +++ b/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.rst @@ -14,7 +14,7 @@ In this tutorial you will learn how to: * :cascade_classifier_load:`load <>` to load a .xml classifier file. It can be either a Haar or a LBP classifer * :cascade_classifier_detect_multiscale:`detectMultiScale <>` to perform the detection. - + Theory ====== @@ -22,9 +22,9 @@ Theory Code ==== -This tutorial code's is shown lines below. You can also download it from `here `_ . The second version (using LBP for face detection) can be `found here `_ +This tutorial code's is shown lines below. You can also download it from `here `_ . The second version (using LBP for face detection) can be `found here `_ -.. code-block:: cpp +.. code-block:: cpp #include "opencv2/objdetect/objdetect.hpp" #include "opencv2/highgui/highgui.hpp" @@ -56,7 +56,7 @@ This tutorial code's is shown lines below. You can also download it from `here < //-- 1. Load the cascades if( !face_cascade.load( face_cascade_name ) ){ printf("--(!)Error loading\n"); return -1; }; if( !eyes_cascade.load( eyes_cascade_name ) ){ printf("--(!)Error loading\n"); return -1; }; - + //-- 2. Read the video stream capture = cvCaptureFromCAM( -1 ); if( capture ) @@ -64,15 +64,15 @@ This tutorial code's is shown lines below. You can also download it from `here < while( true ) { frame = cvQueryFrame( capture ); - + //-- 3. Apply the classifier to the frame if( !frame.empty() ) { detectAndDisplay( frame ); } else { printf(" --(!) No captured frame -- Break!"); break; } - + int c = waitKey(10); - if( (char)c == 'c' ) { break; } + if( (char)c == 'c' ) { break; } } } return 0; @@ -103,11 +103,11 @@ This tutorial code's is shown lines below. You can also download it from `here < for( int j = 0; j < eyes.size(); j++ ) { - Point center( faces[i].x + eyes[j].x + eyes[j].width*0.5, faces[i].y + eyes[j].y + eyes[j].height*0.5 ); + Point center( faces[i].x + eyes[j].x + eyes[j].width*0.5, faces[i].y + eyes[j].y + eyes[j].height*0.5 ); int radius = cvRound( (eyes[j].width + eyes[j].height)*0.25 ); circle( frame, center, radius, Scalar( 255, 0, 0 ), 4, 8, 0 ); } - } + } //-- Show what you got imshow( window_name, frame ); } @@ -124,11 +124,11 @@ Result :align: center :height: 300pt - Remember to copy the files *haarcascade_frontalface_alt.xml* and *haarcascade_eye_tree_eyeglasses.xml* in your current directory. They are located in *opencv/data/haarcascades* + Remember to copy the files *haarcascade_frontalface_alt.xml* and *haarcascade_eye_tree_eyeglasses.xml* in your current directory. They are located in *opencv/data/haarcascades* -#. 
This is the result of using the file *lbpcascade_frontalface.xml* (LBP trained) for the face detection. For the eyes we keep using the file used in the tutorial. +#. This is the result of using the file *lbpcascade_frontalface.xml* (LBP trained) for the face detection. For the eyes we keep using the file used in the tutorial. .. image:: images/Cascade_Classifier_Tutorial_Result_LBP.jpg :align: center - :height: 300pt + :height: 300pt diff --git a/doc/tutorials/tutorials.rst b/doc/tutorials/tutorials.rst index 068c72814c..1238745a7b 100644 --- a/doc/tutorials/tutorials.rst +++ b/doc/tutorials/tutorials.rst @@ -2,7 +2,7 @@ OpenCV Tutorials ################ -The following links describe a set of basic OpenCV tutorials. All the source code mentioned here is provide as part of the OpenCV regular releases, so check before you start copy & pasting the code. The list of tutorials below is automatically generated from reST files located in our SVN repository. +The following links describe a set of basic OpenCV tutorials. All the source code mentioned here is provide as part of the OpenCV regular releases, so check before you start copy & pasting the code. The list of tutorials below is automatically generated from reST files located in our GIT repository. As always, we would be happy to hear your comments and receive your contributions on any tutorial. @@ -10,12 +10,12 @@ As always, we would be happy to hear your comments and receive your contribution .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ======================================================= |Introduct| You will learn how to setup OpenCV on your computer! - + =========== ======================================================= - + .. |Introduct| image:: images/introduction.jpg :height: 80pt :width: 80pt @@ -25,12 +25,12 @@ As always, we would be happy to hear your comments and receive your contribution .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ======================================================= |Core| Here you will learn the about the basic building blocks of the library. A must read and know for understanding how to manipulate the images on a pixel level. - + =========== ======================================================= - + .. |Core| image:: images/core.jpg :height: 80pt :width: 80pt @@ -40,12 +40,12 @@ As always, we would be happy to hear your comments and receive your contribution .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ======================================================= |ImgProc| In this section you will learn about the image processing (manipulation) functions inside OpenCV. - + =========== ======================================================= - + .. |ImgProc| image:: images/imgproc.jpg :height: 80pt :width: 80pt @@ -55,12 +55,12 @@ As always, we would be happy to hear your comments and receive your contribution .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ======================================================= - |HighGui| This section contains valuable tutorials about how to read/save your image/video files and how to use the built-in graphical user interface of the library. - + |HighGui| This section contains valuable tutorials about how to read/save your image/video files and how to use the built-in graphical user interface of the library. + =========== ======================================================= - + .. 
|HighGui| image:: images/highgui.jpg :height: 80pt :width: 80pt @@ -70,12 +70,12 @@ As always, we would be happy to hear your comments and receive your contribution .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ======================================================= - |Calib3D| Although we got most of our images in a 2D format they do come from a 3D world. Here you will learn how to find out from the 2D images information about the 3D world. - + |Calib3D| Although we got most of our images in a 2D format they do come from a 3D world. Here you will learn how to find out from the 2D images information about the 3D world. + =========== ======================================================= - + .. |Calib3D| image:: images/calib3d.jpg :height: 80pt :width: 80pt @@ -85,27 +85,27 @@ As always, we would be happy to hear your comments and receive your contribution .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ======================================================= |Featur2D| Learn about how to use the feature points detectors, descriptors and matching framework found inside OpenCV. - + =========== ======================================================= - + .. |Featur2D| image:: images/feature2D.jpg :height: 80pt :width: 80pt :alt: feature2D Icon * :ref:`Table-Of-Content-Video` - + .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ======================================================= - |Video| Look here in order to find use on your video stream algoritms like: motion extraction, feature tracking and foreground extractions. - + |Video| Look here in order to find use on your video stream algoritms like: motion extraction, feature tracking and foreground extractions. + =========== ======================================================= - + .. |Video| image:: images/video.jpg :height: 80pt :width: 80pt @@ -115,27 +115,27 @@ As always, we would be happy to hear your comments and receive your contribution .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ======================================================= |ObjDetect| Ever wondered how your digital camera detects peoples and faces? Look here to find out! - + =========== ======================================================= - + .. |ObjDetect| image:: images/objdetect.jpg :height: 80pt :width: 80pt :alt: objdetect Icon * :ref:`Table-Of-Content-Ml` - + .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ======================================================= |ml| Use the powerfull machine learning classes for statistical classification, regression and clustering of data. - + =========== ======================================================= - + .. |ml| image:: images/ml.jpg :height: 80pt :width: 80pt @@ -145,12 +145,12 @@ As always, we would be happy to hear your comments and receive your contribution .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ======================================================= - |GPU| Squeeze out every little computation power from your system by using the power of your video card to run the OpenCV algorithms. - + |GPU| Squeeze out every little computation power from your system by using the power of your video card to run the OpenCV algorithms. + =========== ======================================================= - + .. 
|GPU| image:: images/gpu.jpg :height: 80pt :width: 80pt @@ -160,12 +160,12 @@ As always, we would be happy to hear your comments and receive your contribution .. tabularcolumns:: m{100pt} m{300pt} .. cssclass:: toctableopencv - + =========== ======================================================= |General| These tutorials are the bottom of the iceberg as they link together multiple of the modules presented above in order to solve complex problems. - + =========== ======================================================= - + .. |General| image:: images/general.jpg :height: 80pt :width: 80pt diff --git a/doc/user_guide/ug_highgui.rst b/doc/user_guide/ug_highgui.rst index d425067b2d..a71e579282 100644 --- a/doc/user_guide/ug_highgui.rst +++ b/doc/user_guide/ug_highgui.rst @@ -15,7 +15,7 @@ In order to use depth sensor with OpenCV you should do the following preliminary Install OpenNI library (from here http://www.openni.org/downloadfiles) and PrimeSensor Module for OpenNI (from here https://github.com/avin2/SensorKinect). The installation should be done to default folders listed in the instructions of these products, e.g.: .. code-block:: text - + OpenNI: Linux & MacOSX: Libs into: /usr/lib @@ -30,7 +30,7 @@ In order to use depth sensor with OpenCV you should do the following preliminary Bins into: c:/Program Files/Prime Sense/Sensor/Bin If one or both products were installed to the other folders, the user should change corresponding CMake variables ``OPENNI_LIB_DIR``, ``OPENNI_INCLUDE_DIR`` or/and ``OPENNI_PRIME_SENSOR_MODULE_BIN_DIR``. - + #. Configure OpenCV with OpenNI support by setting ``WITH_OPENNI`` flag in CMake. If OpenNI is found in install folders OpenCV will be built with OpenNI library (see a status ``OpenNI`` in CMake log) whereas PrimeSensor Modules can not be found (see a status ``OpenNI PrimeSensor Modules`` in CMake log). Without PrimeSensor module OpenCV will be successfully compiled with OpenNI library, but ``VideoCapture`` object will not grab data from Kinect sensor. @@ -56,9 +56,9 @@ In order to get depth map from depth sensor use ``VideoCapture::operator >>``, e VideoCapture capture( CV_CAP_OPENNI ); for(;;) { - Mat depthMap; + Mat depthMap; capture >> depthMap; - + if( waitKey( 30 ) >= 0 ) break; } @@ -70,19 +70,19 @@ For getting several data maps use ``VideoCapture::grab`` and ``VideoCapture::ret { Mat depthMap; Mat rgbImage - + capture.grab(); - + capture.retrieve( depthMap, OPENNI_DEPTH_MAP ); capture.retrieve( bgrImage, OPENNI_BGR_IMAGE ); - + if( waitKey( 30 ) >= 0 ) break; } For setting and getting some property of sensor` data generators use ``VideoCapture::set`` and ``VideoCapture::get`` methods respectively, e.g. :: - VideoCapture capture( CV_CAP_OPENNI ); + VideoCapture capture( CV_CAP_OPENNI ); capture.set( CV_CAP_OPENNI_IMAGE_GENERATOR_OUTPUT_MODE, CV_CAP_OPENNI_VGA_30HZ ); cout << "FPS " << capture.get( CV_CAP_OPENNI_IMAGE_GENERATOR+CV_CAP_PROP_FPS ) << endl; @@ -100,34 +100,34 @@ Some depth sensors (for example XtionPRO) do not have image generator. In order Flags specifing the needed generator type must be used in combination with particular generator property. 
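For example, a minimal sketch of combining a generator flag with a property flag through ``VideoCapture::set`` and ``VideoCapture::get`` might look like this (it only uses the constants listed below; error handling is mostly omitted):

.. code-block:: cpp

   #include <iostream>
   #include "opencv2/highgui/highgui.hpp"

   using namespace cv;

   int main()
   {
       VideoCapture capture( CV_CAP_OPENNI );
       if( !capture.isOpened() )
           return -1;

       // depth generator + registration property: remap the depth map to the image map
       capture.set( CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_PROP_OPENNI_REGISTRATION, 1 );

       // depth generator + baseline property (available for getting only)
       std::cout << "Baseline (mm): "
                 << capture.get( CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_PROP_OPENNI_BASELINE ) << std::endl;

       return 0;
   }

The same combinations are also available as the predefined single flags listed further below (for example ``CV_CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION``).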
The following properties of cameras available through OpenNI interfaces are supported: -* +* For image generator: - - - ``CV_CAP_PROP_OPENNI_OUTPUT_MODE`` -- Three output modes are supported: ``CV_CAP_OPENNI_VGA_30HZ`` used by default (image generator returns images in VGA resolution with 30 FPS), ``CV_CAP_OPENNI_SXGA_15HZ`` (image generator returns images in SXGA resolution with 15 FPS) and ``CV_CAP_OPENNI_SXGA_30HZ`` (image generator returns images in SXGA resolution with 30 FPS, the mode is supported by XtionPRO Live); depth generator's maps are always in VGA resolution. - -* + - ``CV_CAP_PROP_OPENNI_OUTPUT_MODE`` -- Three output modes are supported: ``CV_CAP_OPENNI_VGA_30HZ`` used by default (image generator returns images in VGA resolution with 30 FPS), ``CV_CAP_OPENNI_SXGA_15HZ`` (image generator returns images in SXGA resolution with 15 FPS) and ``CV_CAP_OPENNI_SXGA_30HZ`` (image generator returns images in SXGA resolution with 30 FPS, the mode is supported by XtionPRO Live); depth generator's maps are always in VGA resolution. + + +* For depth generator: - ``CV_CAP_PROP_OPENNI_REGISTRATION`` -- Flag that registers the remapping depth map to image map by changing depth generator's view point (if the flag is ``"on"``) or sets this view point to its normal one (if the flag is ``"off"``). The registration process’s resulting images are pixel-aligned,which means that every pixel in the image is aligned to a pixel in the depth image. - + Next properties are available for getting only: - + - ``CV_CAP_PROP_OPENNI_FRAME_MAX_DEPTH`` -- A maximum supported depth of Kinect in mm. - - ``CV_CAP_PROP_OPENNI_BASELINE`` -- Baseline value in mm. - - ``CV_CAP_PROP_OPENNI_FOCAL_LENGTH`` -- A focal length in pixels. + - ``CV_CAP_PROP_OPENNI_BASELINE`` -- Baseline value in mm. + - ``CV_CAP_PROP_OPENNI_FOCAL_LENGTH`` -- A focal length in pixels. - ``CV_CAP_PROP_FRAME_WIDTH`` -- Frame width in pixels. - ``CV_CAP_PROP_FRAME_HEIGHT`` -- Frame height in pixels. - ``CV_CAP_PROP_FPS`` -- Frame rate in FPS. * Some typical flags combinations "generator type + property" are defined as single flags: - + - ``CV_CAP_OPENNI_IMAGE_GENERATOR_OUTPUT_MODE = CV_CAP_OPENNI_IMAGE_GENERATOR + CV_CAP_PROP_OPENNI_OUTPUT_MODE`` - ``CV_CAP_OPENNI_DEPTH_GENERATOR_BASELINE = CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_PROP_OPENNI_BASELINE`` - ``CV_CAP_OPENNI_DEPTH_GENERATOR_FOCAL_LENGTH = CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_PROP_OPENNI_FOCAL_LENGTH`` - ``CV_CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION = CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_PROP_OPENNI_REGISTRATION`` - + For more information please refer to the example of usage openni_capture.cpp_ in ``opencv/samples/cpp`` folder. -.. _openni_capture.cpp: http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/openni_capture.cpp +.. _openni_capture.cpp: http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/openni_capture.cpp diff --git a/ios/build_framework.py b/ios/build_framework.py index e5c8b16833..21ec9db45d 100755 --- a/ios/build_framework.py +++ b/ios/build_framework.py @@ -5,12 +5,12 @@ The built framework is universal, it can be used to build app and run it on eith Usage: ./build_framework.py - -By cmake conventions (and especially if you work with OpenCV SVN repository), + +By cmake conventions (and especially if you work with OpenCV repository), the output dir should not be a subdirectory of OpenCV source tree. 
- + Script will create , if it's missing, and a few its subdirectories: - + build/ iPhoneOS/ @@ -29,7 +29,7 @@ import glob, re, os, os.path, shutil, string, sys def build_opencv(srcroot, buildroot, target): "builds OpenCV for device or simulator" - + builddir = os.path.join(buildroot, target) if not os.path.isdir(builddir): os.makedirs(builddir) @@ -46,23 +46,23 @@ def build_opencv(srcroot, buildroot, target): os.system("cmake %s ." % (cmakeargs,)) else: os.system("cmake %s %s" % (cmakeargs, srcroot)) - + for wlib in [builddir + "/modules/world/UninstalledProducts/libopencv_world.a", builddir + "/lib/Release/libopencv_world.a"]: if os.path.isfile(wlib): os.remove(wlib) - + os.system("xcodebuild -parallelizeTargets -jobs 8 -sdk %s -configuration Release -target ALL_BUILD" % target.lower()) os.system("xcodebuild -sdk %s -configuration Release -target install install" % target.lower()) os.chdir(currdir) - + def put_framework_together(srcroot, dstroot): "constructs the framework directory after all the targets are built" - + # find the list of targets (basically, ["iPhoneOS", "iPhoneSimulator"]) targetlist = glob.glob(os.path.join(dstroot, "build", "*")) targetlist = [os.path.basename(t) for t in targetlist] - + # set the current dir to the dst root currdir = os.getcwd() framework_dir = dstroot + "/opencv2.framework" @@ -70,7 +70,7 @@ def put_framework_together(srcroot, dstroot): shutil.rmtree(framework_dir) os.makedirs(framework_dir) os.chdir(framework_dir) - + # determine OpenCV version (without subminor part) tdir0 = "../build/" + targetlist[0] cfg = open(tdir0 + "/cvconfig.h", "rt") @@ -79,18 +79,18 @@ def put_framework_together(srcroot, dstroot): opencv_version = l[l.find("\"")+1:l.rfind(".")] break cfg.close() - + # form the directory tree dstdir = "Versions/A" os.makedirs(dstdir + "/Resources") # copy headers shutil.copytree(tdir0 + "/install/include/opencv2", dstdir + "/Headers") - + # make universal static lib wlist = " ".join(["../build/" + t + "/lib/Release/libopencv_world.a" for t in targetlist]) os.system("lipo -create " + wlist + " -o " + dstdir + "/opencv2") - + # form Info.plist srcfile = open(srcroot + "/ios/Info.plist.in", "rt") dstfile = open(dstdir + "/Resources/Info.plist", "wt") @@ -98,29 +98,29 @@ def put_framework_together(srcroot, dstroot): dstfile.write(l.replace("${VERSION}", opencv_version)) srcfile.close() dstfile.close() - + # copy cascades # TODO ... 
- + # make symbolic links os.symlink(dstdir + "/Headers", "Headers") os.symlink(dstdir + "/Resources", "Resources") os.symlink(dstdir + "/opencv2", "opencv2") os.symlink("A", "Versions/Current") - - + + def build_framework(srcroot, dstroot): "main function to do all the work" - + for target in ["iPhoneOS", "iPhoneSimulator"]: build_opencv(srcroot, os.path.join(dstroot, "build"), target) - + put_framework_together(srcroot, dstroot) - + if __name__ == "__main__": if len(sys.argv) != 2: print "Usage:\n\t./build_framework.py \n\n" sys.exit(0) - + build_framework(os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), "..")), os.path.abspath(sys.argv[1])) diff --git a/modules/contrib/src/stereovar.cpp b/modules/contrib/src/stereovar.cpp index 88640d86b2..1b542bbf52 100755 --- a/modules/contrib/src/stereovar.cpp +++ b/modules/contrib/src/stereovar.cpp @@ -67,11 +67,12 @@ StereoVar::~StereoVar() static Mat diffX(Mat &src) { - register int x, y, cols = src.cols - 1; + int cols = src.cols - 1; Mat dst(src.size(), src.type()); - for(y = 0; y < src.rows; y++){ + for(int y = 0; y < src.rows; y++){ const float* pSrc = src.ptr(y); float* pDst = dst.ptr(y); + int x = 0; #if CV_SSE2 for (x = 0; x <= cols - 8; x += 8) { __m128 a0 = _mm_loadu_ps(pSrc + x); diff --git a/modules/core/doc/basic_structures.rst b/modules/core/doc/basic_structures.rst index a763c4aa11..a2d2b5431d 100644 --- a/modules/core/doc/basic_structures.rst +++ b/modules/core/doc/basic_structures.rst @@ -2446,6 +2446,6 @@ The above methods are usually enough for users. If you want to make your own alg * Make a class and specify ``Algorithm`` as its base class. * The algorithm parameters should be the class members. See ``Algorithm::get()`` for the list of possible types of the parameters. * Add public virtual method ``AlgorithmInfo* info() const;`` to your class. - * Add constructor function, ``AlgorithmInfo`` instance and implement the ``info()`` method. The simplest way is to take http://code.opencv.org/svn/opencv/trunk/opencv/modules/ml/src/ml_init.cpp as the reference and modify it according to the list of your parameters. + * Add constructor function, ``AlgorithmInfo`` instance and implement the ``info()`` method. The simplest way is to take http://code.opencv.org/projects/opencv/repository/revisions/master/entry/modules/ml/src/ml_init.cpp as the reference and modify it according to the list of your parameters. * Add some public function (e.g. ``initModule_()``) that calls info() of your algorithm and put it into the same source file as ``info()`` implementation. This is to force C++ linker to include this object file into the target application. See ``Algorithm::create()`` for details. diff --git a/modules/core/include/opencv2/core/core.hpp b/modules/core/include/opencv2/core/core.hpp index 1e8210e9b1..8cf7b7e277 100644 --- a/modules/core/include/opencv2/core/core.hpp +++ b/modules/core/include/opencv2/core/core.hpp @@ -2492,32 +2492,32 @@ CV_EXPORTS void randShuffle(InputOutputArray dst, double iterFactor=1., RNG* rng CV_EXPORTS_AS(randShuffle) void randShuffle_(InputOutputArray dst, double iterFactor=1.); //! draws the line segment (pt1, pt2) in the image -CV_EXPORTS_W void line(Mat& img, Point pt1, Point pt2, const Scalar& color, +CV_EXPORTS_W void line(CV_IN_OUT Mat& img, Point pt1, Point pt2, const Scalar& color, int thickness=1, int lineType=8, int shift=0); //! 
draws the rectangle outline or a solid rectangle with the opposite corners pt1 and pt2 in the image -CV_EXPORTS_W void rectangle(Mat& img, Point pt1, Point pt2, +CV_EXPORTS_W void rectangle(CV_IN_OUT Mat& img, Point pt1, Point pt2, const Scalar& color, int thickness=1, int lineType=8, int shift=0); //! draws the rectangle outline or a solid rectangle covering rec in the image -CV_EXPORTS void rectangle(Mat& img, Rect rec, +CV_EXPORTS void rectangle(CV_IN_OUT Mat& img, Rect rec, const Scalar& color, int thickness=1, int lineType=8, int shift=0); //! draws the circle outline or a solid circle in the image -CV_EXPORTS_W void circle(Mat& img, Point center, int radius, +CV_EXPORTS_W void circle(CV_IN_OUT Mat& img, Point center, int radius, const Scalar& color, int thickness=1, int lineType=8, int shift=0); //! draws an elliptic arc, ellipse sector or a rotated ellipse in the image -CV_EXPORTS_W void ellipse(Mat& img, Point center, Size axes, +CV_EXPORTS_W void ellipse(CV_IN_OUT Mat& img, Point center, Size axes, double angle, double startAngle, double endAngle, const Scalar& color, int thickness=1, int lineType=8, int shift=0); //! draws a rotated ellipse in the image -CV_EXPORTS_W void ellipse(Mat& img, const RotatedRect& box, const Scalar& color, +CV_EXPORTS_W void ellipse(CV_IN_OUT Mat& img, const RotatedRect& box, const Scalar& color, int thickness=1, int lineType=8); //! draws a filled convex polygon in the image diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp index 1c76df6413..13cbdd14c6 100644 --- a/modules/core/src/lapack.cpp +++ b/modules/core/src/lapack.cpp @@ -1024,7 +1024,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method ) __m128 s0 = _mm_or_ps(t0, t1); __m128 det =_mm_set1_ps((float)d); s0 = _mm_mul_ps(s0, det); - const uchar CV_DECL_ALIGNED(16) inv[16] = {0,0,0,0,0,0,0,0x80,0,0,0,0x80,0,0,0,0}; + static const uchar CV_DECL_ALIGNED(16) inv[16] = {0,0,0,0,0,0,0,0x80,0,0,0,0x80,0,0,0,0}; __m128 pattern = _mm_load_ps((const float*)inv); s0 = _mm_xor_ps(s0, pattern);//==-1*s0 s0 = _mm_shuffle_ps(s0, s0, _MM_SHUFFLE(0,2,1,3)); @@ -1064,7 +1064,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method ) __m128d det = _mm_load1_pd((const double*)&d); sm = _mm_mul_pd(sm, det); - uchar CV_DECL_ALIGNED(16) inv[8] = {0,0,0,0,0,0,0,0x80}; + static const uchar CV_DECL_ALIGNED(16) inv[8] = {0,0,0,0,0,0,0,0x80}; __m128d pattern = _mm_load1_pd((double*)inv); ss = _mm_mul_pd(ss, det); ss = _mm_xor_pd(ss, pattern);//==-1*ss @@ -1097,24 +1097,66 @@ double cv::invert( InputArray _src, OutputArray _dst, int method ) double d = det3(Sf); if( d != 0. 
) { + float CV_DECL_ALIGNED(16) t[12]; + result = true; d = 1./d; - float t[9]; - t[0] = (float)(((double)Sf(1,1) * Sf(2,2) - (double)Sf(1,2) * Sf(2,1)) * d); - t[1] = (float)(((double)Sf(0,2) * Sf(2,1) - (double)Sf(0,1) * Sf(2,2)) * d); - t[2] = (float)(((double)Sf(0,1) * Sf(1,2) - (double)Sf(0,2) * Sf(1,1)) * d); - - t[3] = (float)(((double)Sf(1,2) * Sf(2,0) - (double)Sf(1,0) * Sf(2,2)) * d); - t[4] = (float)(((double)Sf(0,0) * Sf(2,2) - (double)Sf(0,2) * Sf(2,0)) * d); - t[5] = (float)(((double)Sf(0,2) * Sf(1,0) - (double)Sf(0,0) * Sf(1,2)) * d); - - t[6] = (float)(((double)Sf(1,0) * Sf(2,1) - (double)Sf(1,1) * Sf(2,0)) * d); - t[7] = (float)(((double)Sf(0,1) * Sf(2,0) - (double)Sf(0,0) * Sf(2,1)) * d); - t[8] = (float)(((double)Sf(0,0) * Sf(1,1) - (double)Sf(0,1) * Sf(1,0)) * d); + #if CV_SSE2 + if(USE_SSE2) + { + __m128 det =_mm_set1_ps((float)d); + __m128 s0 = _mm_loadu_ps((const float*)srcdata);//s0 = Sf(0,0) Sf(0,1) Sf(0,2) *** + __m128 s1 = _mm_loadu_ps((const float*)(srcdata+srcstep));//s1 = Sf(1,0) Sf(1,1) Sf(1,2) *** + __m128 s2 = _mm_set_ps(0.f, Sf(2,2), Sf(2,1), Sf(2,0)); //s2 = Sf(2,0) Sf(2,1) Sf(2,2) *** - Df(0,0) = t[0]; Df(0,1) = t[1]; Df(0,2) = t[2]; - Df(1,0) = t[3]; Df(1,1) = t[4]; Df(1,2) = t[5]; - Df(2,0) = t[6]; Df(2,1) = t[7]; Df(2,2) = t[8]; + __m128 r0 = _mm_shuffle_ps(s1,s1,_MM_SHUFFLE(3,0,2,1)); //r0 = Sf(1,1) Sf(1,2) Sf(1,0) *** + __m128 r1 = _mm_shuffle_ps(s2,s2,_MM_SHUFFLE(3,1,0,2)); //r1 = Sf(2,2) Sf(2,0) Sf(2,1) *** + __m128 r2 = _mm_shuffle_ps(s2,s2,_MM_SHUFFLE(3,0,2,1)); //r2 = Sf(2,1) Sf(2,2) Sf(2,0) *** + + __m128 t0 = _mm_mul_ps(s0, r0);//t0 = Sf(0,0)*Sf(1,1) Sf(0,1)*Sf(1,2) Sf(0,2)*Sf(1,0) *** + __m128 t1 = _mm_mul_ps(s0, r1);//t1 = Sf(0,0)*Sf(2,2) Sf(0,1)*Sf(2,0) Sf(0,2)*Sf(2,1) *** + __m128 t2 = _mm_mul_ps(s1, r2);//t2 = Sf(1,0)*Sf(2,1) Sf(1,1)*Sf(2,2) Sf(1,2)*Sf(2,0) *** + + __m128 r3 = _mm_shuffle_ps(s0,s0,_MM_SHUFFLE(3,0,2,1));//r3 = Sf(0,1) Sf(0,2) Sf(0,0) *** + __m128 r4 = _mm_shuffle_ps(s0,s0,_MM_SHUFFLE(3,1,0,2));//r4 = Sf(0,2) Sf(0,0) Sf(0,1) *** + + __m128 t00 = _mm_mul_ps(s1, r3);//t00 = Sf(1,0)*Sf(0,1) Sf(1,1)*Sf(0,2) Sf(1,2)*Sf(0,0) *** + __m128 t11 = _mm_mul_ps(s2, r4);//t11 = Sf(2,0)*Sf(0,2) Sf(2,1)*Sf(0,0) Sf(2,2)*Sf(0,1) *** + __m128 t22 = _mm_mul_ps(s2, r0);//t22 = Sf(2,0)*Sf(1,1) Sf(2,1)*Sf(1,2) Sf(2,2)*Sf(1,0) *** + + t0 = _mm_mul_ps(_mm_sub_ps(t0,t00), det);//Sf(0,0)*Sf(1,1) Sf(0,1)*Sf(1,2) Sf(0,2)*Sf(1,0) *** + //-Sf(1,0)*Sf(0,1) -Sf(1,1)*Sf(0,2) -Sf(1,2)*Sf(0,0) + t1 = _mm_mul_ps(_mm_sub_ps(t1,t11), det);//Sf(0,0)*Sf(2,2) Sf(0,1)*Sf(2,0) Sf(0,2)*Sf(2,1) *** + //-Sf(2,0)*Sf(0,2) -Sf(2,1)*Sf(0,0) -Sf(2,2)*Sf(0,1) + t2 = _mm_mul_ps(_mm_sub_ps(t2,t22), det);//Sf(1,0)*Sf(2,1) Sf(1,1)*Sf(2,2) Sf(1,2)*Sf(2,0) *** + //-Sf(2,0)*Sf(1,1) -Sf(2,1)*Sf(1,2) -Sf(2,2)*Sf(1,0) + _mm_store_ps(t, t0); + _mm_store_ps(t+4, t1); + _mm_store_ps(t+8, t2); + + Df(0,0) = t[9]; Df(0,1) = t[6]; Df(0,2) = t[1]; + Df(1,0) = t[10]; Df(1,1) = t[4]; Df(1,2) = t[2]; + Df(2,0) = t[8]; Df(2,1) = t[5]; Df(2,2) = t[0]; + } + else + #endif + { + t[0] = (float)(((double)Sf(1,1) * Sf(2,2) - (double)Sf(1,2) * Sf(2,1)) * d); + t[1] = (float)(((double)Sf(0,2) * Sf(2,1) - (double)Sf(0,1) * Sf(2,2)) * d); + t[2] = (float)(((double)Sf(0,1) * Sf(1,2) - (double)Sf(0,2) * Sf(1,1)) * d); + + t[3] = (float)(((double)Sf(1,2) * Sf(2,0) - (double)Sf(1,0) * Sf(2,2)) * d); + t[4] = (float)(((double)Sf(0,0) * Sf(2,2) - (double)Sf(0,2) * Sf(2,0)) * d); + t[5] = (float)(((double)Sf(0,2) * Sf(1,0) - (double)Sf(0,0) * Sf(1,2)) * d); + + t[6] = (float)(((double)Sf(1,0) * Sf(2,1) - 
(double)Sf(1,1) * Sf(2,0)) * d); + t[7] = (float)(((double)Sf(0,1) * Sf(2,0) - (double)Sf(0,0) * Sf(2,1)) * d); + t[8] = (float)(((double)Sf(0,0) * Sf(1,1) - (double)Sf(0,1) * Sf(1,0)) * d); + + Df(0,0) = t[0]; Df(0,1) = t[1]; Df(0,2) = t[2]; + Df(1,0) = t[3]; Df(1,1) = t[4]; Df(1,2) = t[5]; + Df(2,0) = t[6]; Df(2,1) = t[7]; Df(2,2) = t[8]; + } } } else diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index b8a46611bc..42bf6593db 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -96,6 +96,10 @@ #endif #endif +#ifdef ANDROID +# include +#endif + namespace cv { @@ -556,6 +560,9 @@ void error( const Exception& exc ) exc.func.c_str() : "unknown function", exc.file.c_str(), exc.line ); fprintf( stderr, "%s\n", buf ); fflush( stderr ); +# ifdef ANDROID + __android_log_print(ANDROID_LOG_ERROR, "cv::error()", "%s", buf); +# endif } if(breakOnError) diff --git a/modules/features2d/doc/common_interfaces_of_feature_detectors.rst b/modules/features2d/doc/common_interfaces_of_feature_detectors.rst index 5ee4da08cf..9a80a1db7c 100644 --- a/modules/features2d/doc/common_interfaces_of_feature_detectors.rst +++ b/modules/features2d/doc/common_interfaces_of_feature_detectors.rst @@ -148,7 +148,7 @@ Wrapping class for feature detection using the class FastFeatureDetector : public FeatureDetector { public: - FastFeatureDetector( int threshold=1, bool nonmaxSuppression=true ); + FastFeatureDetector( int threshold=1, bool nonmaxSuppression=true, type=FastFeatureDetector::TYPE_9_16 ); virtual void read( const FileNode& fn ); virtual void write( FileStorage& fs ) const; protected: diff --git a/modules/features2d/doc/feature_detection_and_description.rst b/modules/features2d/doc/feature_detection_and_description.rst index 062228ee29..8c284e63bd 100644 --- a/modules/features2d/doc/feature_detection_and_description.rst +++ b/modules/features2d/doc/feature_detection_and_description.rst @@ -7,7 +7,7 @@ FAST -------- Detects corners using the FAST algorithm -.. ocv:function:: void FAST( InputArray image, vector& keypoints, int threshold, bool nonmaxSupression=true ) +.. ocv:function:: void FAST( InputArray image, vector& keypoints, int threshold, bool nonmaxSupression=true, type=FastFeatureDetector::TYPE_9_16 ) :param image: Image where keypoints (corners) are detected. @@ -17,6 +17,8 @@ Detects corners using the FAST algorithm :param nonmaxSupression: If it is true, non-maximum suppression is applied to detected corners (keypoints). + :param type: one of the three neighborhoods as defined in the paper: ``FastFeatureDetector::TYPE_9_16``, ``FastFeatureDetector::TYPE_7_12``, ``FastFeatureDetector::TYPE_5_8`` + Detects corners using the FAST algorithm by [Rosten06]_. .. [Rosten06] E. Rosten. Machine Learning for High-speed Corner Detection, 2006. diff --git a/modules/features2d/src/fast.cpp b/modules/features2d/src/fast.cpp index f496de3d51..fe496762ed 100644 --- a/modules/features2d/src/fast.cpp +++ b/modules/features2d/src/fast.cpp @@ -9,16 +9,16 @@ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - *Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. + *Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
- *Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. + *Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. - *Neither the name of the University of Cambridge nor the names of - its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. + *Neither the name of the University of Cambridge nor the names of + its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -350,7 +350,7 @@ int cornerScore<8>(const uchar* ptr, const int pixel[], int threshold) } int b0 = -a0; - for( k = 0; k < 12; k += 2 ) + for( k = 0; k < 8; k += 2 ) { int b = std::max((int)d[k+1], (int)d[k+2]); b = std::max(b, (int)d[k+3]); @@ -375,7 +375,10 @@ template void FAST_t(InputArray _img, std::vector& keypoints, int threshold, bool nonmax_suppression) { Mat img = _img.getMat(); - const int K = patternSize/2, N = patternSize + K + 1, quarterPatternSize = patternSize/4; + const int K = patternSize/2, N = patternSize + K + 1; +#if CV_SSE2 + const int quarterPatternSize = patternSize/4; +#endif int i, j, k, pixel[25]; makeOffsets(pixel, (int)img.step, patternSize); for(k = patternSize; k < 25; k++) @@ -585,7 +588,7 @@ FastFeatureDetector::FastFeatureDetector( int _threshold, bool _nonmaxSuppressio FastFeatureDetector::FastFeatureDetector( int _threshold, bool _nonmaxSuppression, int _type ) : threshold(_threshold), nonmaxSuppression(_nonmaxSuppression), type(_type) {} - + void FastFeatureDetector::detectImpl( const Mat& image, vector& keypoints, const Mat& mask ) const { Mat grayImage = image; diff --git a/modules/features2d/test/test_rotation_and_scale_invariance.cpp b/modules/features2d/test/test_rotation_and_scale_invariance.cpp index 73d351c521..d98431ba57 100644 --- a/modules/features2d/test/test_rotation_and_scale_invariance.cpp +++ b/modules/features2d/test/test_rotation_and_scale_invariance.cpp @@ -596,7 +596,7 @@ TEST(Features2d_RotationInvariance_Detector_ORB, regression) { DetectorRotationInvarianceTest test(Algorithm::create("Feature2D.ORB"), 0.47f, - 0.77f); + 0.76f); test.safe_run(); } @@ -605,9 +605,9 @@ TEST(Features2d_RotationInvariance_Detector_ORB, regression) */ TEST(Features2d_RotationInvariance_Descriptor_ORB, regression) { - DescriptorRotationInvarianceTest test(Algorithm::create("Feature2D.ORB"), - Algorithm::create("Feature2D.ORB"), - NORM_HAMMING, + DescriptorRotationInvarianceTest test(Algorithm::create("Feature2D.ORB"), + Algorithm::create("Feature2D.ORB"), + NORM_HAMMING, 0.99f); test.safe_run(); } diff --git a/modules/flann/include/opencv2/flann/lsh_index.h b/modules/flann/include/opencv2/flann/lsh_index.h index 8ce019a006..fc4cebb63c 100644 --- a/modules/flann/include/opencv2/flann/lsh_index.h +++ b/modules/flann/include/opencv2/flann/lsh_index.h @@ -90,9 +90,11 @@ public: Distance d = Distance()) : dataset_(input_data), index_params_(params), distance_(d) { - table_number_ = get_param(index_params_,"table_number",12); - key_size_ = get_param(index_params_,"key_size",20); - multi_probe_level_ = 
get_param(index_params_,"multi_probe_level",2); + // cv::flann::IndexParams sets integer params as 'int', so it is used with get_param + // in place of 'unsigned int' + table_number_ = (unsigned int)get_param(index_params_,"table_number",12); + key_size_ = (unsigned int)get_param(index_params_,"key_size",20); + multi_probe_level_ = (unsigned int)get_param(index_params_,"multi_probe_level",2); feature_size_ = (unsigned)dataset_.cols; fill_xor_mask(0, key_size_, multi_probe_level_, xor_masks_); diff --git a/modules/gpu/doc/introduction.rst b/modules/gpu/doc/introduction.rst index ec562b81c1..ef34c369b9 100644 --- a/modules/gpu/doc/introduction.rst +++ b/modules/gpu/doc/introduction.rst @@ -42,7 +42,7 @@ You can always determine at runtime whether the OpenCV GPU-built binaries (or PT Utilizing Multiple GPUs ----------------------- -In the current version, each of the OpenCV GPU algorithms can use only a single GPU. So, to utilize multiple GPUs, you have to manually distribute the work between GPUs. +In the current version, each of the OpenCV GPU algorithms can use only a single GPU. So, to utilize multiple GPUs, you have to manually distribute the work between GPUs. Switching active devie can be done using :ocv:func:`gpu::setDevice()` function. For more details please read Cuda C Programing Guide. While developing algorithms for multiple GPUs, note a data passing overhead. For primitive functions and small images, it can be significant, which may eliminate all the advantages of having multiple GPUs. But for high-level algorithms, consider using multi-GPU acceleration. For example, the Stereo Block Matching algorithm has been successfully parallelized using the following algorithm: @@ -59,5 +59,5 @@ While developing algorithms for multiple GPUs, note a data passing overhead. For With this algorithm, a dual GPU gave a 180 % performance increase comparing to the single Fermi GPU. For a source code example, see -http://code.opencv.org/svn/opencv/trunk/opencv/samples/gpu/. +http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/gpu/. diff --git a/modules/gpu/doc/video.rst b/modules/gpu/doc/video.rst index 9a9b2cfffa..378cca71ab 100644 --- a/modules/gpu/doc/video.rst +++ b/modules/gpu/doc/video.rst @@ -324,9 +324,9 @@ Class used for background/foreground segmentation. :: std::vector< std::vector > foreground_regions; }; -The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [FGD2003]_. + The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [FGD2003]_. -The results are available through the class fields: + The results are available through the class fields: .. ocv:member:: cv::gpu::GpuMat background @@ -489,9 +489,9 @@ Gaussian Mixture-based Background/Foreground Segmentation Algorithm. :: ... }; -The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [MOG2004]_. + The class discriminates between foreground and background pixels by building and maintaining a model of the background. 
Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [MOG2004]_. -Here are important members of the class that control the algorithm, which you can set after constructing the class instance: + Here are important members of the class that control the algorithm, which you can set after constructing the class instance: .. ocv:member:: float backgroundRatio @@ -649,6 +649,114 @@ Releases all inner buffer's memory. +gpu::GMG_GPU +------------ +.. ocv:class:: gpu::GMG_GPU + +Class used for background/foreground segmentation. :: + + class GMG_GPU_GPU + { + public: + GMG_GPU(); + + void initialize(Size frameSize, float min = 0.0f, float max = 255.0f); + + void operator ()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null()); + + void release(); + + int maxFeatures; + float learningRate; + int numInitializationFrames; + int quantizationLevels; + float backgroundPrior; + float decisionThreshold; + int smoothingRadius; + + ... + }; + +The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [GMG2012]_. + +Here are important members of the class that control the algorithm, which you can set after constructing the class instance: + + .. ocv:member:: int maxFeatures + + Total number of distinct colors to maintain in histogram. + + .. ocv:member:: float learningRate + + Set between 0.0 and 1.0, determines how quickly features are "forgotten" from histograms. + + .. ocv:member:: int numInitializationFrames + + Number of frames of video to use to initialize histograms. + + .. ocv:member:: int quantizationLevels + + Number of discrete levels in each channel to be used in histograms. + + .. ocv:member:: float backgroundPrior + + Prior probability that any given pixel is a background pixel. A sensitivity parameter. + + .. ocv:member:: float decisionThreshold + + Value above which pixel is determined to be FG. + + .. ocv:member:: float smoothingRadius + + Smoothing radius, in pixels, for cleaning up FG image. + + + +gpu::GMG_GPU::GMG_GPU +--------------------- +The default constructor. + +.. ocv:function:: gpu::GMG_GPU::GMG_GPU() + +Default constructor sets all parameters to default values. + + + +gpu::GMG_GPU::initialize +------------------------ +Initialize background model and allocates all inner buffers. + +.. ocv:function:: void gpu::GMG_GPU::initialize(Size frameSize, float min = 0.0f, float max = 255.0f) + + :param frameSize: Input frame size. + + :param min: Minimum value taken on by pixels in image sequence. Usually 0. + + :param max: Maximum value taken on by pixels in image sequence, e.g. 1.0 or 255. + + + +gpu::GMG_GPU::operator() +------------------------ +Updates the background model and returns the foreground mask + +.. ocv:function:: void gpu::GMG_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null()) + + :param frame: Next video frame. + + :param fgmask: The output foreground mask as an 8-bit binary image. + + :param stream: Stream for the asynchronous version. + + + +gpu::GMG_GPU::release +--------------------- +Releases all inner buffer's memory. + +.. ocv:function:: void gpu::GMG_GPU::release() + + + gpu::VideoWriter_GPU --------------------- Video writer class. @@ -1093,3 +1201,4 @@ Parse next video frame. Implementation must call this method after new frame was .. 
[MOG2004] Z. Zivkovic. *Improved adaptive Gausian mixture model for background subtraction*. International Conference Pattern Recognition, UK, August, 2004 .. [ShadowDetect2003] Prati, Mikic, Trivedi and Cucchiarra. *Detecting Moving Shadows...*. IEEE PAMI, 2003 .. [VIBE2011] O. Barnich and M. Van D Roogenbroeck. *ViBe: A universal background subtraction algorithm for video sequences*. IEEE Transactions on Image Processing, 20(6) :1709-1724, June 2011 +.. [GMG2012] A. Godbehere, A. Matsukawa and K. Goldberg. *Visual Tracking of Human Visitors under Variable-Lighting Conditions for a Responsive Audio Art Installation*. American Control Conference, Montreal, June 2012 diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index f6d869435c..ca9ad89889 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -917,6 +917,12 @@ CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTra GpuMat& labels, GpuMat& buf, Stream& stream = Stream::Null()); +//! compute mask for Generalized Flood fill componetns labeling. +CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null()); + +//! performs connected componnents labeling. +CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, int flags = 0, Stream& stream = Stream::Null()); + ////////////////////////////////// Histograms ////////////////////////////////// //! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type. @@ -2121,6 +2127,77 @@ private: GpuMat samples_; }; +/** + * Background Subtractor module. Takes a series of images and returns a sequence of mask (8UC1) + * images of the same size, where 255 indicates Foreground and 0 represents Background. + * This class implements an algorithm described in "Visual Tracking of Human Visitors under + * Variable-Lighting Conditions for a Responsive Audio Art Installation," A. Godbehere, + * A. Matsukawa, K. Goldberg, American Control Conference, Montreal, June 2012. + */ +class CV_EXPORTS GMG_GPU +{ +public: + GMG_GPU(); + + /** + * Validate parameters and set up data structures for appropriate frame size. + * @param frameSize Input frame size + * @param min Minimum value taken on by pixels in image sequence. Usually 0 + * @param max Maximum value taken on by pixels in image sequence. e.g. 1.0 or 255 + */ + void initialize(Size frameSize, float min = 0.0f, float max = 255.0f); + + /** + * Performs single-frame background subtraction and builds up a statistical background image + * model. + * @param frame Input frame + * @param fgmask Output mask image representing foreground and background pixels + * @param stream Stream for the asynchronous version + */ + void operator ()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null()); + + //! Releases all inner buffers + void release(); + + //! Total number of distinct colors to maintain in histogram. + int maxFeatures; + + //! Set between 0.0 and 1.0, determines how quickly features are "forgotten" from histograms. + float learningRate; + + //! Number of frames of video to use to initialize histograms. + int numInitializationFrames; + + //! Number of discrete levels in each channel to be used in histograms. + int quantizationLevels; + + //! Prior probability that any given pixel is a background pixel. A sensitivity parameter. + float backgroundPrior; + + //! 
Value above which pixel is determined to be FG. + float decisionThreshold; + + //! Smoothing radius, in pixels, for cleaning up FG image. + int smoothingRadius; + + //! Perform background model update. + bool updateBackgroundModel; + +private: + float maxVal_, minVal_; + + Size frameSize_; + + int frameNum_; + + GpuMat nfeatures_; + GpuMat colors_; + GpuMat weights_; + + Ptr boxFilter_; + GpuMat buf_; +}; + ////////////////////////////////// Video Encoding ////////////////////////////////// // Works only under Windows diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index ab7fb4240d..b5c986d220 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -1148,6 +1148,9 @@ GPU_PERF_TEST(CvtColor, cv::gpu::DeviceInfo, cv::Size, MatDepth, CvtColorInfo) cv::gpu::GpuMat src(src_host); cv::gpu::GpuMat dst; + if (info.code >= cv::COLOR_BayerBG2BGR && info.code <= cv::COLOR_BayerGR2BGR) + info.dcn = 4; + cv::gpu::cvtColor(src, dst, info.code, info.dcn); TEST_CYCLE() @@ -1172,7 +1175,20 @@ INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine( CvtColorInfo(3, 3, cv::COLOR_BGR2HSV), CvtColorInfo(3, 3, cv::COLOR_HSV2BGR), CvtColorInfo(3, 3, cv::COLOR_BGR2HLS), - CvtColorInfo(3, 3, cv::COLOR_HLS2BGR)))); + CvtColorInfo(3, 3, cv::COLOR_HLS2BGR), + CvtColorInfo(3, 3, cv::COLOR_BGR2Lab), + CvtColorInfo(3, 3, cv::COLOR_RGB2Lab), + CvtColorInfo(3, 3, cv::COLOR_BGR2Luv), + CvtColorInfo(3, 3, cv::COLOR_RGB2Luv), + CvtColorInfo(3, 3, cv::COLOR_Lab2BGR), + CvtColorInfo(3, 3, cv::COLOR_Lab2RGB), + CvtColorInfo(3, 3, cv::COLOR_Luv2BGR), + CvtColorInfo(3, 3, cv::COLOR_Luv2RGB), + CvtColorInfo(1, 3, cv::COLOR_BayerBG2BGR), + CvtColorInfo(1, 3, cv::COLOR_BayerGB2BGR), + CvtColorInfo(1, 3, cv::COLOR_BayerRG2BGR), + CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR), + CvtColorInfo(4, 4, cv::COLOR_RGBA2mRGBA)))); ////////////////////////////////////////////////////////////////////// // SwapChannels diff --git a/modules/gpu/perf/perf_labeling.cpp b/modules/gpu/perf/perf_labeling.cpp new file mode 100644 index 0000000000..5417133095 --- /dev/null +++ b/modules/gpu/perf/perf_labeling.cpp @@ -0,0 +1,75 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +//M*/ + +#include "perf_precomp.hpp" + +#ifdef HAVE_CUDA + +GPU_PERF_TEST(ConnectedComponents, cv::gpu::DeviceInfo, cv::Size) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Mat image = readImage("gpu/labeling/aloe-disp.png", cv::IMREAD_GRAYSCALE); + + cv::threshold(image, image, 150, 255, CV_THRESH_BINARY); + + cv::gpu::GpuMat mask; + mask.create(image.rows, image.cols, CV_8UC1); + + cv::gpu::GpuMat components; + components.create(image.rows, image.cols, CV_32SC1); + + cv::gpu::connectivityMask(cv::gpu::GpuMat(image), mask, cv::Scalar::all(0), cv::Scalar::all(2)); + + ASSERT_NO_THROW(cv::gpu::labelComponents(mask, components)); + + declare.time(1.0); + + TEST_CYCLE() + { + cv::gpu::labelComponents(mask, components); + } +} + +INSTANTIATE_TEST_CASE_P(Labeling, ConnectedComponents, testing::Combine(ALL_DEVICES, testing::Values(cv::Size(261, 262)))); + +#endif \ No newline at end of file diff --git a/modules/gpu/perf/perf_utility.cpp b/modules/gpu/perf/perf_utility.cpp index c54d2ace66..bf6bdfe739 100644 --- a/modules/gpu/perf/perf_utility.cpp +++ b/modules/gpu/perf/perf_utility.cpp @@ -65,19 +65,19 @@ void PrintTo(const CvtColorInfo& info, ostream* os) "BGR2HSV", "RGB2HSV", - 0, - 0, + "", + "", - 0, - 0, + "BGR2Lab", + "RGB2Lab", - 0, - 0, - 0, - 0, + "BayerBG2BGR", + "BayerGB2BGR", + "BayerRG2BGR", + "BayerGR2BGR", - 0, - 0, + "BGR2Luv", + "RGB2Luv", "BGR2HLS", "RGB2HLS", @@ -85,18 +85,18 @@ void PrintTo(const CvtColorInfo& info, ostream* os) "HSV2BGR", "HSV2RGB", - 0, - 0, - 0, - 0, + "Lab2BGR", + "Lab2RGB", + "Luv2BGR", + "Luv2RGB", "HLS2BGR", "HLS2RGB", - 0, - 0, - 0, - 0, + "BayerBG2BGR_VNG", + "BayerGB2BGR_VNG", + "BayerRG2BGR_VNG", + "BayerGR2BGR_VNG", "BGR2HSV_FULL", "RGB2HSV_FULL", @@ -108,30 +108,78 @@ void PrintTo(const CvtColorInfo& info, ostream* os) "HLS2BGR_FULL", "HLS2RGB_FULL", - 0, - 0, - 0, - 0, + "LBGR2Lab", + "LRGB2Lab", + "LBGR2Luv", + "LRGB2Luv", - 0, - 0, - 0, - 0, + "Lab2LBGR", + "Lab2LRGB", + "Luv2LBGR", + "Luv2LRGB", "BGR2YUV", "RGB2YUV", "YUV2BGR", "YUV2RGB", - 0, - 0, - 0, - 0, + "BayerBG2GRAY", + "BayerGB2GRAY", + "BayerRG2GRAY", + "BayerGR2GRAY", - 0, - 0, - 0, - 0 + //YUV 4:2:0 formats family + "YUV2RGB_NV12", + "YUV2BGR_NV12", + "YUV2RGB_NV21", + "YUV2BGR_NV21", + + "YUV2RGBA_NV12", + "YUV2BGRA_NV12", + "YUV2RGBA_NV21", + "YUV2BGRA_NV21", + + "YUV2RGB_YV12", + "YUV2BGR_YV12", + "YUV2RGB_IYUV", + "YUV2BGR_IYUV", + + "YUV2RGBA_YV12", + "YUV2BGRA_YV12", + "YUV2RGBA_IYUV", + "YUV2BGRA_IYUV", + + "YUV2GRAY_420", + + //YUV 4:2:2 formats family + "YUV2RGB_UYVY", + "YUV2BGR_UYVY", + "YUV2RGB_VYUY", + "YUV2BGR_VYUY", + + "YUV2RGBA_UYVY", + "YUV2BGRA_UYVY", + "YUV2RGBA_VYUY", + "YUV2BGRA_VYUY", + + "YUV2RGB_YUY2", + 
"YUV2BGR_YUY2", + "YUV2RGB_YVYU", + "YUV2BGR_YVYU", + + "YUV2RGBA_YUY2", + "YUV2BGRA_YUY2", + "YUV2RGBA_YVYU", + "YUV2BGRA_YVYU", + + "YUV2GRAY_UYVY", + "YUV2GRAY_YUY2", + + // alpha premultiplication + "RGBA2mRGBA", + "mRGBA2RGBA", + + "COLORCVT_MAX" }; *os << str[info.code]; diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp index 4ae18bd09c..6e577a4a40 100644 --- a/modules/gpu/perf/perf_video.cpp +++ b/modules/gpu/perf/perf_video.cpp @@ -578,6 +578,77 @@ INSTANTIATE_TEST_CASE_P(Video, VIBE, testing::Combine( testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")), testing::Values(Channels(1), Channels(3), Channels(4)))); +////////////////////////////////////////////////////// +// GMG + +IMPLEMENT_PARAM_CLASS(MaxFeatures, int) + +GPU_PERF_TEST(GMG, cv::gpu::DeviceInfo, std::string, Channels, MaxFeatures) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1)); + int cn = GET_PARAM(2); + int maxFeatures = GET_PARAM(3); + + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + + cv::Mat frame; + cap >> frame; + ASSERT_FALSE(frame.empty()); + + if (cn != 3) + { + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + } + + cv::gpu::GpuMat d_frame(frame); + cv::gpu::GpuMat d_fgmask; + + cv::gpu::GMG_GPU gmg; + gmg.maxFeatures = maxFeatures; + + gmg(d_frame, d_fgmask); + + for (int i = 0; i < 150; ++i) + { + cap >> frame; + if (frame.empty()) + { + cap.open(inputFile); + cap >> frame; + } + + if (cn != 3) + { + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + } + + d_frame.upload(frame); + + startTimer(); next(); + gmg(d_frame, d_fgmask); + stopTimer(); + } +} + +INSTANTIATE_TEST_CASE_P(Video, GMG, testing::Combine( + ALL_DEVICES, + testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")), + testing::Values(Channels(1), Channels(3), Channels(4)), + testing::Values(MaxFeatures(20), MaxFeatures(40), MaxFeatures(60)))); + ////////////////////////////////////////////////////// // VideoWriter diff --git a/modules/gpu/perf_cpu/perf_imgproc.cpp b/modules/gpu/perf_cpu/perf_imgproc.cpp index 9a1adde810..b6686b7eda 100644 --- a/modules/gpu/perf_cpu/perf_imgproc.cpp +++ b/modules/gpu/perf_cpu/perf_imgproc.cpp @@ -712,6 +712,19 @@ INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine( CvtColorInfo(3, 3, cv::COLOR_BGR2HSV), CvtColorInfo(3, 3, cv::COLOR_HSV2BGR), CvtColorInfo(3, 3, cv::COLOR_BGR2HLS), - CvtColorInfo(3, 3, cv::COLOR_HLS2BGR)))); + CvtColorInfo(3, 3, cv::COLOR_HLS2BGR), + CvtColorInfo(3, 3, cv::COLOR_BGR2Lab), + CvtColorInfo(3, 3, cv::COLOR_RGB2Lab), + CvtColorInfo(3, 3, cv::COLOR_BGR2Luv), + CvtColorInfo(3, 3, cv::COLOR_RGB2Luv), + CvtColorInfo(3, 3, cv::COLOR_Lab2BGR), + CvtColorInfo(3, 3, cv::COLOR_Lab2RGB), + CvtColorInfo(3, 3, cv::COLOR_Luv2BGR), + CvtColorInfo(3, 3, cv::COLOR_Luv2RGB), + CvtColorInfo(1, 3, cv::COLOR_BayerBG2BGR), + CvtColorInfo(1, 3, cv::COLOR_BayerGB2BGR), + CvtColorInfo(1, 3, cv::COLOR_BayerRG2BGR), + CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR), + CvtColorInfo(4, 4, cv::COLOR_RGBA2mRGBA)))); #endif diff --git a/modules/gpu/perf_cpu/perf_labeling.cpp b/modules/gpu/perf_cpu/perf_labeling.cpp new file mode 100644 index 0000000000..47d62839eb --- 
/dev/null +++ b/modules/gpu/perf_cpu/perf_labeling.cpp @@ -0,0 +1,157 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+//M*/ + +#include "perf_precomp.hpp" + +#ifdef HAVE_CUDA + +namespace { + + struct GreedyLabeling + { + struct dot + { + int x; + int y; + + static dot make(int i, int j) + { + dot d; d.x = i; d.y = j; + return d; + } + }; + + struct InInterval + { + InInterval(const int& _lo, const int& _hi) : lo(-_lo), hi(_hi) {}; + const int lo, hi; + + bool operator() (const unsigned char a, const unsigned char b) const + { + int d = a - b; + return lo <= d && d <= hi; + } + }; + + GreedyLabeling(cv::Mat img) + : image(img), _labels(image.size(), CV_32SC1, cv::Scalar::all(-1)) {stack = new dot[image.cols * image.rows];} + + ~GreedyLabeling(){delete[] stack;} + + void operator() (cv::Mat labels) const + { + InInterval inInt(0, 2); + int cc = -1; + + int* dist_labels = (int*)labels.data; + int pitch = labels.step1(); + + unsigned char* source = (unsigned char*)image.data; + int width = image.cols; + int height = image.rows; + + for (int j = 0; j < image.rows; ++j) + for (int i = 0; i < image.cols; ++i) + { + if (dist_labels[j * pitch + i] != -1) continue; + + dot* top = stack; + dot p = dot::make(i, j); + cc++; + + dist_labels[j * pitch + i] = cc; + + while (top >= stack) + { + int* dl = &dist_labels[p.y * pitch + p.x]; + unsigned char* sp = &source[p.y * image.step1() + p.x]; + + dl[0] = cc; + + //right + if( p.x < (width - 1) && dl[ +1] == -1 && inInt(sp[0], sp[+1])) + *top++ = dot::make(p.x + 1, p.y); + + //left + if( p.x > 0 && dl[-1] == -1 && inInt(sp[0], sp[-1])) + *top++ = dot::make(p.x - 1, p.y); + + //bottom + if( p.y < (height - 1) && dl[+pitch] == -1 && inInt(sp[0], sp[+image.step1()])) + *top++ = dot::make(p.x, p.y + 1); + + //top + if( p.y > 0 && dl[-pitch] == -1 && inInt(sp[0], sp[-image.step1()])) + *top++ = dot::make(p.x, p.y - 1); + + p = *--top; + } + } + } + + cv::Mat image; + cv::Mat _labels; + dot* stack; + }; +} + +GPU_PERF_TEST(ConnectedComponents, cv::gpu::DeviceInfo, cv::Size) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Mat image = readImage("gpu/labeling/aloe-disp.png", cv::IMREAD_GRAYSCALE); + + GreedyLabeling host(image); + + host(host._labels); + + declare.time(1.0); + + TEST_CYCLE() + { + host(host._labels); + } +} + +INSTANTIATE_TEST_CASE_P(Labeling, ConnectedComponents, testing::Combine(ALL_DEVICES, testing::Values(cv::Size(261, 262)))); + +#endif \ No newline at end of file diff --git a/modules/gpu/perf_cpu/perf_utility.cpp b/modules/gpu/perf_cpu/perf_utility.cpp index 88d5111f57..541e6fdc7b 100644 --- a/modules/gpu/perf_cpu/perf_utility.cpp +++ b/modules/gpu/perf_cpu/perf_utility.cpp @@ -65,19 +65,19 @@ void PrintTo(const CvtColorInfo& info, ostream* os) "BGR2HSV", "RGB2HSV", - 0, - 0, + "", + "", - 0, - 0, + "BGR2Lab", + "RGB2Lab", - 0, - 0, - 0, - 0, + "BayerBG2BGR", + "BayerGB2BGR", + "BayerRG2BGR", + "BayerGR2BGR", - 0, - 0, + "BGR2Luv", + "RGB2Luv", "BGR2HLS", "RGB2HLS", @@ -85,18 +85,18 @@ void PrintTo(const CvtColorInfo& info, ostream* os) "HSV2BGR", "HSV2RGB", - 0, - 0, - 0, - 0, + "Lab2BGR", + "Lab2RGB", + "Luv2BGR", + "Luv2RGB", "HLS2BGR", "HLS2RGB", - 0, - 0, - 0, - 0, + "BayerBG2BGR_VNG", + "BayerGB2BGR_VNG", + "BayerRG2BGR_VNG", + "BayerGR2BGR_VNG", "BGR2HSV_FULL", "RGB2HSV_FULL", @@ -108,30 +108,78 @@ void PrintTo(const CvtColorInfo& info, ostream* os) "HLS2BGR_FULL", "HLS2RGB_FULL", - 0, - 0, - 0, - 0, + "LBGR2Lab", + "LRGB2Lab", + "LBGR2Luv", + "LRGB2Luv", - 0, - 0, - 0, - 0, + "Lab2LBGR", + "Lab2LRGB", + "Luv2LBGR", + "Luv2LRGB", "BGR2YUV", "RGB2YUV", "YUV2BGR", "YUV2RGB", - 0, - 0, - 0, - 0, + 
"BayerBG2GRAY", + "BayerGB2GRAY", + "BayerRG2GRAY", + "BayerGR2GRAY", - 0, - 0, - 0, - 0 + //YUV 4:2:0 formats family + "YUV2RGB_NV12", + "YUV2BGR_NV12", + "YUV2RGB_NV21", + "YUV2BGR_NV21", + + "YUV2RGBA_NV12", + "YUV2BGRA_NV12", + "YUV2RGBA_NV21", + "YUV2BGRA_NV21", + + "YUV2RGB_YV12", + "YUV2BGR_YV12", + "YUV2RGB_IYUV", + "YUV2BGR_IYUV", + + "YUV2RGBA_YV12", + "YUV2BGRA_YV12", + "YUV2RGBA_IYUV", + "YUV2BGRA_IYUV", + + "YUV2GRAY_420", + + //YUV 4:2:2 formats family + "YUV2RGB_UYVY", + "YUV2BGR_UYVY", + "YUV2RGB_VYUY", + "YUV2BGR_VYUY", + + "YUV2RGBA_UYVY", + "YUV2BGRA_UYVY", + "YUV2RGBA_VYUY", + "YUV2BGRA_VYUY", + + "YUV2RGB_YUY2", + "YUV2BGR_YUY2", + "YUV2RGB_YVYU", + "YUV2BGR_YVYU", + + "YUV2RGBA_YUY2", + "YUV2BGRA_YUY2", + "YUV2RGBA_YVYU", + "YUV2BGRA_YVYU", + + "YUV2GRAY_UYVY", + "YUV2GRAY_YUY2", + + // alpha premultiplication + "RGBA2mRGBA", + "mRGBA2RGBA", + + "COLORCVT_MAX" }; *os << str[info.code]; diff --git a/modules/gpu/perf_cpu/perf_video.cpp b/modules/gpu/perf_cpu/perf_video.cpp index f635f42b0b..2c3aeb31ce 100644 --- a/modules/gpu/perf_cpu/perf_video.cpp +++ b/modules/gpu/perf_cpu/perf_video.cpp @@ -328,6 +328,74 @@ INSTANTIATE_TEST_CASE_P(Video, MOG2_getBackgroundImage, testing::Combine( testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")), testing::Values(/*Channels(1),*/ Channels(3)/*, Channels(4)*/))); +////////////////////////////////////////////////////// +// GMG + +IMPLEMENT_PARAM_CLASS(MaxFeatures, int) + +GPU_PERF_TEST(GMG, cv::gpu::DeviceInfo, std::string, Channels, MaxFeatures) +{ + std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1)); + int cn = GET_PARAM(2); + int maxFeatures = GET_PARAM(3); + + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + + cv::Mat frame; + cap >> frame; + ASSERT_FALSE(frame.empty()); + + if (cn != 3) + { + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + } + + cv::Mat fgmask; + cv::Mat zeros(frame.size(), CV_8UC1, cv::Scalar::all(0)); + + cv::BackgroundSubtractorGMG gmg; + gmg.set("maxFeatures", maxFeatures); + gmg.initialize(frame.size(), 0.0, 255.0); + + gmg(frame, fgmask); + + for (int i = 0; i < 150; ++i) + { + cap >> frame; + if (frame.empty()) + { + cap.open(inputFile); + cap >> frame; + } + + if (cn != 3) + { + cv::Mat temp; + if (cn == 1) + cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); + cv::swap(temp, frame); + } + + startTimer(); next(); + gmg(frame, fgmask); + stopTimer(); + } +} + +INSTANTIATE_TEST_CASE_P(Video, GMG, testing::Combine( + ALL_DEVICES, + testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")), + testing::Values(Channels(1), Channels(3), Channels(4)), + testing::Values(MaxFeatures(20), MaxFeatures(40), MaxFeatures(60)))); + ////////////////////////////////////////////////////// // VideoWriter diff --git a/modules/gpu/src/bgfg_gmg.cpp b/modules/gpu/src/bgfg_gmg.cpp new file mode 100644 index 0000000000..6e0ed9e631 --- /dev/null +++ b/modules/gpu/src/bgfg_gmg.cpp @@ -0,0 +1,168 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. 
+// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#ifndef HAVE_CUDA + +cv::gpu::GMG_GPU::GMG_GPU() { throw_nogpu(); } +void cv::gpu::GMG_GPU::initialize(cv::Size, float, float) { throw_nogpu(); } +void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, float, cv::gpu::Stream&) { throw_nogpu(); } +void cv::gpu::GMG_GPU::release() {} + +#else + +namespace cv { namespace gpu { namespace device { + namespace bgfg_gmg + { + void loadConstants(int width, int height, float minVal, float maxVal, int quantizationLevels, float backgroundPrior, + float decisionThreshold, int maxFeatures, int numInitializationFrames); + + template + void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, + int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + } +}}} + +cv::gpu::GMG_GPU::GMG_GPU() +{ + maxFeatures = 64; + learningRate = 0.025f; + numInitializationFrames = 120; + quantizationLevels = 16; + backgroundPrior = 0.8f; + decisionThreshold = 0.8f; + smoothingRadius = 7; + updateBackgroundModel = true; +} + +void cv::gpu::GMG_GPU::initialize(cv::Size frameSize, float min, float max) +{ + using namespace cv::gpu::device::bgfg_gmg; + + CV_Assert(min < max); + CV_Assert(maxFeatures > 0); + CV_Assert(learningRate >= 0.0f && learningRate <= 1.0f); + CV_Assert(numInitializationFrames >= 1); + CV_Assert(quantizationLevels >= 1 && quantizationLevels <= 255); + CV_Assert(backgroundPrior >= 0.0f && backgroundPrior <= 1.0f); + + minVal_ = min; + maxVal_ = max; + + frameSize_ = frameSize; + + frameNum_ = 0; + + nfeatures_.create(frameSize_, CV_32SC1); + colors_.create(maxFeatures * frameSize_.height, frameSize_.width, CV_32SC1); + weights_.create(maxFeatures * frameSize_.height, frameSize_.width, 
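+ // Layout note: colors_ and weights_ stack maxFeatures planes vertically, so feature i of
+ // pixel (x, y) is stored at row i * frameSize_.height + y, column x; the kernels in
+ // cuda/bgfg_gmg.cu rely on the same indexing (fy starts at y and advances by c_height).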
CV_32FC1); + + nfeatures_.setTo(cv::Scalar::all(0)); + + if (smoothingRadius > 0) + boxFilter_ = cv::gpu::createBoxFilter_GPU(CV_8UC1, CV_8UC1, cv::Size(smoothingRadius, smoothingRadius)); + + loadConstants(frameSize_.width, frameSize_.height, minVal_, maxVal_, quantizationLevels, backgroundPrior, decisionThreshold, maxFeatures, numInitializationFrames); +} + +void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat& frame, cv::gpu::GpuMat& fgmask, float newLearningRate, cv::gpu::Stream& stream) +{ + using namespace cv::gpu::device::bgfg_gmg; + + typedef void (*func_t)(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, + int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + static const func_t funcs[6][4] = + { + {update_gpu, 0, update_gpu, update_gpu}, + {0,0,0,0}, + {update_gpu, 0, update_gpu, update_gpu}, + {0,0,0,0}, + {0,0,0,0}, + {update_gpu, 0, update_gpu, update_gpu} + }; + + CV_Assert(frame.depth() == CV_8U || frame.depth() == CV_16U || frame.depth() == CV_32F); + CV_Assert(frame.channels() == 1 || frame.channels() == 3 || frame.channels() == 4); + + if (newLearningRate != -1.0f) + { + CV_Assert(newLearningRate >= 0.0f && newLearningRate <= 1.0f); + learningRate = newLearningRate; + } + + if (frame.size() != frameSize_) + initialize(frame.size(), 0.0f, frame.depth() == CV_8U ? 255.0f : frame.depth() == CV_16U ? std::numeric_limits::max() : 1.0f); + + fgmask.create(frameSize_, CV_8UC1); + if (stream) + stream.enqueueMemSet(fgmask, cv::Scalar::all(0)); + else + fgmask.setTo(cv::Scalar::all(0)); + + funcs[frame.depth()][frame.channels() - 1](frame, fgmask, colors_, weights_, nfeatures_, frameNum_, learningRate, updateBackgroundModel, cv::gpu::StreamAccessor::getStream(stream)); + + // medianBlur + if (smoothingRadius > 0) + { + boxFilter_->apply(fgmask, buf_, cv::Rect(0,0,-1,-1), stream); + int minCount = (smoothingRadius * smoothingRadius + 1) / 2; + double thresh = 255.0 * minCount / (smoothingRadius * smoothingRadius); + cv::gpu::threshold(buf_, fgmask, thresh, 255.0, cv::THRESH_BINARY, stream); + } + + // keep track of how many frames we have processed + ++frameNum_; +} + +void cv::gpu::GMG_GPU::release() +{ + frameSize_ = Size(); + + nfeatures_.release(); + colors_.release(); + weights_.release(); + boxFilter_.release(); + buf_.release(); +} + +#endif diff --git a/modules/gpu/src/color.cpp b/modules/gpu/src/color.cpp index 543227aeb3..cb2ae33d71 100644 --- a/modules/gpu/src/color.cpp +++ b/modules/gpu/src/color.cpp @@ -54,6 +54,17 @@ void cv::gpu::gammaCorrection(const GpuMat&, GpuMat&, bool, Stream&) { throw_nog #else /* !defined (HAVE_CUDA) */ #include + +namespace cv { namespace gpu { + namespace device + { + template + void Bayer2BGR_8u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream); + template + void Bayer2BGR_16u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream); + } +}} + using namespace ::cv::gpu::device; namespace @@ -1144,13 +1155,13 @@ namespace funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); } - void bgr_to_lab(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) + void bgr_to_lab(const GpuMat& src, GpuMat& dst, int dcn, Stream& st) { #if (CUDA_VERSION < 5000) (void)src; (void)dst; (void)dcn; - (void)stream; + (void)st; CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); #else CV_Assert(src.depth() == CV_8U); @@ -1160,13 +1171,17 @@ 
namespace dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - NppStreamHandler h(StreamAccessor::getStream(stream)); + cudaStream_t stream = StreamAccessor::getStream(st); + NppStreamHandler h(stream); NppiSize oSizeROI; oSizeROI.width = src.cols; oSizeROI.height = src.rows; nppSafeCall( nppiBGRToLab_8u_C3R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); #endif } @@ -1176,13 +1191,13 @@ namespace bgr_to_lab(dst, dst, -1, stream); } - void lab_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) + void lab_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& st) { #if (CUDA_VERSION < 5000) (void)src; (void)dst; (void)dcn; - (void)stream; + (void)st; CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); #else CV_Assert(src.depth() == CV_8U); @@ -1192,13 +1207,17 @@ namespace dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - NppStreamHandler h(StreamAccessor::getStream(stream)); + cudaStream_t stream = StreamAccessor::getStream(st); + NppStreamHandler h(stream); NppiSize oSizeROI; oSizeROI.width = src.cols; oSizeROI.height = src.rows; nppSafeCall( nppiLabToBGR_8u_C3R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); #endif } @@ -1208,13 +1227,13 @@ namespace bgr_to_rgb(dst, dst, -1, stream); } - void rgb_to_luv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) + void rgb_to_luv(const GpuMat& src, GpuMat& dst, int dcn, Stream& st) { #if (CUDA_VERSION < 5000) (void)src; (void)dst; (void)dcn; - (void)stream; + (void)st; CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); #else CV_Assert(src.depth() == CV_8U); @@ -1224,7 +1243,8 @@ namespace dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - NppStreamHandler h(StreamAccessor::getStream(stream)); + cudaStream_t stream = StreamAccessor::getStream(st); + NppStreamHandler h(stream); NppiSize oSizeROI; oSizeROI.width = src.cols; @@ -1234,6 +1254,9 @@ namespace nppSafeCall( nppiRGBToLUV_8u_C3R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); else nppSafeCall( nppiRGBToLUV_8u_AC4R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); #endif } @@ -1243,13 +1266,13 @@ namespace rgb_to_luv(dst, dst, -1, stream); } - void luv_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) + void luv_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& st) { #if (CUDA_VERSION < 5000) (void)src; (void)dst; (void)dcn; - (void)stream; + (void)st; CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); #else CV_Assert(src.depth() == CV_8U); @@ -1259,7 +1282,8 @@ namespace dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - NppStreamHandler h(StreamAccessor::getStream(stream)); + cudaStream_t stream = StreamAccessor::getStream(st); + NppStreamHandler h(stream); NppiSize oSizeROI; oSizeROI.width = src.cols; @@ -1269,6 +1293,9 @@ namespace nppSafeCall( nppiLUVToRGB_8u_C3R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); else nppSafeCall( nppiLUVToRGB_8u_AC4R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); #endif } @@ -1278,19 +1305,20 @@ namespace bgr_to_rgb(dst, dst, -1, stream); } - void rgba_to_mbgra(const GpuMat& src, GpuMat& 
dst, int, Stream& stream) + void rgba_to_mbgra(const GpuMat& src, GpuMat& dst, int, Stream& st) { #if (CUDA_VERSION < 5000) (void)src; (void)dst; - (void)stream; + (void)st; CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); #else CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4); dst.create(src.size(), src.type()); - NppStreamHandler h(StreamAccessor::getStream(stream)); + cudaStream_t stream = StreamAccessor::getStream(st); + NppStreamHandler h(stream); NppiSize oSizeROI; oSizeROI.width = src.cols; @@ -1300,8 +1328,52 @@ namespace nppSafeCall( nppiAlphaPremul_8u_AC4R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); else nppSafeCall( nppiAlphaPremul_16u_AC4R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); #endif } + + void bayer_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, bool blue_last, bool start_with_green, Stream& stream) + { + typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream); + static const func_t funcs[3][4] = + { + {0,0,Bayer2BGR_8u_gpu<3>, Bayer2BGR_8u_gpu<4>}, + {0,0,0,0}, + {0,0,Bayer2BGR_16u_gpu<3>, Bayer2BGR_16u_gpu<4>} + }; + + if (dcn <= 0) dcn = 3; + + CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1); + CV_Assert(src.rows > 2 && src.cols > 2); + CV_Assert(dcn == 3 || dcn == 4); + + dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); + + funcs[src.depth()][dcn - 1](src, dst, blue_last, start_with_green, StreamAccessor::getStream(stream)); + } + + void bayerBG_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) + { + bayer_to_bgr(src, dst, dcn, false, false, stream); + } + + void bayerGB_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) + { + bayer_to_bgr(src, dst, dcn, false, true, stream); + } + + void bayerRG_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) + { + bayer_to_bgr(src, dst, dcn, true, false, stream); + } + + void bayerGR_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) + { + bayer_to_bgr(src, dst, dcn, true, true, stream); + } } void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream) @@ -1366,10 +1438,10 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream bgr_to_lab, // CV_BGR2Lab =44 rgb_to_lab, // CV_RGB2Lab =45 - 0, // CV_BayerBG2BGR =46 - 0, // CV_BayerGB2BGR =47 - 0, // CV_BayerRG2BGR =48 - 0, // CV_BayerGR2BGR =49 + bayerBG_to_bgr, // CV_BayerBG2BGR =46 + bayerGB_to_bgr, // CV_BayerGB2BGR =47 + bayerRG_to_bgr, // CV_BayerRG2BGR =48 + bayerGR_to_bgr, // CV_BayerGR2BGR =49 bgr_to_luv, // CV_BGR2Luv =50 rgb_to_luv, // CV_RGB2Luv =51 @@ -1424,57 +1496,57 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream 0, // CV_BayerGR2GRAY = 89 //YUV 4:2:0 formats family - 0, // COLOR_YUV2RGB_NV12 = 90, - 0, // COLOR_YUV2BGR_NV12 = 91, - 0, // COLOR_YUV2RGB_NV21 = 92, - 0, // COLOR_YUV2BGR_NV21 = 93, + 0, // CV_YUV2RGB_NV12 = 90, + 0, // CV_YUV2BGR_NV12 = 91, + 0, // CV_YUV2RGB_NV21 = 92, + 0, // CV_YUV2BGR_NV21 = 93, - 0, // COLOR_YUV2RGBA_NV12 = 94, - 0, // COLOR_YUV2BGRA_NV12 = 95, - 0, // COLOR_YUV2RGBA_NV21 = 96, - 0, // COLOR_YUV2BGRA_NV21 = 97, + 0, // CV_YUV2RGBA_NV12 = 94, + 0, // CV_YUV2BGRA_NV12 = 95, + 0, // CV_YUV2RGBA_NV21 = 96, + 0, // CV_YUV2BGRA_NV21 = 97, - 0, // COLOR_YUV2RGB_YV12 = 98, - 0, // COLOR_YUV2BGR_YV12 = 99, - 0, // COLOR_YUV2RGB_IYUV = 100, - 0, // 
COLOR_YUV2BGR_IYUV = 101, + 0, // CV_YUV2RGB_YV12 = 98, + 0, // CV_YUV2BGR_YV12 = 99, + 0, // CV_YUV2RGB_IYUV = 100, + 0, // CV_YUV2BGR_IYUV = 101, - 0, // COLOR_YUV2RGBA_YV12 = 102, - 0, // COLOR_YUV2BGRA_YV12 = 103, - 0, // COLOR_YUV2RGBA_IYUV = 104, - 0, // COLOR_YUV2BGRA_IYUV = 105, + 0, // CV_YUV2RGBA_YV12 = 102, + 0, // CV_YUV2BGRA_YV12 = 103, + 0, // CV_YUV2RGBA_IYUV = 104, + 0, // CV_YUV2BGRA_IYUV = 105, - 0, // COLOR_YUV2GRAY_420 = 106, + 0, // CV_YUV2GRAY_420 = 106, //YUV 4:2:2 formats family - 0, // COLOR_YUV2RGB_UYVY = 107, - 0, // COLOR_YUV2BGR_UYVY = 108, - 0, // //COLOR_YUV2RGB_VYUY = 109, - 0, // //COLOR_YUV2BGR_VYUY = 110, + 0, // CV_YUV2RGB_UYVY = 107, + 0, // CV_YUV2BGR_UYVY = 108, + 0, // //CV_YUV2RGB_VYUY = 109, + 0, // //CV_YUV2BGR_VYUY = 110, - 0, // COLOR_YUV2RGBA_UYVY = 111, - 0, // COLOR_YUV2BGRA_UYVY = 112, - 0, // //COLOR_YUV2RGBA_VYUY = 113, - 0, // //COLOR_YUV2BGRA_VYUY = 114, + 0, // CV_YUV2RGBA_UYVY = 111, + 0, // CV_YUV2BGRA_UYVY = 112, + 0, // //CV_YUV2RGBA_VYUY = 113, + 0, // //CV_YUV2BGRA_VYUY = 114, - 0, // COLOR_YUV2RGB_YUY2 = 115, - 0, // COLOR_YUV2BGR_YUY2 = 116, - 0, // COLOR_YUV2RGB_YVYU = 117, - 0, // COLOR_YUV2BGR_YVYU = 118, + 0, // CV_YUV2RGB_YUY2 = 115, + 0, // CV_YUV2BGR_YUY2 = 116, + 0, // CV_YUV2RGB_YVYU = 117, + 0, // CV_YUV2BGR_YVYU = 118, - 0, // COLOR_YUV2RGBA_YUY2 = 119, - 0, // COLOR_YUV2BGRA_YUY2 = 120, - 0, // COLOR_YUV2RGBA_YVYU = 121, - 0, // COLOR_YUV2BGRA_YVYU = 122, + 0, // CV_YUV2RGBA_YUY2 = 119, + 0, // CV_YUV2BGRA_YUY2 = 120, + 0, // CV_YUV2RGBA_YVYU = 121, + 0, // CV_YUV2BGRA_YVYU = 122, - 0, // COLOR_YUV2GRAY_UYVY = 123, - 0, // COLOR_YUV2GRAY_YUY2 = 124, + 0, // CV_YUV2GRAY_UYVY = 123, + 0, // CV_YUV2GRAY_YUY2 = 124, // alpha premultiplication - rgba_to_mbgra, // COLOR_RGBA2mRGBA = 125, - 0, // COLOR_mRGBA2RGBA = 126, + rgba_to_mbgra, // CV_RGBA2mRGBA = 125, + 0, // CV_mRGBA2RGBA = 126, - 0, // COLOR_COLORCVT_MAX = 127 + 0, // CV_COLORCVT_MAX = 127 }; CV_Assert(code < 128); diff --git a/modules/gpu/src/cuda/bgfg_gmg.cu b/modules/gpu/src/cuda/bgfg_gmg.cu new file mode 100644 index 0000000000..76ebb2da09 --- /dev/null +++ b/modules/gpu/src/cuda/bgfg_gmg.cu @@ -0,0 +1,253 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/gpu/device/common.hpp" +#include "opencv2/gpu/device/vec_traits.hpp" +#include "opencv2/gpu/device/limits.hpp" + +namespace cv { namespace gpu { namespace device { + namespace bgfg_gmg + { + __constant__ int c_width; + __constant__ int c_height; + __constant__ float c_minVal; + __constant__ float c_maxVal; + __constant__ int c_quantizationLevels; + __constant__ float c_backgroundPrior; + __constant__ float c_decisionThreshold; + __constant__ int c_maxFeatures; + __constant__ int c_numInitializationFrames; + + void loadConstants(int width, int height, float minVal, float maxVal, int quantizationLevels, float backgroundPrior, + float decisionThreshold, int maxFeatures, int numInitializationFrames) + { + cudaSafeCall( cudaMemcpyToSymbol(c_width, &width, sizeof(width)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_height, &height, sizeof(height)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_minVal, &minVal, sizeof(minVal)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_maxVal, &maxVal, sizeof(maxVal)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_quantizationLevels, &quantizationLevels, sizeof(quantizationLevels)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_backgroundPrior, &backgroundPrior, sizeof(backgroundPrior)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_decisionThreshold, &decisionThreshold, sizeof(decisionThreshold)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_maxFeatures, &maxFeatures, sizeof(maxFeatures)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_numInitializationFrames, &numInitializationFrames, sizeof(numInitializationFrames)) ); + } + + __device__ float findFeature(const int color, const PtrStepi& colors, const PtrStepf& weights, const int x, const int y, const int nfeatures) + { + for (int i = 0, fy = y; i < nfeatures; ++i, fy += c_height) + { + if (color == colors(fy, x)) + return weights(fy, x); + } + + // not in histogram, so return 0. 
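+ // (this weight is what the update kernel below uses as the likelihood of the observed
+ // quantized color under the pixel's background model when it evaluates the posterior)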
+ return 0.0f; + } + + __device__ void normalizeHistogram(PtrStepf weights, const int x, const int y, const int nfeatures) + { + float total = 0.0f; + for (int i = 0, fy = y; i < nfeatures; ++i, fy += c_height) + total += weights(fy, x); + + if (total != 0.0f) + { + for (int i = 0, fy = y; i < nfeatures; ++i, fy += c_height) + weights(fy, x) /= total; + } + } + + __device__ bool insertFeature(const int color, const float weight, PtrStepi colors, PtrStepf weights, const int x, const int y, int& nfeatures) + { + for (int i = 0, fy = y; i < nfeatures; ++i, fy += c_height) + { + if (color == colors(fy, x)) + { + // feature in histogram + + weights(fy, x) += weight; + + return false; + } + } + + if (nfeatures == c_maxFeatures) + { + // discard oldest feature + + int idx = -1; + float minVal = numeric_limits::max(); + for (int i = 0, fy = y; i < nfeatures; ++i, fy += c_height) + { + const float w = weights(fy, x); + if (w < minVal) + { + minVal = w; + idx = fy; + } + } + + colors(idx, x) = color; + weights(idx, x) = weight; + + return false; + } + + colors(nfeatures * c_height + y, x) = color; + weights(nfeatures * c_height + y, x) = weight; + + ++nfeatures; + + return true; + } + + namespace detail + { + template struct Quantization + { + template + __device__ static int apply(const T& val) + { + int res = 0; + res |= static_cast((val.x - c_minVal) * c_quantizationLevels / (c_maxVal - c_minVal)); + res |= static_cast((val.y - c_minVal) * c_quantizationLevels / (c_maxVal - c_minVal)) << 8; + res |= static_cast((val.z - c_minVal) * c_quantizationLevels / (c_maxVal - c_minVal)) << 16; + return res; + } + }; + + template <> struct Quantization<1> + { + template + __device__ static int apply(T val) + { + return static_cast((val - c_minVal) * c_quantizationLevels / (c_maxVal - c_minVal)); + } + }; + } + + template struct Quantization : detail::Quantization::cn> {}; + + template + __global__ void update(const PtrStep_ frame, PtrStepb fgmask, PtrStepi colors_, PtrStepf weights_, PtrStepi nfeatures_, + const int frameNum, const float learningRate, const bool updateBackgroundModel) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= c_width || y >= c_height) + return; + + const SrcT pix = frame(y, x); + const int newFeatureColor = Quantization::apply(pix); + + int nfeatures = nfeatures_(y, x); + + if (frameNum >= c_numInitializationFrames) + { + // typical operation + + const float weight = findFeature(newFeatureColor, colors_, weights_, x, y, nfeatures); + + // see Godbehere, Matsukawa, Goldberg (2012) for reasoning behind this implementation of Bayes rule + const float posterior = (weight * c_backgroundPrior) / (weight * c_backgroundPrior + (1.0f - weight) * (1.0f - c_backgroundPrior)); + + const bool isForeground = ((1.0f - posterior) > c_decisionThreshold); + fgmask(y, x) = (uchar)(-isForeground); + + // update histogram. 
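+ // Each existing weight decays by a factor of (1 - learningRate), the observed color is
+ // inserted (or reinforced) with weight learningRate, and the histogram is re-normalized
+ // whenever insertFeature() has to claim a new feature slot.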
+ + if (updateBackgroundModel) + { + for (int i = 0, fy = y; i < nfeatures; ++i, fy += c_height) + weights_(fy, x) *= 1.0f - learningRate; + + bool inserted = insertFeature(newFeatureColor, learningRate, colors_, weights_, x, y, nfeatures); + + if (inserted) + { + normalizeHistogram(weights_, x, y, nfeatures); + nfeatures_(y, x) = nfeatures; + } + } + } + else if (updateBackgroundModel) + { + // training-mode update + + insertFeature(newFeatureColor, 1.0f, colors_, weights_, x, y, nfeatures); + + if (frameNum == c_numInitializationFrames - 1) + normalizeHistogram(weights_, x, y, nfeatures); + } + } + + template + void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, + int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream) + { + const dim3 block(32, 8); + const dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(update, cudaFuncCachePreferL1) ); + + update<<>>((DevMem2D_) frame, fgmask, colors, weights, nfeatures, frameNum, learningRate, updateBackgroundModel); + + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + } +}}} diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu new file mode 100644 index 0000000000..1f9dc114ef --- /dev/null +++ b/modules/gpu/src/cuda/ccomponetns.cu @@ -0,0 +1,523 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+//M*/ + +#include +#include +#include +#include +#include +#include + +namespace cv { namespace gpu { namespace device +{ + namespace ccl + { + enum + { + WARP_SIZE = 32, + WARP_LOG = 5, + + CTA_SIZE_X = 32, + CTA_SIZE_Y = 8, + + STA_SIZE_MARGE_Y = 4, + STA_SIZE_MARGE_X = 32, + + TPB_X = 1, + TPB_Y = 4, + + TILE_COLS = CTA_SIZE_X * TPB_X, + TILE_ROWS = CTA_SIZE_Y * TPB_Y + }; + + template struct IntervalsTraits + { + typedef T elem_type; + }; + + template<> struct IntervalsTraits + { + typedef int dist_type; + enum {ch = 1}; + }; + + template<> struct IntervalsTraits + { + typedef int3 dist_type; + enum {ch = 3}; + }; + + template<> struct IntervalsTraits + { + typedef int4 dist_type; + enum {ch = 4}; + }; + + template<> struct IntervalsTraits + { + typedef int dist_type; + enum {ch = 1}; + }; + + template<> struct IntervalsTraits + { + typedef int3 dist_type; + enum {ch = 3}; + }; + + template<> struct IntervalsTraits + { + typedef int4 dist_type; + enum {ch = 4}; + }; + + template<> struct IntervalsTraits + { + typedef float dist_type; + enum {ch = 1}; + }; + + template<> struct IntervalsTraits + { + typedef int dist_type; + enum {ch = 1}; + }; + + typedef unsigned char component; + enum Edges { UP = 1, DOWN = 2, LEFT = 4, RIGHT = 8, EMPTY = 0xF0 }; + + template struct InInterval {}; + + template struct InInterval + { + __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi) : lo(-_lo.x), hi(_hi.x) {}; + T lo, hi; + + template __device__ __forceinline__ bool operator() (const I& a, const I& b) const + { + I d = a - b; + return lo <= d && d <= hi; + } + }; + + template struct InInterval + { + __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi) + : lo (VecTraits::make(-_lo.x, -_lo.y, -_lo.z)), hi (VecTraits::make(_hi.x, _hi.y, _hi.z)){}; + T lo, hi; + + template __device__ __forceinline__ bool operator() (const I& a, const I& b) const + { + I d = a - b; + return lo.x <= d.x && d.x <= hi.x && + lo.y <= d.y && d.y <= hi.y && + lo.z <= d.z && d.z <= hi.z; + } + }; + + template struct InInterval + { + __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi) + : lo (VecTraits::make(-_lo.x, -_lo.y, -_lo.z, -_lo.w)), hi (VecTraits::make(_hi.x, _hi.y, _hi.z, -_hi.w)){}; + T lo, hi; + + template __device__ __forceinline__ bool operator() (const I& a, const I& b) const + { + I d = a - b; + return lo.x <= d.x && d.x <= hi.x && + lo.y <= d.y && d.y <= hi.y && + lo.z <= d.z && d.z <= hi.z && + lo.w <= d.w && d.w <= hi.w; + } + }; + + + template + __global__ void computeConnectivity(const DevMem2D_ image, DevMem2D components, F connected) + { + int x = threadIdx.x + blockIdx.x * blockDim.x; + int y = threadIdx.y + blockIdx.y * blockDim.y; + + if (x >= image.cols || y >= image.rows) return; + + T intensity = image(y, x); + component c = 0; + + if ( x > 0 && connected(intensity, image(y, x - 1))) + c |= LEFT; + + if ( y > 0 && connected(intensity, image(y - 1, x))) + c |= UP; + + if ( x - 1 < image.cols && connected(intensity, image(y, x + 1))) + c |= RIGHT; + + if ( y - 1 < image.rows && connected(intensity, image(y + 1, x))) + c |= DOWN; + + components(y, x) = c; + } + + template< typename T> + void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream) + { + dim3 block(CTA_SIZE_X, CTA_SIZE_Y); + dim3 grid(divUp(image.cols, block.x), divUp(image.rows, block.y)); + + typedef InInterval::dist_type, IntervalsTraits::ch> Int_t; + + Int_t inInt(lo, hi); + 
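+ // computeConnectivity stores, for every pixel, a 4-bit mask (UP / DOWN / LEFT / RIGHT)
+ // marking which neighbours fall inside the requested intensity interval; labelComponents()
+ // later merges labels only across these marked edges.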
computeConnectivity<<>>(static_cast >(image), edges, inInt); + + cudaSafeCall( cudaGetLastError() ); + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + + __global__ void lableTiles(const DevMem2D edges, DevMem2Di comps) + { + int x = threadIdx.x + blockIdx.x * TILE_COLS; + int y = threadIdx.y + blockIdx.y * TILE_ROWS; + + if (x >= edges.cols || y >= edges.rows) return; + + //currently x is 1 + int bounds = ((y + TPB_Y) < edges.rows); + + __shared__ int labelsTile[TILE_ROWS][TILE_COLS]; + __shared__ int edgesTile[TILE_ROWS][TILE_COLS]; + + int new_labels[TPB_Y][TPB_X]; + int old_labels[TPB_Y][TPB_X]; + + #pragma unroll + for (int i = 0; i < TPB_Y; ++i) + #pragma unroll + for (int j = 0; j < TPB_X; ++j) + { + int yloc = threadIdx.y + CTA_SIZE_Y * i; + int xloc = threadIdx.x + CTA_SIZE_X * j; + component c = edges(bounds * (y + CTA_SIZE_Y * i), x + CTA_SIZE_X * j); + + if (!xloc) c &= ~LEFT; + if (!yloc) c &= ~UP; + + if (xloc == TILE_COLS -1) c &= ~RIGHT; + if (yloc == TILE_ROWS -1) c &= ~DOWN; + + new_labels[i][j] = yloc * TILE_COLS + xloc; + edgesTile[yloc][xloc] = c; + } + + for (int k = 0; ;++k) + { + //1. backup + #pragma unroll + for (int i = 0; i < TPB_Y; ++i) + #pragma unroll + for (int j = 0; j < TPB_X; ++j) + { + int yloc = threadIdx.y + CTA_SIZE_Y * i; + int xloc = threadIdx.x + CTA_SIZE_X * j; + + old_labels[i][j] = new_labels[i][j]; + labelsTile[yloc][xloc] = new_labels[i][j]; + } + + __syncthreads(); + + //2. compare local arrays + #pragma unroll + for (int i = 0; i < TPB_Y; ++i) + #pragma unroll + for (int j = 0; j < TPB_X; ++j) + { + int yloc = threadIdx.y + CTA_SIZE_Y * i; + int xloc = threadIdx.x + CTA_SIZE_X * j; + + component c = edgesTile[yloc][xloc]; + int label = new_labels[i][j]; + + if (c & UP) + label = ::min(label, labelsTile[yloc - 1][xloc]); + + if (c & DOWN) + label = ::min(label, labelsTile[yloc + 1][xloc]); + + if (c & LEFT) + label = ::min(label, labelsTile[yloc][xloc - 1]); + + if (c & RIGHT) + label = ::min(label, labelsTile[yloc][xloc + 1]); + + new_labels[i][j] = label; + } + + __syncthreads(); + + //3. determine: Is any value changed? + int changed = 0; + #pragma unroll + for (int i = 0; i < TPB_Y; ++i) + #pragma unroll + for (int j = 0; j < TPB_X; ++j) + { + if (new_labels[i][j] < old_labels[i][j]) + { + changed = 1; + Emulation::smem::atomicMin(&labelsTile[0][0] + old_labels[i][j], new_labels[i][j]); + } + } + + changed = Emulation::sycthOr(changed); + + if (!changed) + break; + + //4. 
Compact paths + const int *labels = &labelsTile[0][0]; + #pragma unroll + for (int i = 0; i < TPB_Y; ++i) + #pragma unroll + for (int j = 0; j < TPB_X; ++j) + { + int label = new_labels[i][j]; + + while( labels[label] < label ) label = labels[label]; + + new_labels[i][j] = label; + } + __syncthreads(); + } + + #pragma unroll + for (int i = 0; i < TPB_Y; ++i) + #pragma unroll + for (int j = 0; j < TPB_X; ++j) + { + int label = new_labels[i][j]; + int yloc = label / TILE_COLS; + int xloc = label - yloc * TILE_COLS; + + xloc += blockIdx.x * TILE_COLS; + yloc += blockIdx.y * TILE_ROWS; + + label = yloc * edges.cols + xloc; + // do it for x too. + if (y + CTA_SIZE_Y * i < comps.rows) comps(y + CTA_SIZE_Y * i, x + CTA_SIZE_X * j) = label; + } + } + + __device__ __forceinline__ int root(const DevMem2Di& comps, int label) + { + while(1) + { + int y = label / comps.cols; + int x = label - y * comps.cols; + + int parent = comps(y, x); + + if (label == parent) break; + + label = parent; + } + return label; + } + + __device__ __forceinline__ void isConnected(DevMem2Di& comps, int l1, int l2, bool& changed) + { + int r1 = root(comps, l1); + int r2 = root(comps, l2); + + if (r1 == r2) return; + + int mi = ::min(r1, r2); + int ma = ::max(r1, r2); + + int y = ma / comps.cols; + int x = ma - y * comps.cols; + + atomicMin(&comps.ptr(y)[x], mi); + changed = true; + } + + __global__ void crossMerge(const int tilesNumY, const int tilesNumX, int tileSizeY, int tileSizeX, + const DevMem2D edges, DevMem2Di comps, const int yIncomplete, int xIncomplete) + { + int tid = threadIdx.y * blockDim.x + threadIdx.x; + int stride = blockDim.y * blockDim.x; + + int ybegin = blockIdx.y * (tilesNumY * tileSizeY); + int yend = ybegin + tilesNumY * tileSizeY; + + if (blockIdx.y == gridDim.y - 1) + { + yend -= yIncomplete * tileSizeY; + yend -= tileSizeY; + tileSizeY = (edges.rows % tileSizeY); + + yend += tileSizeY; + } + + int xbegin = blockIdx.x * tilesNumX * tileSizeX; + int xend = xbegin + tilesNumX * tileSizeX; + + if (blockIdx.x == gridDim.x - 1) + { + if (xIncomplete) yend = ybegin; + xend -= xIncomplete * tileSizeX; + xend -= tileSizeX; + tileSizeX = (edges.cols % tileSizeX); + + xend += tileSizeX; + } + + if (blockIdx.y == (gridDim.y - 1) && yIncomplete) + { + xend = xbegin; + } + + int tasksV = (tilesNumX - 1) * (yend - ybegin); + int tasksH = (tilesNumY - 1) * (xend - xbegin); + + int total = tasksH + tasksV; + + bool changed; + do + { + changed = false; + for (int taskIdx = tid; taskIdx < total; taskIdx += stride) + { + if (taskIdx < tasksH) + { + int indexH = taskIdx; + + int row = indexH / (xend - xbegin); + int col = indexH - row * (xend - xbegin); + + int y = ybegin + (row + 1) * tileSizeY; + int x = xbegin + col; + + component e = edges( x, y); + if (e & UP) + { + int lc = comps(y,x); + int lu = comps(y - 1, x); + + isConnected(comps, lc, lu, changed); + } + } + else + { + int indexV = taskIdx - tasksH; + + int col = indexV / (yend - ybegin); + int row = indexV - col * (yend - ybegin); + + int x = xbegin + (col + 1) * tileSizeX; + int y = ybegin + row; + + component e = edges(x, y); + if (e & LEFT) + { + int lc = comps(y, x); + int ll = comps(y, x - 1); + + isConnected(comps, lc, ll, changed); + } + } + } + } while (__syncthreads_or(changed)); + } + + __global__ void flatten(const DevMem2D edges, DevMem2Di comps) + { + int x = threadIdx.x + blockIdx.x * blockDim.x; + int y = threadIdx.y + blockIdx.y * blockDim.y; + + if( x < comps.cols && y < comps.rows) + comps(y, x) = root(comps, comps(y, x)); + } + + enum 
{CC_NO_COMPACT = 0, CC_COMPACT_LABELS = 1}; + + void labelComponents(const DevMem2D& edges, DevMem2Di comps, int flags, cudaStream_t stream) + { + dim3 block(CTA_SIZE_X, CTA_SIZE_Y); + dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS)); + + lableTiles<<>>(edges, comps); + cudaSafeCall( cudaGetLastError() ); + + int tileSizeX = TILE_COLS, tileSizeY = TILE_ROWS; + while (grid.x > 1 || grid.y > 1) + { + dim3 mergeGrid(ceilf(grid.x / 2.0), ceilf(grid.y / 2.0)); + dim3 mergeBlock(STA_SIZE_MARGE_X, STA_SIZE_MARGE_Y); + // debug log + // std::cout << "merging: " << grid.y << " x " << grid.x << " ---> " << mergeGrid.y << " x " << mergeGrid.x << " for tiles: " << tileSizeY << " x " << tileSizeX << std::endl; + crossMerge<<>>(2, 2, tileSizeY, tileSizeX, edges, comps, ceilf(grid.y / 2.0) - grid.y / 2, ceilf(grid.x / 2.0) - grid.x / 2); + tileSizeX <<= 1; + tileSizeY <<= 1; + grid = mergeGrid; + + cudaSafeCall( cudaGetLastError() ); + } + + grid.x = divUp(edges.cols, block.x); + grid.y = divUp(edges.rows, block.y); + flatten<<>>(edges, comps); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + } +} } } \ No newline at end of file diff --git a/modules/gpu/src/cuda/debayer.cu b/modules/gpu/src/cuda/debayer.cu new file mode 100644 index 0000000000..fc7533b5b3 --- /dev/null +++ b/modules/gpu/src/cuda/debayer.cu @@ -0,0 +1,327 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
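For reference, the crossMerge/flatten stage above relies on a simple union rule: root() follows parent links until a label points to itself, and isConnected() points the larger of two roots at the smaller one via atomicMin. A minimal CPU sketch of that rule (illustration only, not part of the patch; a flat std::vector stands in for the comps matrix):

#include <vector>
#include <algorithm>

// follow parent links until a label is its own parent, as root() does on the device
static int findRoot(const std::vector<int>& comps, int label)
{
    while (comps[label] != label)
        label = comps[label];
    return label;
}

// merge two components by pointing the larger root at the smaller one
// (the device version performs the final write with atomicMin)
static void merge(std::vector<int>& comps, int l1, int l2)
{
    int r1 = findRoot(comps, l1);
    int r2 = findRoot(comps, l2);
    if (r1 != r2)
        comps[std::max(r1, r2)] = std::min(r1, r2);
}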
+// +//M*/ + +#include +#include +#include +#include + +namespace cv { namespace gpu { + namespace device + { + template + __global__ void Bayer2BGR_8u(const PtrStepb src, DevMem2D_ dst, const bool blue_last, const bool start_with_green) + { + const int s_x = blockIdx.x * blockDim.x + threadIdx.x; + int s_y = blockIdx.y * blockDim.y + threadIdx.y; + + if (s_y >= dst.rows || (s_x << 2) >= dst.cols) + return; + + s_y = ::min(::max(s_y, 1), dst.rows - 2); + + uchar4 patch[3][3]; + patch[0][1] = ((const uchar4*) src.ptr(s_y - 1))[s_x]; + patch[0][0] = ((const uchar4*) src.ptr(s_y - 1))[::max(s_x - 1, 0)]; + patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)]; + + patch[1][1] = ((const uchar4*) src.ptr(s_y))[s_x]; + patch[1][0] = ((const uchar4*) src.ptr(s_y))[::max(s_x - 1, 0)]; + patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)]; + + patch[2][1] = ((const uchar4*) src.ptr(s_y + 1))[s_x]; + patch[2][0] = ((const uchar4*) src.ptr(s_y + 1))[::max(s_x - 1, 0)]; + patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)]; + + D res0 = VecTraits::all(numeric_limits::max()); + D res1 = VecTraits::all(numeric_limits::max()); + D res2 = VecTraits::all(numeric_limits::max()); + D res3 = VecTraits::all(numeric_limits::max()); + + if ((s_y & 1) ^ start_with_green) + { + const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1; + const int t1 = (patch[1][0].w + patch[1][1].y + 1) >> 1; + + const int t2 = (patch[0][1].x + patch[0][1].z + patch[2][1].x + patch[2][1].z + 2) >> 2; + const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][1].z + patch[2][1].y + 2) >> 2; + + const int t4 = (patch[0][1].z + patch[2][1].z + 1) >> 1; + const int t5 = (patch[1][1].y + patch[1][1].w + 1) >> 1; + + const int t6 = (patch[0][1].z + patch[0][2].x + patch[2][1].z + patch[2][2].x + 2) >> 2; + const int t7 = (patch[0][1].w + patch[1][1].z + patch[1][2].x + patch[2][1].w + 2) >> 2; + + if ((s_y & 1) ^ blue_last) + { + res0.x = t1; + res0.y = patch[1][1].x; + res0.z = t0; + + res1.x = patch[1][1].y; + res1.y = t3; + res1.z = t2; + + res2.x = t5; + res2.y = patch[1][1].z; + res2.z = t4; + + res3.x = patch[1][1].w; + res3.y = t7; + res3.z = t6; + } + else + { + res0.x = t0; + res0.y = patch[1][1].x; + res0.z = t1; + + res1.x = t2; + res1.y = t3; + res1.z = patch[1][1].y; + + res2.x = t4; + res2.y = patch[1][1].z; + res2.z = t5; + + res3.x = t6; + res3.y = t7; + res3.z = patch[1][1].w; + } + } + else + { + const int t0 = (patch[0][0].w + patch[0][1].y + patch[2][0].w + patch[2][1].y + 2) >> 2; + const int t1 = (patch[0][1].x + patch[1][0].w + patch[1][1].y + patch[2][1].x + 2) >> 2; + + const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1; + const int t3 = (patch[1][1].x + patch[1][1].z + 1) >> 1; + + const int t4 = (patch[0][1].y + patch[0][1].w + patch[2][1].y + patch[2][1].w + 2) >> 2; + const int t5 = (patch[0][1].z + patch[1][1].y + patch[1][1].w + patch[2][1].z + 2) >> 2; + + const int t6 = (patch[0][1].w + patch[2][1].w + 1) >> 1; + const int t7 = (patch[1][1].z + patch[1][2].x + 1) >> 1; + + if ((s_y & 1) ^ blue_last) + { + res0.x = patch[1][1].x; + res0.y = t1; + res0.z = t0; + + res1.x = t3; + res1.y = patch[1][1].y; + res1.z = t2; + + res2.x = patch[1][1].z; + res2.y = t5; + res2.z = t4; + + res3.x = t7; + res3.y = patch[1][1].w; + res3.z = t6; + } + else + { + res0.x = t0; + res0.y = t1; + res0.z = patch[1][1].x; + + res1.x = t2; + res1.y = patch[1][1].y; + res1.z = t3; + + res2.x = t4; + res2.y = t5; + 
res2.z = patch[1][1].z; + + res3.x = t6; + res3.y = patch[1][1].w; + res3.z = t7; + } + } + + const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2; + const int d_y = blockIdx.y * blockDim.y + threadIdx.y; + + dst(d_y, d_x) = res0; + if (d_x + 1 < dst.cols) + dst(d_y, d_x + 1) = res1; + if (d_x + 2 < dst.cols) + dst(d_y, d_x + 2) = res2; + if (d_x + 3 < dst.cols) + dst(d_y, d_x + 3) = res3; + } + + template + __global__ void Bayer2BGR_16u(const PtrStepb src, DevMem2D_ dst, const bool blue_last, const bool start_with_green) + { + const int s_x = blockIdx.x * blockDim.x + threadIdx.x; + int s_y = blockIdx.y * blockDim.y + threadIdx.y; + + if (s_y >= dst.rows || (s_x << 1) >= dst.cols) + return; + + s_y = ::min(::max(s_y, 1), dst.rows - 2); + + ushort2 patch[3][3]; + patch[0][1] = ((const ushort2*) src.ptr(s_y - 1))[s_x]; + patch[0][0] = ((const ushort2*) src.ptr(s_y - 1))[::max(s_x - 1, 0)]; + patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)]; + + patch[1][1] = ((const ushort2*) src.ptr(s_y))[s_x]; + patch[1][0] = ((const ushort2*) src.ptr(s_y))[::max(s_x - 1, 0)]; + patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)]; + + patch[2][1] = ((const ushort2*) src.ptr(s_y + 1))[s_x]; + patch[2][0] = ((const ushort2*) src.ptr(s_y + 1))[::max(s_x - 1, 0)]; + patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)]; + + D res0 = VecTraits::all(numeric_limits::max()); + D res1 = VecTraits::all(numeric_limits::max()); + + if ((s_y & 1) ^ start_with_green) + { + const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1; + const int t1 = (patch[1][0].y + patch[1][1].y + 1) >> 1; + + const int t2 = (patch[0][1].x + patch[0][2].x + patch[2][1].x + patch[2][2].x + 2) >> 2; + const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][2].x + patch[2][1].y + 2) >> 2; + + if ((s_y & 1) ^ blue_last) + { + res0.x = t1; + res0.y = patch[1][1].x; + res0.z = t0; + + res1.x = patch[1][1].y; + res1.y = t3; + res1.z = t2; + } + else + { + res0.x = t0; + res0.y = patch[1][1].x; + res0.z = t1; + + res1.x = t2; + res1.y = t3; + res1.z = patch[1][1].y; + } + } + else + { + const int t0 = (patch[0][0].y + patch[0][1].y + patch[2][0].y + patch[2][1].y + 2) >> 2; + const int t1 = (patch[0][1].x + patch[1][0].y + patch[1][1].y + patch[2][1].x + 2) >> 2; + + const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1; + const int t3 = (patch[1][1].x + patch[1][2].x + 1) >> 1; + + if ((s_y & 1) ^ blue_last) + { + res0.x = patch[1][1].x; + res0.y = t1; + res0.z = t0; + + res1.x = t3; + res1.y = patch[1][1].y; + res1.z = t2; + } + else + { + res0.x = t0; + res0.y = t1; + res0.z = patch[1][1].x; + + res1.x = t2; + res1.y = patch[1][1].y; + res1.z = t3; + } + } + + const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1; + const int d_y = blockIdx.y * blockDim.y + threadIdx.y; + + dst(d_y, d_x) = res0; + if (d_x + 1 < dst.cols) + dst(d_y, d_x + 1) = res1; + } + + template + void Bayer2BGR_8u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream) + { + typedef typename TypeVec::vec_type dst_t; + + const dim3 block(32, 8); + const dim3 grid(divUp(dst.cols, 4 * block.x), divUp(dst.rows, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u, cudaFuncCachePreferL1) ); + + Bayer2BGR_8u<<>>(src, (DevMem2D_)dst, blue_last, start_with_green); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + template + void 
Bayer2BGR_16u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream) + { + typedef typename TypeVec::vec_type dst_t; + + const dim3 block(32, 8); + const dim3 grid(divUp(dst.cols, 2 * block.x), divUp(dst.rows, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u, cudaFuncCachePreferL1) ); + + Bayer2BGR_16u<<>>(src, (DevMem2D_)dst, blue_last, start_with_green); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + template void Bayer2BGR_8u_gpu<3>(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream); + template void Bayer2BGR_8u_gpu<4>(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream); + template void Bayer2BGR_16u_gpu<3>(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream); + template void Bayer2BGR_16u_gpu<4>(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream); + } +}} diff --git a/modules/gpu/src/graphcuts.cpp b/modules/gpu/src/graphcuts.cpp index 0546ce3ad9..58fcde8f09 100644 --- a/modules/gpu/src/graphcuts.cpp +++ b/modules/gpu/src/graphcuts.cpp @@ -1,216 +1,286 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other GpuMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or bpied warranties, including, but not limited to, the bpied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
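The debayer.cu kernels above are reached through cv::gpu::cvtColor with the COLOR_Bayer*2BGR codes, optionally requesting a 4-channel destination (see the tests added later in this patch). A hedged usage sketch; the file names are placeholders:

#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    // single-channel raw frame laid out in a BG Bayer pattern (8U or 16U)
    cv::Mat raw = cv::imread("bayer_bg_frame.png", cv::IMREAD_GRAYSCALE);
    if (raw.empty())
        return 1;

    cv::gpu::GpuMat d_raw(raw), d_bgr, d_bgra;
    cv::gpu::cvtColor(d_raw, d_bgr,  cv::COLOR_BayerBG2BGR);     // 3-channel output
    cv::gpu::cvtColor(d_raw, d_bgra, cv::COLOR_BayerBG2BGR, 4);  // 4-channel output

    cv::Mat bgr;
    d_bgr.download(bgr);
    cv::imwrite("demosaiced.png", bgr);
    return 0;
}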
-// -//M*/ - -#include "precomp.hpp" - -#if !defined (HAVE_CUDA) - -void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } -void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } - -#else /* !defined (HAVE_CUDA) */ - -namespace -{ - typedef NppStatus (*init_func_t)(NppiSize oSize, NppiGraphcutState** ppState, Npp8u* pDeviceMem); - - class NppiGraphcutStateHandler - { - public: - NppiGraphcutStateHandler(NppiSize sznpp, Npp8u* pDeviceMem, const init_func_t func) - { - nppSafeCall( func(sznpp, &pState, pDeviceMem) ); - } - - ~NppiGraphcutStateHandler() - { - nppSafeCall( nppiGraphcutFree(pState) ); - } - - operator NppiGraphcutState*() - { - return pState; - } - - private: - NppiGraphcutState* pState; - }; -} - -void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s) -{ -#if (CUDA_VERSION < 5000) - CV_Assert(terminals.type() == CV_32S); -#else - CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F); -#endif - - Size src_size = terminals.size(); - - CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width)); - CV_Assert(leftTransp.type() == terminals.type()); - - CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width)); - CV_Assert(rightTransp.type() == terminals.type()); - - CV_Assert(top.size() == src_size); - CV_Assert(top.type() == terminals.type()); - - CV_Assert(bottom.size() == src_size); - CV_Assert(bottom.type() == terminals.type()); - - labels.create(src_size, CV_8U); - - NppiSize sznpp; - sznpp.width = src_size.width; - sznpp.height = src_size.height; - - int bufsz; - nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) ); - - ensureSizeIsEnough(1, bufsz, CV_8U, buf); - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStreamHandler h(stream); - - NppiGraphcutStateHandler state(sznpp, buf.ptr(), nppiGraphcutInitAlloc); - -#if (CUDA_VERSION < 5000) - nppSafeCall( nppiGraphcut_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), - static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); -#else - if (terminals.type() == CV_32S) - { - nppSafeCall( nppiGraphcut_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), - static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); - } - else - { - nppSafeCall( nppiGraphcut_32f8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), - static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); - } -#endif - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); -} - -void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight, - GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight, GpuMat& labels, GpuMat& buf, Stream& s) -{ -#if (CUDA_VERSION < 5000) - CV_Assert(terminals.type() == CV_32S); -#else - CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F); -#endif - - Size src_size = terminals.size(); - - CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width)); - CV_Assert(leftTransp.type() == terminals.type()); - - CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width)); - 
CV_Assert(rightTransp.type() == terminals.type()); - - CV_Assert(top.size() == src_size); - CV_Assert(top.type() == terminals.type()); - - CV_Assert(topLeft.size() == src_size); - CV_Assert(topLeft.type() == terminals.type()); - - CV_Assert(topRight.size() == src_size); - CV_Assert(topRight.type() == terminals.type()); - - CV_Assert(bottom.size() == src_size); - CV_Assert(bottom.type() == terminals.type()); - - CV_Assert(bottomLeft.size() == src_size); - CV_Assert(bottomLeft.type() == terminals.type()); - - CV_Assert(bottomRight.size() == src_size); - CV_Assert(bottomRight.type() == terminals.type()); - - labels.create(src_size, CV_8U); - - NppiSize sznpp; - sznpp.width = src_size.width; - sznpp.height = src_size.height; - - int bufsz; - nppSafeCall( nppiGraphcut8GetSize(sznpp, &bufsz) ); - - ensureSizeIsEnough(1, bufsz, CV_8U, buf); - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStreamHandler h(stream); - - NppiGraphcutStateHandler state(sznpp, buf.ptr(), nppiGraphcut8InitAlloc); - -#if (CUDA_VERSION < 5000) - nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), - top.ptr(), topLeft.ptr(), topRight.ptr(), - bottom.ptr(), bottomLeft.ptr(), bottomRight.ptr(), - static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); -#else - if (terminals.type() == CV_32S) - { - nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), - top.ptr(), topLeft.ptr(), topRight.ptr(), - bottom.ptr(), bottomLeft.ptr(), bottomRight.ptr(), - static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); - } - else - { - nppSafeCall( nppiGraphcut8_32f8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), - top.ptr(), topLeft.ptr(), topRight.ptr(), - bottom.ptr(), bottomLeft.ptr(), bottomRight.ptr(), - static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); - } -#endif - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); -} - -#endif /* !defined (HAVE_CUDA) */ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#if !defined (HAVE_CUDA) + +void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } +void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } + +void cv::gpu::connectivityMask(const GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_nogpu(); } +void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int, Stream& stream) { throw_nogpu(); } + +#else /* !defined (HAVE_CUDA) */ + +namespace cv { namespace gpu { namespace device +{ + namespace ccl + { + void labelComponents(const DevMem2D& edges, DevMem2Di comps, int flags, cudaStream_t stream); + + template + void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + } +}}} + + +float4 scalarToCudaType(const cv::Scalar& in) +{ + float4 res; + res.x = in[0]; res.y = in[1]; res.z = in[2]; res.w = in[3]; + return res; +} + + +void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s) +{ + CV_Assert(!image.empty()); + + int ch = image.channels(); + CV_Assert(ch <= 4); + + int depth = image.depth(); + + typedef void (*func_t)(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + + static const func_t suppotLookup[8][4] = + { // 1, 2, 3, 4 + { device::ccl::computeEdges, 0, device::ccl::computeEdges, device::ccl::computeEdges },// CV_8U + { 0, 0, 0, 0 },// CV_16U + { device::ccl::computeEdges, 0, device::ccl::computeEdges, device::ccl::computeEdges },// CV_8S + { 0, 0, 0, 0 },// CV_16S + { device::ccl::computeEdges, 0, 0, 0 },// CV_32S + { device::ccl::computeEdges, 0, 0, 0 },// CV_32F + { 0, 0, 0, 0 },// CV_64F + { 0, 0, 0, 0 } // CV_USRTYPE1 + }; + + func_t f = suppotLookup[depth][ch - 1]; + CV_Assert(f); + + if (image.size() != mask.size() || mask.type() != CV_8UC1) + mask.create(image.size(), CV_8UC1); + + cudaStream_t stream = StreamAccessor::getStream(s); + float4 culo = scalarToCudaType(lo), cuhi = scalarToCudaType(hi); + f(image, mask, culo, cuhi, stream); +} + +void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int flags, Stream& s) +{ + if (!TargetArchs::builtWith(SHARED_ATOMICS) || !DeviceInfo().supports(SHARED_ATOMICS)) + CV_Error(CV_StsNotImplemented, "The device doesn't support shared atomics and communicative synchronization!"); + CV_Assert(!mask.empty() && mask.type() == CV_8U); + + if (mask.size() != components.size() || components.type() != CV_32SC1) + components.create(mask.size(), CV_32SC1); + + cudaStream_t stream = StreamAccessor::getStream(s); 
+ device::ccl::labelComponents(mask, components, flags, stream); +} + +namespace +{ + typedef NppStatus (*init_func_t)(NppiSize oSize, NppiGraphcutState** ppState, Npp8u* pDeviceMem); + + class NppiGraphcutStateHandler + { + public: + NppiGraphcutStateHandler(NppiSize sznpp, Npp8u* pDeviceMem, const init_func_t func) + { + nppSafeCall( func(sznpp, &pState, pDeviceMem) ); + } + + ~NppiGraphcutStateHandler() + { + nppSafeCall( nppiGraphcutFree(pState) ); + } + + operator NppiGraphcutState*() + { + return pState; + } + + private: + NppiGraphcutState* pState; + }; +} + +void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s) +{ +#if (CUDA_VERSION < 5000) + CV_Assert(terminals.type() == CV_32S); +#else + CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F); +#endif + + Size src_size = terminals.size(); + + CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width)); + CV_Assert(leftTransp.type() == terminals.type()); + + CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width)); + CV_Assert(rightTransp.type() == terminals.type()); + + CV_Assert(top.size() == src_size); + CV_Assert(top.type() == terminals.type()); + + CV_Assert(bottom.size() == src_size); + CV_Assert(bottom.type() == terminals.type()); + + labels.create(src_size, CV_8U); + + NppiSize sznpp; + sznpp.width = src_size.width; + sznpp.height = src_size.height; + + int bufsz; + nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) ); + + ensureSizeIsEnough(1, bufsz, CV_8U, buf); + + cudaStream_t stream = StreamAccessor::getStream(s); + + NppStreamHandler h(stream); + + NppiGraphcutStateHandler state(sznpp, buf.ptr(), nppiGraphcutInitAlloc); + +#if (CUDA_VERSION < 5000) + nppSafeCall( nppiGraphcut_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), + static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); +#else + if (terminals.type() == CV_32S) + { + nppSafeCall( nppiGraphcut_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), + static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); + } + else + { + nppSafeCall( nppiGraphcut_32f8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), + static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); + } +#endif + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); +} + +void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight, + GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight, GpuMat& labels, GpuMat& buf, Stream& s) +{ +#if (CUDA_VERSION < 5000) + CV_Assert(terminals.type() == CV_32S); +#else + CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F); +#endif + + Size src_size = terminals.size(); + + CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width)); + CV_Assert(leftTransp.type() == terminals.type()); + + CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width)); + CV_Assert(rightTransp.type() == terminals.type()); + + CV_Assert(top.size() == src_size); + CV_Assert(top.type() == terminals.type()); + + CV_Assert(topLeft.size() == src_size); + CV_Assert(topLeft.type() == terminals.type()); + + CV_Assert(topRight.size() == src_size); + CV_Assert(topRight.type() == 
terminals.type()); + + CV_Assert(bottom.size() == src_size); + CV_Assert(bottom.type() == terminals.type()); + + CV_Assert(bottomLeft.size() == src_size); + CV_Assert(bottomLeft.type() == terminals.type()); + + CV_Assert(bottomRight.size() == src_size); + CV_Assert(bottomRight.type() == terminals.type()); + + labels.create(src_size, CV_8U); + + NppiSize sznpp; + sznpp.width = src_size.width; + sznpp.height = src_size.height; + + int bufsz; + nppSafeCall( nppiGraphcut8GetSize(sznpp, &bufsz) ); + + ensureSizeIsEnough(1, bufsz, CV_8U, buf); + + cudaStream_t stream = StreamAccessor::getStream(s); + + NppStreamHandler h(stream); + + NppiGraphcutStateHandler state(sznpp, buf.ptr(), nppiGraphcut8InitAlloc); + +#if (CUDA_VERSION < 5000) + nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), + top.ptr(), topLeft.ptr(), topRight.ptr(), + bottom.ptr(), bottomLeft.ptr(), bottomRight.ptr(), + static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); +#else + if (terminals.type() == CV_32S) + { + nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), + top.ptr(), topLeft.ptr(), topRight.ptr(), + bottom.ptr(), bottomLeft.ptr(), bottomRight.ptr(), + static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); + } + else + { + nppSafeCall( nppiGraphcut8_32f8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), + top.ptr(), topLeft.ptr(), topRight.ptr(), + bottom.ptr(), bottomLeft.ptr(), bottomRight.ptr(), + static_cast(terminals.step), static_cast(leftTransp.step), sznpp, labels.ptr(), static_cast(labels.step), state) ); + } +#endif + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); +} + +#endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/opencv2/gpu/device/emulation.hpp b/modules/gpu/src/opencv2/gpu/device/emulation.hpp index fe5452b5cd..0999495aeb 100644 --- a/modules/gpu/src/opencv2/gpu/device/emulation.hpp +++ b/modules/gpu/src/opencv2/gpu/device/emulation.hpp @@ -1,126 +1,137 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
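The new cv::gpu::connectivityMask and cv::gpu::labelComponents entry points above are meant to be used together, as in the test added later in this patch. A hedged sketch (the [0, 2] intensity interval is only an example, and the device must support shared-memory atomics):

#include <opencv2/core/core.hpp>
#include <opencv2/gpu/gpu.hpp>

// labels a CV_8UC1 image: pixels whose difference with a neighbour lies in [lo, hi]
// end up in the same component; the result is a CV_32SC1 label image
void labelImage(const cv::Mat& image8u, cv::Mat& labels32s)
{
    cv::gpu::GpuMat d_image(image8u), d_mask, d_components;

    // build the 4-connectivity mask from the per-pixel intensity interval
    cv::gpu::connectivityMask(d_image, d_mask, cv::Scalar::all(0), cv::Scalar::all(2));

    // resolve connected components on the mask
    cv::gpu::labelComponents(d_mask, d_components);

    d_components.download(labels32s);
}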
-// -// This software is provided by the copyright holders and contributors "as is" and -// any express or bpied warranties, including, but not limited to, the bpied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef OPENCV_GPU_EMULATION_HPP_ -#define OPENCV_GPU_EMULATION_HPP_ - -#include "warp_reduce.hpp" -#include - -namespace cv { namespace gpu { namespace device -{ - struct Emulation - { - template - static __forceinline__ __device__ int Ballot(int predicate) - { -#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200) - return __ballot(predicate); -#else - __shared__ volatile int cta_buffer[CTA_SIZE]; - - int tid = threadIdx.x; - cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0; - return warp_reduce(cta_buffer); -#endif - } - - struct smem - { - enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U }; - - template - static __device__ __forceinline__ T atomicInc(T* address, T val) - { -#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) - T count; - unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U); - do - { - count = *address & TAG_MASK; - count = tag | (count + 1); - *address = count; - } while (*address != count); - - return (count & TAG_MASK) - 1; -#else - return ::atomicInc(address, val); -#endif - } - - template - static __device__ __forceinline__ void atomicAdd(T* address, T val) - { -#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) - T count; - unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U); - do - { - count = *address & TAG_MASK; - count = tag | (count + val); - *address = count; - } while (*address != count); -#else - ::atomicAdd(address, val); -#endif - } - - template - static __device__ __forceinline__ T atomicMin(T* address, T val) - { -#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) - T count = min(*address, val); - do - { - *address = count; - } while (*address > count); - - return count; -#else - return ::atomicMin(address, val); -#endif - } - }; - }; -}}} // namespace cv { namespace gpu { namespace device - +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_GPU_EMULATION_HPP_ +#define OPENCV_GPU_EMULATION_HPP_ + +#include "warp_reduce.hpp" +#include + +namespace cv { namespace gpu { namespace device +{ + struct Emulation + { + + static __device__ __forceinline__ int sycthOr(int pred) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) + // just campilation stab + return false; +#else + return __syncthreads_or(pred); +#endif + } + + template + static __forceinline__ __device__ int Ballot(int predicate) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200) + return __ballot(predicate); +#else + __shared__ volatile int cta_buffer[CTA_SIZE]; + + int tid = threadIdx.x; + cta_buffer[tid] = predicate ? 
(1 << (tid & 31)) : 0; + return warp_reduce(cta_buffer); +#endif + } + + struct smem + { + enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U }; + + template + static __device__ __forceinline__ T atomicInc(T* address, T val) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) + T count; + unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U); + do + { + count = *address & TAG_MASK; + count = tag | (count + 1); + *address = count; + } while (*address != count); + + return (count & TAG_MASK) - 1; +#else + return ::atomicInc(address, val); +#endif + } + + template + static __device__ __forceinline__ void atomicAdd(T* address, T val) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) + T count; + unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U); + do + { + count = *address & TAG_MASK; + count = tag | (count + val); + *address = count; + } while (*address != count); +#else + ::atomicAdd(address, val); +#endif + } + + template + static __device__ __forceinline__ T atomicMin(T* address, T val) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) + T count = min(*address, val); + do + { + *address = count; + } while (*address > count); + + return count; +#else + return ::atomicMin(address, val); +#endif + } + }; + }; +}}} // namespace cv { namespace gpu { namespace device + #endif /* OPENCV_GPU_EMULATION_HPP_ */ \ No newline at end of file diff --git a/modules/gpu/test/test_color.cpp b/modules/gpu/test/test_color.cpp index 89ca1a79a8..645967ef27 100644 --- a/modules/gpu/test/test_color.cpp +++ b/modules/gpu/test/test_color.cpp @@ -41,6 +41,8 @@ #include "precomp.hpp" +#ifdef HAVE_CUDA + namespace { /////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1628,7 +1630,7 @@ TEST_P(CvtColor, BGR2Lab) } catch (const cv::Exception& e) { -#if (CUDA_VERSION < 5000) +#if defined (CUDA_VERSION) && (CUDA_VERSION < 5000) ASSERT_EQ(CV_StsBadFlag, e.code); #else FAIL(); @@ -1655,7 +1657,7 @@ TEST_P(CvtColor, RGB2Lab) } catch (const cv::Exception& e) { -#if (CUDA_VERSION < 5000) +#if defined (CUDA_VERSION) && (CUDA_VERSION < 5000) ASSERT_EQ(CV_StsBadFlag, e.code); #else FAIL(); @@ -1682,7 +1684,7 @@ TEST_P(CvtColor, BGR2Luv) } catch (const cv::Exception& e) { -#if (CUDA_VERSION < 5000) +#if defined (CUDA_VERSION) && (CUDA_VERSION < 5000) ASSERT_EQ(CV_StsBadFlag, e.code); #else FAIL(); @@ -1709,7 +1711,7 @@ TEST_P(CvtColor, RGB2Luv) } catch (const cv::Exception& e) { -#if (CUDA_VERSION < 5000) +#if defined (CUDA_VERSION) && (CUDA_VERSION < 5000) ASSERT_EQ(CV_StsBadFlag, e.code); #else FAIL(); @@ -1736,7 +1738,7 @@ TEST_P(CvtColor, RGBA2mRGBA) } catch (const cv::Exception& e) { -#if (CUDA_VERSION < 5000) +#if defined (CUDA_VERSION) && (CUDA_VERSION < 5000) ASSERT_EQ(CV_StsBadFlag, e.code); #else FAIL(); @@ -1744,6 +1746,159 @@ TEST_P(CvtColor, RGBA2mRGBA) } } +TEST_P(CvtColor, BayerBG2BGR) +{ + if ((depth != CV_8U && depth != CV_16U) || useRoi) + return; + + cv::Mat src = randomMat(size, depth); + + cv::gpu::GpuMat dst; + cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerBG2BGR); + + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, cv::COLOR_BayerBG2BGR); + + EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 0); +} + +TEST_P(CvtColor, BayerBG2BGR4) +{ + if ((depth != CV_8U && depth != CV_16U) || useRoi) + return; + + cv::Mat src = randomMat(size, depth); + + cv::gpu::GpuMat dst; + cv::gpu::cvtColor(loadMat(src, 
useRoi), dst, cv::COLOR_BayerBG2BGR, 4); + + ASSERT_EQ(4, dst.channels()); + + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, cv::COLOR_BayerBG2BGR); + + cv::Mat dst4(dst); + cv::Mat dst3; + cv::cvtColor(dst4, dst3, cv::COLOR_BGRA2BGR); + + + EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst3(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 0); +} + +TEST_P(CvtColor, BayerGB2BGR) +{ + if ((depth != CV_8U && depth != CV_16U) || useRoi) + return; + + cv::Mat src = randomMat(size, depth); + + cv::gpu::GpuMat dst; + cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGB2BGR); + + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, cv::COLOR_BayerGB2BGR); + + EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 0); +} + +TEST_P(CvtColor, BayerGB2BGR4) +{ + if ((depth != CV_8U && depth != CV_16U) || useRoi) + return; + + cv::Mat src = randomMat(size, depth); + + cv::gpu::GpuMat dst; + cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGB2BGR, 4); + + ASSERT_EQ(4, dst.channels()); + + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, cv::COLOR_BayerGB2BGR); + + cv::Mat dst4(dst); + cv::Mat dst3; + cv::cvtColor(dst4, dst3, cv::COLOR_BGRA2BGR); + + EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst3(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 0); +} + +TEST_P(CvtColor, BayerRG2BGR) +{ + if ((depth != CV_8U && depth != CV_16U) || useRoi) + return; + + cv::Mat src = randomMat(size, depth); + + cv::gpu::GpuMat dst; + cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerRG2BGR); + + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, cv::COLOR_BayerRG2BGR); + + EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 0); +} + +TEST_P(CvtColor, BayerRG2BGR4) +{ + if ((depth != CV_8U && depth != CV_16U) || useRoi) + return; + + cv::Mat src = randomMat(size, depth); + + cv::gpu::GpuMat dst; + cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerRG2BGR, 4); + + ASSERT_EQ(4, dst.channels()); + + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, cv::COLOR_BayerRG2BGR); + + cv::Mat dst4(dst); + cv::Mat dst3; + cv::cvtColor(dst4, dst3, cv::COLOR_BGRA2BGR); + + EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst3(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 0); +} + +TEST_P(CvtColor, BayerGR2BGR) +{ + if ((depth != CV_8U && depth != CV_16U) || useRoi) + return; + + cv::Mat src = randomMat(size, depth); + + cv::gpu::GpuMat dst; + cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGR2BGR); + + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, cv::COLOR_BayerGR2BGR); + + EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 0); +} + +TEST_P(CvtColor, BayerGR2BGR4) +{ + if ((depth != CV_8U && depth != CV_16U) || useRoi) + return; + + cv::Mat src = randomMat(size, depth); + + cv::gpu::GpuMat dst; + cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGR2BGR, 4); + + ASSERT_EQ(4, dst.channels()); + + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, cv::COLOR_BayerGR2BGR); + + cv::Mat dst4(dst); + cv::Mat dst3; + cv::cvtColor(dst4, dst3, cv::COLOR_BGRA2BGR); + + EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst3(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 0); +} + INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CvtColor, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -1791,3 +1946,5 @@ 
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, SwapChannels, testing::Combine( WHOLE_SUBMAT)); } // namespace + +#endif // HAVE_CUDA diff --git a/modules/gpu/test/test_labeling.cpp b/modules/gpu/test/test_labeling.cpp new file mode 100644 index 0000000000..c88109af19 --- /dev/null +++ b/modules/gpu/test/test_labeling.cpp @@ -0,0 +1,202 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+//M*/ + +#include "precomp.hpp" +#include +#include + +#ifdef HAVE_CUDA + +namespace { + + struct GreedyLabeling + { + struct dot + { + int x; + int y; + + static dot make(int i, int j) + { + dot d; d.x = i; d.y = j; + return d; + } + }; + + struct InInterval + { + InInterval(const int& _lo, const int& _hi) : lo(-_lo), hi(_hi) {}; + const int lo, hi; + + bool operator() (const unsigned char a, const unsigned char b) const + { + int d = a - b; + return lo <= d && d <= hi; + } + }; + + GreedyLabeling(cv::Mat img) + : image(img), _labels(image.size(), CV_32SC1, cv::Scalar::all(-1)) {} + + void operator() (cv::Mat labels) const + { + InInterval inInt(0, 2); + dot* stack = new dot[image.cols * image.rows]; + + int cc = -1; + + int* dist_labels = (int*)labels.data; + int pitch = labels.step1(); + + unsigned char* source = (unsigned char*)image.data; + int width = image.cols; + int height = image.rows; + + for (int j = 0; j < image.rows; ++j) + for (int i = 0; i < image.cols; ++i) + { + if (dist_labels[j * pitch + i] != -1) continue; + + dot* top = stack; + dot p = dot::make(i, j); + cc++; + + dist_labels[j * pitch + i] = cc; + + while (top >= stack) + { + int* dl = &dist_labels[p.y * pitch + p.x]; + unsigned char* sp = &source[p.y * image.step1() + p.x]; + + dl[0] = cc; + + //right + if( p.x < (width - 1) && dl[ +1] == -1 && inInt(sp[0], sp[+1])) + *top++ = dot::make(p.x + 1, p.y); + + //left + if( p.x > 0 && dl[-1] == -1 && inInt(sp[0], sp[-1])) + *top++ = dot::make(p.x - 1, p.y); + + //bottom + if( p.y < (height - 1) && dl[+pitch] == -1 && inInt(sp[0], sp[+image.step1()])) + *top++ = dot::make(p.x, p.y + 1); + + //top + if( p.y > 0 && dl[-pitch] == -1 && inInt(sp[0], sp[-image.step1()])) + *top++ = dot::make(p.x, p.y - 1); + + p = *--top; + } + } + delete[] stack; + } + + void checkCorrectness(cv::Mat gpu) + { + cv::Mat diff = gpu - _labels; + + int outliers = 0; + for (int j = 0; j < image.rows; ++j) + for (int i = 0; i < image.cols; ++i) + { + if ( (_labels.at(j,i) == gpu.at(j,i + 1)) && (diff.at(j, i) != diff.at(j,i + 1))) + { + outliers++; + // std::cout << j << " " << i << " " << _labels.at(j,i) << " " << gpu.at(j,i + 1) << " " << diff.at(j, i) << " " << diff.at(j,i + 1) << std::endl; + } + } + ASSERT_FALSE(outliers); + } + + cv::Mat image; + cv::Mat _labels; + }; +} + +struct Labeling : testing::TestWithParam +{ + cv::gpu::DeviceInfo devInfo; + + virtual void SetUp() + { + devInfo = GetParam(); + cv::gpu::setDevice(devInfo.deviceID()); + } + + cv::Mat loat_image() + { + return cv::imread(std::string( cvtest::TS::ptr()->get_data_path() ) + "labeling/IMG_0727.JPG"); + } +}; + +TEST_P(Labeling, ConnectedComponents) +{ + cv::Mat image; + cvtColor(loat_image(), image, CV_BGR2GRAY); + + cv::threshold(image, image, 150, 255, CV_THRESH_BINARY); + + ASSERT_TRUE(image.type() == CV_8UC1); + + GreedyLabeling host(image); + host(host._labels); + + cv::gpu::GpuMat mask; + mask.create(image.rows, image.cols, CV_8UC1); + + cv::gpu::GpuMat components; + components.create(image.rows, image.cols, CV_32SC1); + + cv::gpu::connectivityMask(cv::gpu::GpuMat(image), mask, cv::Scalar::all(0), cv::Scalar::all(2)); + + ASSERT_NO_THROW(cv::gpu::labelComponents(mask, components)); + + host.checkCorrectness(cv::Mat(components)); + cv::imshow("test", image); + cv::waitKey(0); + cv::imshow("test", host._labels); + cv::waitKey(0); +} + +INSTANTIATE_TEST_CASE_P(ConnectedComponents, Labeling, ALL_DEVICES); + +#endif \ No newline at end of file diff --git a/modules/gpu/test/test_video.cpp b/modules/gpu/test/test_video.cpp 
index db0d6f2ec8..0ee66ba522 100644 --- a/modules/gpu/test/test_video.cpp +++ b/modules/gpu/test/test_video.cpp @@ -624,6 +624,9 @@ TEST_P(MOG2, Update) TEST_P(MOG2, getBackgroundImage) { + if (useGray) + return; + cv::VideoCapture cap(inputFile); ASSERT_TRUE(cap.isOpened()); @@ -640,13 +643,6 @@ TEST_P(MOG2, getBackgroundImage) cap >> frame; ASSERT_FALSE(frame.empty()); -// if (useGray) -// { -// cv::Mat temp; -// cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); -// cv::swap(temp, frame); -// } - mog2(loadMat(frame, useRoi), foreground); mog2_gold(frame, foreground_gold); @@ -667,6 +663,101 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, MOG2, testing::Combine( testing::Values(UseGray(true), UseGray(false)), WHOLE_SUBMAT)); +////////////////////////////////////////////////////// +// VIBE + +PARAM_TEST_CASE(VIBE, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) +{ +}; + +TEST_P(VIBE, Accuracy) +{ + const cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + const cv::Size size = GET_PARAM(1); + const int type = GET_PARAM(2); + const bool useRoi = GET_PARAM(3); + + const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255)); + + cv::Mat frame = randomMat(size, type, 0.0, 100); + cv::gpu::GpuMat d_frame = loadMat(frame, useRoi); + + cv::gpu::VIBE_GPU vibe; + cv::gpu::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi); + vibe.initialize(d_frame); + + for (int i = 0; i < 20; ++i) + vibe(d_frame, d_fgmask); + + frame = randomMat(size, type, 160, 255); + d_frame = loadMat(frame, useRoi); + vibe(d_frame, d_fgmask); + + // now fgmask should be entirely foreground + ASSERT_MAT_NEAR(fullfg, d_fgmask, 0); +} + +INSTANTIATE_TEST_CASE_P(GPU_Video, VIBE, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4)), + WHOLE_SUBMAT)); + +////////////////////////////////////////////////////// +// GMG + +PARAM_TEST_CASE(GMG, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi) +{ +}; + +TEST_P(GMG, Accuracy) +{ + const cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + const cv::Size size = GET_PARAM(1); + const int depth = GET_PARAM(2); + const int channels = GET_PARAM(3); + const bool useRoi = GET_PARAM(4); + + const int type = CV_MAKE_TYPE(depth, channels); + + const cv::Mat zeros(size, CV_8UC1, cv::Scalar::all(0)); + const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255)); + + cv::Mat frame = randomMat(size, type, 0, 100); + cv::gpu::GpuMat d_frame = loadMat(frame, useRoi); + + cv::gpu::GMG_GPU gmg; + gmg.numInitializationFrames = 5; + gmg.smoothingRadius = 0; + gmg.initialize(d_frame.size(), 0, 255); + + cv::gpu::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi); + + for (int i = 0; i < gmg.numInitializationFrames; ++i) + { + gmg(d_frame, d_fgmask); + + // fgmask should be entirely background during training + ASSERT_MAT_NEAR(zeros, d_fgmask, 0); + } + + frame = randomMat(size, type, 160, 255); + d_frame = loadMat(frame, useRoi); + gmg(d_frame, d_fgmask); + + // now fgmask should be entirely foreground + ASSERT_MAT_NEAR(fullfg, d_fgmask, 0); +} + +INSTANTIATE_TEST_CASE_P(GPU_Video, GMG, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8U), MatType(CV_16U), MatType(CV_32F)), + testing::Values(Channels(1), Channels(3), Channels(4)), + WHOLE_SUBMAT)); + ////////////////////////////////////////////////////// // VideoWriter diff --git a/modules/highgui/doc/reading_and_writing_images_and_video.rst 
b/modules/highgui/doc/reading_and_writing_images_and_video.rst index 3e4acf3af0..f694ccd869 100644 --- a/modules/highgui/doc/reading_and_writing_images_and_video.rst +++ b/modules/highgui/doc/reading_and_writing_images_and_video.rst @@ -294,7 +294,7 @@ The methods/functions grab the next frame from video file or camera and return t The primary use of the function is in multi-camera environments, especially when the cameras do not have hardware synchronization. That is, you call ``VideoCapture::grab()`` for each camera and after that call the slower method ``VideoCapture::retrieve()`` to decode and get frame from each camera. This way the overhead on demosaicing or motion jpeg decompression etc. is eliminated and the retrieved frames from different cameras will be closer in time. -Also, when a connected camera is multi-head (for example, a stereo camera or a Kinect device), the correct way of retrieving data from it is to call `VideoCapture::grab` first and then call :ocv:func:`VideoCapture::retrieve` one or more times with different values of the ``channel`` parameter. See http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/kinect_maps.cpp +Also, when a connected camera is multi-head (for example, a stereo camera or a Kinect device), the correct way of retrieving data from it is to call `VideoCapture::grab` first and then call :ocv:func:`VideoCapture::retrieve` one or more times with different values of the ``channel`` parameter. See http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/kinect_maps.cpp VideoCapture::retrieve diff --git a/modules/highgui/doc/user_interface.rst b/modules/highgui/doc/user_interface.rst index 7b39a193c1..def8451a2c 100644 --- a/modules/highgui/doc/user_interface.rst +++ b/modules/highgui/doc/user_interface.rst @@ -203,7 +203,7 @@ Sets mouse handler for the specified window :param winname: Window name - :param onMouse: Mouse callback. See OpenCV samples, such as http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/ffilldemo.cpp, on how to specify and use the callback. + :param onMouse: Mouse callback. See OpenCV samples, such as http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/ffilldemo.cpp, on how to specify and use the callback. :param userdata: The optional parameter passed to the callback. diff --git a/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst b/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst index 6b4eda0b74..3019063b1a 100644 --- a/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst +++ b/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst @@ -202,7 +202,7 @@ Approximates a polygonal curve(s) with the specified precision. The functions ``approxPolyDP`` approximate a curve or a polygon with another curve/polygon with less vertices so that the distance between them is less or equal to the specified precision. It uses the Douglas-Peucker algorithm http://en.wikipedia.org/wiki/Ramer-Douglas-Peucker_algorithm -See http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/contours.cpp for the function usage model. +See http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/contours.cpp for the function usage model. 
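A short usage sketch for approxPolyDP as described above (illustration only; the input polyline is synthetic):

#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <vector>

int main()
{
    // a dense, nearly parabolic open polyline
    std::vector<cv::Point> curve;
    for (int x = 0; x <= 100; ++x)
        curve.push_back(cv::Point(x, x * x / 100));

    std::vector<cv::Point> approx;
    const double epsilon = 2.0;                      // maximum allowed deviation, in pixels
    cv::approxPolyDP(curve, approx, epsilon, false); // false: the curve is not closed

    // approx holds far fewer vertices, each within epsilon of the original curve
    return approx.empty() ? 1 : 0;
}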
ApproxChains diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index 2165673c84..91a004a866 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -1298,17 +1298,17 @@ public: maxk(_maxk), space_ofs(_space_ofs), space_weight(_space_weight), color_weight(_color_weight) { } - + virtual void operator() (const Range& range) const { int i, j, cn = dest->channels(), k; Size size = dest->size(); - + for( i = range.start; i < range.end; i++ ) { const uchar* sptr = temp->ptr(i+radius) + radius*cn; uchar* dptr = dest->ptr(i); - + if( cn == 1 ) { for( j = 0; j < size.width; j++ ) @@ -1351,10 +1351,10 @@ public: } } } - + private: - const Mat *temp; Mat *dest; + const Mat *temp; int radius, maxk, *space_ofs; float *space_weight, *color_weight; }; @@ -1367,40 +1367,40 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d, int cn = src.channels(); int i, j, maxk, radius; Size size = src.size(); - + CV_Assert( (src.type() == CV_8UC1 || src.type() == CV_8UC3) && src.type() == dst.type() && src.size() == dst.size() && src.data != dst.data ); - + if( sigma_color <= 0 ) sigma_color = 1; if( sigma_space <= 0 ) sigma_space = 1; - + double gauss_color_coeff = -0.5/(sigma_color*sigma_color); double gauss_space_coeff = -0.5/(sigma_space*sigma_space); - + if( d <= 0 ) radius = cvRound(sigma_space*1.5); else radius = d/2; radius = MAX(radius, 1); d = radius*2 + 1; - + Mat temp; copyMakeBorder( src, temp, radius, radius, radius, radius, borderType ); - + vector _color_weight(cn*256); vector _space_weight(d*d); vector _space_ofs(d*d); float* color_weight = &_color_weight[0]; float* space_weight = &_space_weight[0]; int* space_ofs = &_space_ofs[0]; - + // initialize color-related bilateral filter coefficients for( i = 0; i < 256*cn; i++ ) color_weight[i] = (float)std::exp(i*i*gauss_color_coeff); - + // initialize space-related bilateral filter coefficients for( i = -radius, maxk = 0; i <= radius; i++ ) for( j = -radius; j <= radius; j++ ) @@ -1411,7 +1411,7 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d, space_weight[maxk] = (float)std::exp(r*r*gauss_space_coeff); space_ofs[maxk++] = (int)(i*temp.step + j*cn); } - + BilateralFilter_8u_Invoker body(dst, temp, radius, maxk, space_ofs, space_weight, color_weight); parallel_for_(Range(0, size.height), body); } diff --git a/modules/objdetect/doc/cascade_classification.rst b/modules/objdetect/doc/cascade_classification.rst index 6b3a3570a7..f86a985615 100644 --- a/modules/objdetect/doc/cascade_classification.rst +++ b/modules/objdetect/doc/cascade_classification.rst @@ -21,7 +21,7 @@ The word "cascade" in the classifier name means that the resultant classifier co The feature used in a particular classifier is specified by its shape (1a, 2b etc.), position within the region of interest and the scale (this scale is not the same as the scale used at the detection stage, though these two scales are multiplied). For example, in the case of the third line feature (2c) the response is calculated as the difference between the sum of image pixels under the rectangle covering the whole feature (including the two white stripes and the black stripe in the middle) and the sum of the image pixels under the black stripe multiplied by 3 in order to compensate for the differences in the size of areas. The sums of pixel values over a rectangular regions are calculated rapidly using integral images (see below and the :ocv:func:`integral` description). 
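The rectangle sums mentioned above are taken from an integral image, so any rectangular sum reduces to four lookups. A minimal sketch (illustration only, not part of the patch):

#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>

// sum over rectangle r of the original image, using the (rows+1)x(cols+1) CV_32S integral image
static int rectSum(const cv::Mat& isum, const cv::Rect& r)
{
    return isum.at<int>(r.y + r.height, r.x + r.width)
         - isum.at<int>(r.y + r.height, r.x)
         - isum.at<int>(r.y,            r.x + r.width)
         + isum.at<int>(r.y,            r.x);
}

int main()
{
    cv::Mat img(64, 64, CV_8UC1, cv::Scalar(1)), sum;
    cv::integral(img, sum, CV_32S);
    return rectSum(sum, cv::Rect(8, 8, 16, 16)) == 16 * 16 ? 0 : 1;
}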
To see the object detector at work, have a look at the facedetect demo: -http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/facedetect.cpp +http://code.opencv.org/projects/opencv/repository/revisions/master/entry/samples/cpp/facedetect.cpp The following reference is for the detection part only. There is a separate application called ``opencv_traincascade`` that can train a cascade of boosted classifiers from a set of samples. diff --git a/modules/objdetect/src/cascadedetect.hpp b/modules/objdetect/src/cascadedetect.hpp index 7e6a346737..ec208c1ba6 100644 --- a/modules/objdetect/src/cascadedetect.hpp +++ b/modules/objdetect/src/cascadedetect.hpp @@ -444,7 +444,7 @@ inline int predictCategoricalStump( CascadeClassifier& cascade, Ptr images; + std::vector image_sums; + std::vector image_sqsums; + }; + + + //! computes the proximity map for the raster template and the image where the template is searched for + // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4 + // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4 + CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method); + + //! computes the proximity map for the raster template and the image where the template is searched for + // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4 + // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4 + CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf); + +#ifdef HAVE_CLAMDFFT + ///////////////////////////////////////// clAmdFft related ///////////////////////////////////////// + // the two functions must be called before/after run any fft library functions. + CV_EXPORTS void fft_setup(); // this will be implicitly invoked + CV_EXPORTS void fft_teardown(); // you need to teardown fft library manually + + /////////////////////////////////////// DFT ///////////////////////////////////////////////////// + //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix. + //! Param dft_size is the size of DFT transform. + //! + //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format. + // support src type of CV32FC1, CV32FC2 + // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS + // dft_size is the size of original input, which is used for transformation from complex to real. + // dft_size must be powers of 2, 3 and 5 + // real to complex dft requires at least v1.8 clAmdFft + // real to complex dft output is not the same with cpu version + // real to complex and complex to real does not support DFT_ROWS + CV_EXPORTS void dft(const oclMat& src, oclMat& dst, Size dft_size = Size(0, 0), int flags = 0); +#endif // HAVE_CLAMDFFT + +#ifdef HAVE_CLAMDBLAS + //! 
implements generalized matrix product algorithm GEMM from BLAS + // The functionality requires clAmdBlas library + // only support type CV_32FC1 + // flag GEMM_3_T is not supported + CV_EXPORTS void gemm(const oclMat& src1, const oclMat& src2, double alpha, + const oclMat& src3, double beta, oclMat& dst, int flags = 0); +#endif + + //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector ////////////// + struct CV_EXPORTS HOGDescriptor + { + enum { DEFAULT_WIN_SIGMA = -1 }; + enum { DEFAULT_NLEVELS = 64 }; + enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL }; + + HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16), + Size block_stride=Size(8, 8), Size cell_size=Size(8, 8), + int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA, + double threshold_L2hys=0.2, bool gamma_correction=true, + int nlevels=DEFAULT_NLEVELS); + + size_t getDescriptorSize() const; + size_t getBlockHistogramSize() const; + + void setSVMDetector(const vector& detector); + + static vector getDefaultPeopleDetector(); + static vector getPeopleDetector48x96(); + static vector getPeopleDetector64x128(); + + void detect(const oclMat& img, vector& found_locations, + double hit_threshold=0, Size win_stride=Size(), + Size padding=Size()); + + void detectMultiScale(const oclMat& img, vector& found_locations, + double hit_threshold=0, Size win_stride=Size(), + Size padding=Size(), double scale0=1.05, + int group_threshold=2); + + void getDescriptors(const oclMat& img, Size win_stride, + oclMat& descriptors, + int descr_format=DESCR_FORMAT_COL_BY_COL); + + Size win_size; + Size block_size; + Size block_stride; + Size cell_size; + int nbins; + double win_sigma; + double threshold_L2hys; + bool gamma_correction; + int nlevels; + + protected: + void computeBlockHistograms(const oclMat& img); + void computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle); + + double getWinSigma() const; + bool checkDetectorSize() const; + + static int numPartsWithin(int size, int part_size, int stride); + static Size numPartsWithin(Size size, Size part_size, Size stride); + + // Coefficients of the separating plane + float free_coef; + oclMat detector; + + // Results of the last classification step + oclMat labels; + Mat labels_host; + + // Results of the last histogram evaluation step + oclMat block_hists; + + // Gradients conputation results + oclMat grad, qangle; + + std::vector image_scales; + }; + + //! Speeded up robust features, port from GPU module. + ////////////////////////////////// SURF ////////////////////////////////////////// + class CV_EXPORTS SURF_OCL + { + public: + enum KeypointLayout + { + X_ROW = 0, + Y_ROW, + LAPLACIAN_ROW, + OCTAVE_ROW, + SIZE_ROW, + ANGLE_ROW, + HESSIAN_ROW, + ROWS_COUNT + }; + + //! the default constructor + SURF_OCL(); + //! the full constructor taking all the necessary parameters + explicit SURF_OCL(double _hessianThreshold, int _nOctaves=4, + int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false); + + //! returns the descriptor size in float's (64 or 128) + int descriptorSize() const; + + //! upload host keypoints to device memory + void uploadKeypoints(const vector& keypoints, oclMat& keypointsocl); + //! download keypoints from device to host memory + void downloadKeypoints(const oclMat& keypointsocl, vector& keypoints); + + //! download descriptors from device to host memory + void downloadDescriptors(const oclMat& descriptorsocl, vector& descriptors); + + //! 
finds the keypoints using fast hessian detector used in SURF + //! supports CV_8UC1 images + //! keypoints will have nFeature cols and 6 rows + //! keypoints.ptr(X_ROW)[i] will contain x coordinate of i'th feature + //! keypoints.ptr(Y_ROW)[i] will contain y coordinate of i'th feature + //! keypoints.ptr(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature + //! keypoints.ptr(OCTAVE_ROW)[i] will contain octave of i'th feature + //! keypoints.ptr(SIZE_ROW)[i] will contain size of i'th feature + //! keypoints.ptr(ANGLE_ROW)[i] will contain orientation of i'th feature + //! keypoints.ptr(HESSIAN_ROW)[i] will contain response of i'th feature + void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints); + //! finds the keypoints and computes their descriptors. + //! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction + void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors, + bool useProvidedKeypoints = false); + + void operator()(const oclMat& img, const oclMat& mask, std::vector& keypoints); + void operator()(const oclMat& img, const oclMat& mask, std::vector& keypoints, oclMat& descriptors, + bool useProvidedKeypoints = false); + + void operator()(const oclMat& img, const oclMat& mask, std::vector& keypoints, std::vector& descriptors, + bool useProvidedKeypoints = false); + + void releaseMemory(); + + // SURF parameters + float hessianThreshold; + int nOctaves; + int nOctaveLayers; + bool extended; + bool upright; + + //! max keypoints = min(keypointsRatio * img.size().area(), 65535) + float keypointsRatio; + + oclMat sum, mask1, maskSum, intBuffer; + + oclMat det, trace; + + oclMat maxPosBuffer; + + }; } } #include "opencv2/ocl/matrix_operations.hpp" diff --git a/modules/ocl/perf/interpolation.hpp b/modules/ocl/perf/interpolation.hpp new file mode 100644 index 0000000000..d9180048e6 --- /dev/null +++ b/modules/ocl/perf/interpolation.hpp @@ -0,0 +1,120 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_TEST_INTERPOLATION_HPP__ +#define __OPENCV_TEST_INTERPOLATION_HPP__ + +template <typename T> T readVal(const cv::Mat& src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) +{ + if (border_type == cv::BORDER_CONSTANT) + return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? src.at<T>(y, x * src.channels() + c) : cv::saturate_cast<T>(borderVal.val[c]); + + return src.at<T>(cv::borderInterpolate(y, src.rows, border_type), cv::borderInterpolate(x, src.cols, border_type) * src.channels() + c); +} + +template <typename T> struct NearestInterpolator +{ + static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) + { + return readVal<T>(src, cvFloor(y), cvFloor(x), c, border_type, borderVal); + } +}; + +template <typename T> struct LinearInterpolator +{ + static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) + { + x -= 0.5f; + y -= 0.5f; + + int x1 = cvFloor(x); + int y1 = cvFloor(y); + int x2 = x1 + 1; + int y2 = y1 + 1; + + float res = 0; + + res += readVal<T>(src, y1, x1, c, border_type, borderVal) * ((x2 - x) * (y2 - y)); + res += readVal<T>(src, y1, x2, c, border_type, borderVal) * ((x - x1) * (y2 - y)); + res += readVal<T>(src, y2, x1, c, border_type, borderVal) * ((x2 - x) * (y - y1)); + res += readVal<T>(src, y2, x2, c, border_type, borderVal) * ((x - x1) * (y - y1)); + + return cv::saturate_cast<T>(res); + } +}; + +template <typename T> struct CubicInterpolator +{ + static float getValue(float p[4], float x) + { + return p[1] + 0.5 * x * (p[2] - p[0] + x*(2.0*p[0] - 5.0*p[1] + 4.0*p[2] - p[3] + x*(3.0*(p[1] - p[2]) + p[3] - p[0]))); + } + + static float getValue(float p[4][4], float x, float y) + { + float arr[4]; + + arr[0] = getValue(p[0], x); + arr[1] = getValue(p[1], x); + arr[2] = getValue(p[2], x); + arr[3] = getValue(p[3], x); + + return getValue(arr, y); + } + + static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) + { + int ix = cvRound(x); + int iy = cvRound(y); + + float vals[4][4] = + { + {readVal<T>(src, iy - 2, ix - 2, c, border_type, borderVal), readVal<T>(src, iy - 2, ix - 1, c, border_type, borderVal), readVal<T>(src, iy - 2, ix, c, border_type, borderVal), readVal<T>(src, iy - 2, ix + 1, c, border_type, borderVal)}, + {readVal<T>(src, iy - 1, ix - 2, c, border_type, borderVal), readVal<T>(src, iy - 1, ix - 1, c, border_type, borderVal), readVal<T>(src, iy - 1, ix, c, border_type, borderVal), readVal<T>(src, iy - 1, ix + 1, c, border_type, borderVal)}, + {readVal<T>(src, iy , ix - 2, c, border_type, borderVal), readVal<T>(src, iy , ix - 1, c, border_type, borderVal), readVal<T>(src, iy , ix, c, border_type, borderVal), readVal<T>(src, iy , ix + 1, c, border_type, borderVal)}, + {readVal<T>(src, iy + 1, ix - 2, c, border_type, borderVal), readVal<T>(src, iy + 1, ix - 1, c, border_type, borderVal), readVal<T>(src, iy + 1, ix, c, border_type, borderVal), readVal<T>(src, iy + 1, ix + 1, c, border_type, borderVal)}, + }; + +
return cv::saturate_cast<T>(getValue(vals, (x - ix + 2.0) / 4.0, (y - iy + 2.0) / 4.0)); + } +}; + +#endif // __OPENCV_TEST_INTERPOLATION_HPP__ diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp new file mode 100644 index 0000000000..0d9d96791a --- /dev/null +++ b/modules/ocl/perf/main.cpp @@ -0,0 +1,108 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage.
+// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace std; +using namespace cv; +using namespace cv::ocl; +using namespace cvtest; +using namespace testing; + +void print_info() +{ + printf("\n"); +#if defined _WIN32 +# if defined _WIN64 + puts("OS: Windows 64"); +# else + puts("OS: Windows 32"); +# endif +#elif defined linux +# if defined _LP64 + puts("OS: Linux 64"); +# else + puts("OS: Linux 32"); +# endif +#elif defined __APPLE__ +# if defined _LP64 + puts("OS: Apple 64"); +# else + puts("OS: Apple 32"); +# endif +#endif + +} + +#if PERF_TEST_OCL +int main(int argc, char** argv) +{ + + static std::vector<cv::ocl::Info> ocl_info; + ocl::getDevice(ocl_info); + + run_perf_test(); + return 0; +} +#else +int main(int argc, char** argv) +{ + TS::ptr()->init("ocl"); + InitGoogleTest(&argc, argv); + + print_info(); + + return RUN_ALL_TESTS(); +} +#endif // PERF_TEST_OCL + +#else // HAVE_OPENCL + +int main() +{ + printf("OpenCV was built without OpenCL support\n"); + return 0; +} + + +#endif // HAVE_OPENCL diff --git a/modules/ocl/perf/perf_test_ocl.cpp b/modules/ocl/perf/perf_test_ocl.cpp new file mode 100644 index 0000000000..67f20a33db --- /dev/null +++ b/modules/ocl/perf/perf_test_ocl.cpp @@ -0,0 +1,1191 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicore Ware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage.
+// +//M*/ + +#include "precomp.hpp" +#include + +#if PERF_TEST_OCL + +#ifdef HAVE_OPENCL + +#define SHOW_CPU false +#define REPEAT 1000 +#define COUNT_U 0 // count the uploading execution time for ocl mat structures +#define COUNT_D 0 + + +// the following macro section tests the target function (kernel) performance +// upload is the code snippet for converting cv::mat to cv::ocl::oclMat +// downloading is the code snippet for converting cv::ocl::oclMat back to cv::mat +// change COUNT_U and COUNT_D to take downloading and uploading time into account +#define P_TEST_FULL( upload, kernel_call, download ) \ +{ \ + std::cout<< "\n" #kernel_call "\n----------------------"; \ + {upload;} \ + R_TEST( kernel_call, 15 ); \ + double t = (double)cvGetTickCount(); \ + R_T( { \ + if( COUNT_U ) {upload;} \ + kernel_call; \ + if( COUNT_D ) {download;} \ + } ); \ + t = (double)cvGetTickCount() - t; \ + std::cout << "runtime is " << t/((double)cvGetTickFrequency()* 1000.) << "ms" << std::endl; \ +} + + +#define R_T2( test ) \ +{ \ + std::cout<< "\n" #test "\n----------------------"; \ + R_TEST( test, 15 ) \ + clock_t st = clock(); \ + R_T( test ) \ + std::cout<< clock() - st << "ms\n"; \ +} +#define R_T( test ) \ + R_TEST( test, REPEAT ) +#define R_TEST( test, repeat ) \ + try{ \ + for( int i = 0; i < repeat; i ++ ) { test; } \ + } catch( ... ) { std::cout << "||||| Exception catched! |||||\n"; return; } + +#define FILTER_TEST_IMAGE "C:/Windows/Web/Wallpaper/Landscapes/img9.jpg" +#define WARN_NRUN( name ) \ + std::cout << "Warning: " #name " is not runnable!\n"; + + +void print_info(); + +// performance base class +struct PerfTest +{ + virtual void Run() = 0; + protected: + virtual void SetUp() = 0; +}; +/////////////////////////////////////// +// Arithm +struct ArithmTestP : PerfTest +{ + int type; + cv::Scalar val; + + cv::Size size; + cv::Mat mat1, mat2; + cv::Mat mask; + cv::Mat dst; + cv::ocl::oclMat oclRes, oclmat1, oclmat2; + cv::ocl::oclMat oclmask; + std::vector dstv; + protected: + ArithmTestP() : type( CV_8UC4 ) {} + virtual void SetUp() + { + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + size = cv::Size( 3000, 3000 ); // big input image + mat1 = cvtest::randomMat(rng, size, type, 1, 255, false); + mat2 = cvtest::randomMat(rng, size, type, 1, 255, false); + mask = cvtest::randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + + oclmat1 = cv::ocl::oclMat(mat1); + oclmat2 = cv::ocl::oclMat(mat2); + oclmask = cv::ocl::oclMat(mask); + } +}; + +struct AddArrayP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::add(oclmat1, oclmat2, oclRes), + oclRes.download(dst); + ); + } +}; + +struct SubtractArrayP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::subtract(oclmat1, oclmat2, oclRes), + oclRes.download(dst); + ); + } +}; + +struct MultiplyArrayP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + clock_t start = clock(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::multiply(oclmat1, oclmat2, oclRes), + oclRes.download(dst); + ); + } +}; + +struct DivideArrayP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + 
cv::ocl::divide(oclmat1, oclmat2, oclRes), + oclRes.download(dst); + ); + } +}; + +struct ExpP : ArithmTestP +{ + void Run() + { + type = CV_32FC1; + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::exp(oclmat1, oclRes), + oclRes.download(dst); + ); + } +}; + +struct LogP : ArithmTestP +{ + void Run() + { + type = CV_32FC1; + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::log(oclmat1, oclRes), + oclRes.download(dst); + ); + } +}; + +struct CompareP : ArithmTestP +{ + virtual void Run() + { + type = CV_32FC1; + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::compare(oclmat1, oclmat2, oclRes, cv::CMP_EQ), + oclRes.download(dst); + ); + } +}; + +struct FlipP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::flip(oclmat1, oclRes, 0), + oclRes.download(dst); + ); + } + protected: + virtual void SetUp() + { + type = CV_8UC4; + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + size = cv::Size(3000, 3000); + mat1 = cvtest::randomMat(rng, size, type, 1, 255, false); + oclmat1 = cv::ocl::oclMat(mat1); + } +}; + +struct MagnitudeP : ArithmTestP +{ + virtual void Run() + { + type = CV_32F; + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::magnitude(oclmat1, oclmat1, oclRes), + oclRes.download(dst); + ); + } +}; + +struct LUTP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);ocllut = cv::ocl::oclMat(lut), + cv::ocl::LUT(oclmat1, ocllut, oclRes), + oclRes.download(dst); + ); + } + protected: + cv::Mat lut; + cv::ocl::oclMat ocllut; + virtual void SetUp() + { + type = CV_8UC1; + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + size = cv::Size(3000, 3000); + mat1 = cvtest::randomMat(rng, size, type, 1, 255, false); + lut = cvtest::randomMat(rng, cv::Size(256, 1), CV_8UC1, 100, 200, false); + oclmat1 = cv::ocl::oclMat(mat1); + ocllut = cv::ocl::oclMat(lut); + } +}; + +struct MinMaxP : ArithmTestP +{ + double minVal_gold, minVal; + double maxVal_gold, maxVal; + + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::minMax(oclmat1, &minVal, &maxVal, oclmat2), + {}; + ); + } + + protected: + virtual void SetUp() + { + type = CV_64F; + + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + + size = cv::Size(3000, 3000); + + mat1 = cvtest::randomMat(rng, size, type, 0.0, 127.0, false); + mat2 = cvtest::randomMat(rng, size, CV_8UC1, 0, 2, false); + + oclmat1 = cv::ocl::oclMat(mat1); + oclmat2 = cv::ocl::oclMat(mat2); + } +}; + +struct MinMaxLocP : MinMaxP +{ + cv::Point minLoc_gold; + cv::Point maxLoc_gold; + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::minMaxLoc(oclmat1, &minVal, &maxVal, &minLoc_gold, &maxLoc_gold, oclmat2), + {} + ); + } +}; + +struct CountNonZeroP : ArithmTestP +{ + int n; + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + n = cv::ocl::countNonZero(oclmat1), + {} + ); + } + protected: + virtual void SetUp() + { + type = 6; + + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + + size = cv::Size( 3000, 3000 ); + + cv::Mat matBase = cvtest::randomMat(rng, size, CV_8U, 0.0, 1.0, false); + matBase.convertTo(mat1, type); + + oclmat1 = cv::ocl::oclMat(mat1); + } +}; + +struct SumP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + cv::Scalar n; + 
P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + n = cv::ocl::sum(oclmat1), + {} + ); + } +}; + +struct BitwiseP : ArithmTestP +{ + protected: + virtual void SetUp() + { + type = CV_8UC4; + + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + + size = cv::Size( 3000, 3000 ); + + mat1.create(size, type); + mat2.create(size, type); + + for (int i = 0; i < mat1.rows; ++i) + { + cv::Mat row1(1, static_cast(mat1.cols * mat1.elemSize()), CV_8U, (void*)mat1.ptr(i)); + rng.fill(row1, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255)); + + cv::Mat row2(1, static_cast(mat2.cols * mat2.elemSize()), CV_8U, (void*)mat2.ptr(i)); + rng.fill(row2, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255)); + } + oclmat1 = cv::ocl::oclMat(mat1); + oclmat2 = cv::ocl::oclMat(mat2); + } +}; + +struct BitwiseNotP : BitwiseP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::bitwise_not(oclmat1, oclRes), + oclRes.download(dst) + ); + } +}; + +struct BitwiseAndP : BitwiseP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::bitwise_and(oclmat1, oclmat2, oclRes), + oclRes.download(dst) + ); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::bitwise_and(oclmat1, val, oclRes), + oclRes.download(dst) + ); + } +}; + +struct BitwiseXorP : BitwiseP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::bitwise_xor(oclmat1, oclmat2, oclRes), + oclRes.download(dst) + ); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::bitwise_xor(oclmat1, val, oclRes), + oclRes.download(dst) + ); + + } +}; + +struct BitwiseOrP : BitwiseP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::bitwise_or(oclmat1, oclmat2, oclRes), + oclRes.download(dst) + ); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::bitwise_or(oclmat1, val, oclRes), + oclRes.download(dst) + ); + } +}; + +struct TransposeP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::transpose(oclmat1, oclRes), + oclRes.download(dst) + ); + } +}; + +struct AbsdiffArrayP : ArithmTestP +{ + virtual void Run() + { + type = CV_32FC1; + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::absdiff(oclmat1, oclmat2, oclRes), + oclRes.download(dst) + ); + } +}; + +struct PhaseP : ArithmTestP +{ + virtual void Run() + { + type = CV_32F; + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::phase(oclmat1,oclmat2,oclRes,1), + oclRes.download(dst) + ); + } +}; + +struct CartToPolarP : ArithmTestP +{ + cv::ocl::oclMat oclRes1; + virtual void Run() + { + type = CV_64FC4; + SetUp(); + clock_t start = clock(); + R_TEST( + cv::ocl::cartToPolar(oclmat1,oclmat2,oclRes, oclRes1, 1); + if( COUNT_D ) {oclRes.download(dst);oclRes1.download(dst);} + , 5); + std::cout<< "ocl::CartToPolar -- " << clock() - start << "ms\n"; + } +}; + +struct PolarToCartP : ArithmTestP +{ + cv::ocl::oclMat oclRes1; + virtual void Run() + { + type = CV_64FC4; + SetUp(); + clock_t start = clock(); + R_TEST( + cv::ocl::polarToCart(oclmat1,oclmat2,oclRes, oclRes1, 1); + if( COUNT_D ) {oclRes.download(dst);oclRes1.download(dst);} + , 2); + std::cout<< "ocl::polarToCart -- " << clock() - start << "ms\n"; + } +}; + +/////////////////////////////////////// +// split & 
merge +struct SplitP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::split(oclmat1, dev_dst), + { + dstv.resize(dev_dst.size()); + for (size_t i = 0; i < dev_dst.size(); ++i) + { + dev_dst[i].download(dstv[i]); + } + } + ); + } + protected: + std::vector dev_dst; + virtual void SetUp() + { + size = cv::Size( 3000, 3000 ); + + mat1.create(size, type); + mat1.setTo(cv::Scalar(1.0, 2.0, 3.0, 4.0)); + + oclmat1 = cv::ocl::oclMat(mat1); + } +}; + +struct MergeP : SplitP +{ + virtual void Run() + { + SetUp(); + cv::ocl::split(oclmat1, dev_dst); + cv::split(mat1, dstv); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::merge(dev_dst, oclmat2), + oclmat2.download(dst) + ); + } +}; + +struct SetToP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + static cv::Scalar s = cv::Scalar(1, 2, 3, 4); + P_TEST_FULL( + oclmat2 = cv::ocl::oclMat(mat2), + oclmat1.setTo( s, oclmat2 ), + oclmat1.download(dst); + ); + } + protected: + virtual void SetUp() + { + type = CV_32FC4; + size = cv::Size(3000, 3000); + + mat1.create(size, type); + oclmat1.create(size, type); + + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + mat2 = cvtest::randomMat(rng, size, CV_8UC1, 0.0, 1.5, false); + oclmat2 = cv::ocl::oclMat(mat2); + } +}; + +struct CopyToP : SetToP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + oclmat1.copyTo( oclRes, oclmat2 ), + oclRes.download(dst) + ); + } +}; + +struct ConvertToP : ArithmTestP +{ + virtual void Run() + { + type = CV_32FC1;; + SetUp(); + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + const double a = rng.uniform(0.0, 1.0); + const double b = rng.uniform(-10.0, 10.0); + + int type2 = CV_32FC4; + + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + oclmat1.convertTo( oclRes, type2 /*, a, b */ ), // fails when scaling factors a and b are specified + oclRes.download(dst) + ); + } +}; + +//////////////////////////////////////////// +// Filters + +struct FilterTestP : PerfTest +{ + protected: + int ksize; + int dx, dy; + + cv::Mat img_rgba; + cv::Mat img_gray; + + cv::ocl::oclMat ocl_img_rgba; + cv::ocl::oclMat ocl_img_gray; + + cv::ocl::oclMat dev_dst_rgba; + cv::ocl::oclMat dev_dst_gray; + + cv::Mat dst_rgba; + cv::Mat dst_gray; + + cv::Mat kernel; + + int bordertype; + + virtual void SetUp() + { + bordertype = (int)cv::BORDER_DEFAULT; + ksize = 7; + dx = ksize/2; dy = ksize/2; + + kernel = cv::Mat::ones(ksize, ksize, CV_8U); + + cv::Mat img = readImage(FILTER_TEST_IMAGE); + ASSERT_FALSE(img.empty()); + + cv::cvtColor(img, img_rgba, CV_BGR2BGRA); + cv::cvtColor(img, img_gray, CV_BGR2GRAY); + + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + } +}; + +struct BlurP : FilterTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::blur(ocl_img_rgba, dev_dst_rgba, cv::Size(ksize, ksize), cv::Point(-1,-1), bordertype); + cv::ocl::blur(ocl_img_gray, dev_dst_gray, cv::Size(ksize, ksize), cv::Point(-1,-1), bordertype); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct SobelP : FilterTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::Sobel(ocl_img_rgba, dev_dst_rgba, -1, dx, dy, ksize, 1, 0, bordertype); + cv::ocl::Sobel(ocl_img_gray, dev_dst_gray, -1, dx, dy, 
ksize, 1, 0, bordertype); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct ScharrP : FilterTestP +{ + virtual void Run() + { + SetUp(); + dx = 0; dy = 1; + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::Scharr(ocl_img_rgba, dev_dst_rgba, -1, dx, dy, 1, 0, bordertype); + cv::ocl::Scharr(ocl_img_gray, dev_dst_gray, -1, dx, dy, 1, 0, bordertype); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct GaussianBlurP : FilterTestP +{ + virtual void Run() + { + double sigma1 = 3, sigma2 = 3; + SetUp(); + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::GaussianBlur(ocl_img_rgba, dev_dst_rgba, cv::Size(ksize, ksize), sigma1, sigma2); + cv::ocl::GaussianBlur(ocl_img_gray, dev_dst_gray, cv::Size(ksize, ksize), sigma1, sigma2); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct DilateP : FilterTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::dilate(ocl_img_rgba, dev_dst_rgba, kernel); + cv::ocl::dilate(ocl_img_gray, dev_dst_gray, kernel); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct ErodeP : FilterTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::erode(ocl_img_rgba, dev_dst_rgba, kernel); + cv::ocl::erode(ocl_img_gray, dev_dst_gray, kernel); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct MorphExP : FilterTestP +{ + virtual void Run() + { + SetUp(); + cv::ocl::oclMat okernel; + P_TEST_FULL( + { + okernel = cv::ocl::oclMat(kernel); + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::morphologyEx(ocl_img_rgba, dev_dst_rgba, 3, okernel); + cv::ocl::morphologyEx(ocl_img_gray, dev_dst_gray, 3, okernel); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct LaplacianP : FilterTestP +{ + void Run() + { + SetUp(); + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::Laplacian(ocl_img_rgba, dev_dst_rgba, -1, 3 ); + cv::ocl::Laplacian(ocl_img_gray, dev_dst_gray, -1, 3 ); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +//////////////////// +// histograms +struct CalcHistP : PerfTest +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat = cv::ocl::oclMat( src ), + cv::ocl::calcHist(oclmat, oclRes), + oclRes.download(hist) + ); + } + protected: + cv::Size size; + cv::Mat src, hist; + + cv::ocl::oclMat oclmat; + cv::ocl::oclMat oclRes; + + virtual void SetUp() + { + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + size = cv::Size(3000, 3000); + src = cvtest::randomMat(rng, size, CV_8UC1, 0, 255, false); + oclmat = cv::ocl::oclMat( src ); + } +}; + +struct EqualizeHistP : CalcHistP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat = cv::ocl::oclMat( src ), + cv::ocl::equalizeHist(oclmat, oclRes), + oclRes.download(hist) + ); + } +}; + +struct ThresholdP : CalcHistP +{ + virtual void Run() + { + SetUp(); + int threshOp = 
(int)cv::THRESH_TOZERO_INV;; + double maxVal = 200; + double thresh = 125; + + clock_t start = clock(); + + P_TEST_FULL( + oclmat = cv::ocl::oclMat( src ), + cv::ocl::threshold(oclmat, oclRes, thresh, maxVal, threshOp ), + oclRes.download(hist) + ); + } +}; + +struct ResizeP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat( mat1 ), + cv::ocl::resize(oclmat1, oclRes, cv::Size(), 2.0, 2.0), + oclRes.download(dst) + ); + } +}; + +struct CvtColorP : PerfTest +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat = cv::ocl::oclMat( img ), + cv::ocl::cvtColor(oclmat, ocldst, cvtcode), + ocldst.download(dst) + ); + } + protected: + int type; + int cvtcode; + + cv::Mat img, dst; + cv::ocl::oclMat oclmat, ocldst; + virtual void SetUp() + { + type = CV_8U; + cvtcode = CV_BGR2GRAY; + cv::Mat imgBase = readImage(FILTER_TEST_IMAGE); + ASSERT_FALSE(imgBase.empty()); + + imgBase.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0); + oclmat = cv::ocl::oclMat( img ); + }; +}; + + +struct WarpAffineP : ArithmTestP +{ + void Run() + { + SetUp(); + const double aplha = CV_PI / 4; + double mat[2][3] = { {std::cos(aplha), -std::sin(aplha), mat1.cols / 2}, + {std::sin(aplha), std::cos(aplha), 0}}; + cv::Mat M(2, 3, CV_64F, (void*) mat); + + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat( mat1 ), + cv::ocl::warpAffine( oclmat1, oclRes, M, cv::Size(1500, 1500) ), + oclRes.download(dst) + ); + } +}; + +struct WarpPerspectiveP : ArithmTestP +{ + void Run() + { + SetUp(); + const double aplha = CV_PI / 4; + double mat[3][3] = { {std::cos(aplha), -std::sin(aplha), mat1.cols / 2}, + {std::sin(aplha), std::cos(aplha), 0}, + {0.0, 0.0, 1.0}}; + cv::Mat M(3, 3, CV_64F, (void*) mat); + + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat( mat1 ), + cv::ocl::warpPerspective( oclmat1, oclRes, M, cv::Size(1500, 1500) ), + oclRes.download(dst) + ); + } +}; + + +struct CornerHarrisP : FilterTestP +{ + void Run() + { + SetUp(); + bordertype = 2; + P_TEST_FULL( + { + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::cornerHarris(ocl_img_gray, dev_dst_gray, 3, ksize, 0.5, bordertype ); + }, + { + dev_dst_gray.download(dst_gray); + }); + } +}; + +void test() +{ + clock_t start = clock(); + std::cout << ">>>>>>>> Performance test started <<<<<<<<\n"; + /* + { + AddArrayP AddArrayP; + AddArrayP.Run(); + SubtractArrayP subarray; + subarray.Run(); + MultiplyArrayP MultiplyArrayP; + MultiplyArrayP.Run(); + DivideArrayP DivideArrayP; + DivideArrayP.Run(); + } + std::cout.flush(); + { + CompareP comp; + comp.Run(); + MagnitudeP magnitude; + magnitude.Run(); + LUTP lut; + lut.Run(); + FlipP FlipP; + FlipP.Run(); + MinMaxP minmax; + minmax.Run(); + MinMaxLocP minmaxloc; + minmaxloc.Run(); + CountNonZeroP cnz; + cnz.Run(); + SumP sum; + sum.Run(); + }*/ + /* std::cout.flush(); + { + BitwiseNotP bn; + bn.Run(); + BitwiseOrP bo; + bo.Run(); + BitwiseAndP ba; + ba.Run(); + BitwiseXorP bx; + bx.Run(); + }*/ + + std::cout.flush(); + { + // TransposeP transpose; + // transpose.Run(); + // AbsdiffArrayP absdiff; + // absdiff.Run(); + // SplitP split; + // split.Run(); + // MergeP merge; + // merge.Run(); + /* + SetToP setto; + setto.Run(); + CopyToP copyto; + copyto.Run(); + ConvertToP convertto; + convertto.Run(); + */ + } + /* + std::cout.flush(); + { + BlurP blur; + blur.Run(); + SobelP sobel; + sobel.Run(); + ScharrP scharr; + scharr.Run(); + GaussianBlurP gblur; + gblur.Run(); + DilateP dilate; + dilate.Run(); + ErodeP erode; + erode.Run(); + } + std::cout.flush(); + { + MorphExP morphex; 
+ morphex.Run(); + CalcHistP calchist; + calchist.Run(); + EqualizeHistP eqhist; + eqhist.Run(); + ThresholdP threshold; + threshold.Run(); + ResizeP resize; + resize.Run(); + CvtColorP cvtcolor; + cvtcolor.Run(); + } + + { + LogP log; + log.Run(); + ExpP exp; + exp.Run(); + } + + std::cout.flush(); + { + //PhaseP phase; + //phase.Run(); + } + std::cout.flush(); + { + CartToPolarP ctop; + ctop.Run(); + } + std::cout.flush(); + { + PolarToCartP ptoc; + ptoc.Run(); + } + { + WarpAffineP warpA; + warpA.Run(); + WarpPerspectiveP warpP; + warpP.Run(); + } + + { + CornerHarrisP ch; + ch.Run(); + } + + { + LaplacianP laplacian; + laplacian.Run(); + } + + + */ + std::cout << ">>>>>>>> Performance test ended <<<<<<<<\ntotal - " << clock() - start << "ms\n"; + std::cout.flush(); +} + +void run_perf_test() +{ + print_info(); + cvtest::TS::ptr()->init("ocl"); + test(); +} + +#endif // WITH_OPENCL + +#endif // PREF_TEST_OCL diff --git a/modules/ocl/perf/precomp.cpp b/modules/ocl/perf/precomp.cpp new file mode 100644 index 0000000000..f505dac9fa --- /dev/null +++ b/modules/ocl/perf/precomp.cpp @@ -0,0 +1,45 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + + + \ No newline at end of file diff --git a/modules/ocl/perf/precomp.hpp b/modules/ocl/perf/precomp.hpp new file mode 100644 index 0000000000..cad26fc8de --- /dev/null +++ b/modules/ocl/perf/precomp.hpp @@ -0,0 +1,72 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#ifndef __OPENCV_TEST_PRECOMP_HPP__ +#define __OPENCV_TEST_PRECOMP_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cvconfig.h" +#include "opencv2/core/core.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/calib3d/calib3d.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/video/video.hpp" +#include "opencv2/ts/ts.hpp" +#include "opencv2/ts/ts_perf.hpp" +#include "opencv2/ocl/ocl.hpp" +#include "opencv2/nonfree/nonfree.hpp" + +#include "utility.hpp" +#include "interpolation.hpp" +//#include "add_test_info.h" +//#define PERF_TEST_OCL 1 + +#endif + diff --git a/modules/ocl/perf/test_arithm.cpp b/modules/ocl/perf/test_arithm.cpp new file mode 100644 index 0000000000..0e6cf6e4bf --- /dev/null +++ b/modules/ocl/perf/test_arithm.cpp @@ -0,0 +1,3658 @@ +/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. 
+// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Niko Li, newlife20080214@gmail.com +// Jia Haipeng, jiahaipeng95@gmail.com +// Shengen Yan, yanshengen@gmail.com +// Jiang Liyuan,jlyuan001.good@163.com +// Rock Li, Rock.Li@amd.com +// Zailong Wu, bullet@yeah.net +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + + +#include "precomp.hpp" +#include + +#ifdef HAVE_OPENCL +using namespace cv; +using namespace cv::ocl; +using namespace cvtest; +using namespace testing; +using namespace std; +PARAM_TEST_CASE(ArithmTestBase, MatType, bool) +{ + int type; + cv::Scalar val; + + //src mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mask; + cv::Mat dst; + cv::Mat dst1; //bak, for two outputs + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int dstx; + int dsty; + int maskx; + int masky; + + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + cv::Mat dst1_roi; //bak + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + cv::ocl::oclMat gdst1_whole; //bak + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdst1; //bak + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG& rng = TS::ptr()->get_rng(); + + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); + mat2 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dst1 = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + int devnums = getDevice(oclinfo); + CV_Assert(devnums>0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src2x = 1; + src1y = 1; + src2y = 1; + dstx = 1; + dsty =1; + maskx =1; + masky =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src2x = 0; + src1y = 0; + src2y = 0; + dstx = 0; + dsty = 0; + maskx =0; + masky =0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + //mat2_roi = mat2(Rect(src2x,src2y,256,1)); + mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows)); + + //gdst_whole = dst; + //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + //gdst1_whole = dst1; + //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + + //gmat1 = mat1_roi; + //gmat2 = mat2_roi; + //gmask = mask_roi; + } + +}; +////////////////////////////////lut///////////////////////////////////////////////// + +struct Lut : ArithmTestBase {}; + +TEST_P(Lut, Mat) +{ + + cv::Mat mat2(3, 512, CV_8UC1); + cv::RNG& rng = TS::ptr()->get_rng(); + rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256)); + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); + mat2_roi = mat2(Rect(src2x,src2y,256,1)); + + + t0 = (double)cvGetTickCount();//cpu start + cv::LUT(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = 
(double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + + t2=(double)cvGetTickCount();//kernel + cv::ocl::LUT(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + // s=GetParam(); + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + // src2x = rng.uniform( 0,mat2.cols - 256); + // src2y = rng.uniform (0,mat2.rows - 1); + + // cv::Mat mat2_roi = mat2(Rect(src2x,src2y,256,1)); + mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); + mat2_roi = mat2(Rect(src2x,src2y,256,1)); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + // gdst1_whole = dst1; + // gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + // gmask = mask_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::LUT(gmat1, gmat2, gdst); + }; +#endif + +} + + + +////////////////////////////////exp///////////////////////////////////////////////// + +struct Exp : ArithmTestBase {}; + +TEST_P(Exp, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::exp(mat1_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + + t2=(double)cvGetTickCount();//kernel + cv::ocl::exp(gmat1, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download(cpu_dst); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + //EXPECT_MAT_NEAR(dst, cpu_dst, 0,""); + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::exp(gmat1, gdst); + }; +#endif + +} + + +////////////////////////////////log///////////////////////////////////////////////// + +struct Log : ArithmTestBase {}; + +TEST_P(Log, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::log(mat1_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::log(gmat1, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::log(gmat1, gdst); + }; +#endif + +} + + + + +////////////////////////////////add///////////////////////////////////////////////// + +struct Add : ArithmTestBase {}; + +TEST_P(Add, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::add(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::add(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::add(gmat1, gmat2, gdst); + }; +#endif +} + +TEST_P(Add, Mat_Mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::add(mat1_roi, mat2_roi, dst_roi, mask_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::add(gmat1, gmat2, gdst, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::add(gmat1, gmat2, gdst, gmask); + }; +#endif +} +TEST_P(Add, Scalar) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::add(mat1_roi, val, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::add(gmat1, val, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::add(gmat1, val, gdst); + }; +#endif +} + +TEST_P(Add, Scalar_Mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::add(mat1_roi, val, dst_roi, mask_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmask = mask_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::add(gmat1, val, gdst, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmask = mask_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::add(gmat1, val, gdst, gmask); + }; +#endif +} + + +////////////////////////////////sub///////////////////////////////////////////////// +struct Sub : ArithmTestBase {}; + +TEST_P(Sub, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::subtract(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::subtract(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::subtract(gmat1, gmat2, gdst); + }; +#endif +} + +TEST_P(Sub, Mat_Mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::subtract(mat1_roi, mat2_roi, dst_roi, mask_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::subtract(gmat1, gmat2, gdst, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::subtract(gmat1, gmat2, gdst, gmask); + }; +#endif +} +TEST_P(Sub, Scalar) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::subtract(mat1_roi, val, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::subtract(gmat1, val, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::subtract(gmat1, val, gdst); + }; +#endif +} + +TEST_P(Sub, Scalar_Mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::subtract(mat1_roi, val, dst_roi, mask_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmask = mask_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::subtract(gmat1, val, gdst, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmask = mask_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::subtract(gmat1, val, gdst, gmask); + }; +#endif +} + + +////////////////////////////////Mul///////////////////////////////////////////////// +struct Mul : ArithmTestBase {}; + +TEST_P(Mul, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::multiply(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::multiply(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::multiply(gmat1, gmat2, gdst); + }; +#endif +} + +TEST_P(Mul, Mat_Scalar) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + cv::RNG& rng = TS::ptr()->get_rng(); + double s = rng.uniform(-10.0, 10.0); + t0 = (double)cvGetTickCount();//cpu start + cv::multiply(mat1_roi, mat2_roi, dst_roi, s); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::multiply(gmat1, gmat2, gdst, s); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + cv::RNG& rng = TS::ptr()->get_rng(); + double s = rng.uniform(-10.0, 10.0); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::multiply(gmat1, gmat2, gdst, s); + }; +#endif +} + + +struct Div : ArithmTestBase {}; + +TEST_P(Div, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::divide(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::divide(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::divide(gmat1, gmat2, gdst); + }; +#endif +} + +TEST_P(Div, Mat_Scalar) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + cv::RNG& rng = TS::ptr()->get_rng(); + double s = rng.uniform(-10.0, 10.0); + t0 = (double)cvGetTickCount();//cpu start + cv::divide(mat1_roi, mat2_roi, dst_roi, s); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::divide(gmat1, gmat2, gdst, s); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + cv::RNG& rng = TS::ptr()->get_rng(); + double s = rng.uniform(-10.0, 10.0); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::divide(gmat1, gmat2, gdst, s); + }; +#endif +} + + +struct Absdiff : ArithmTestBase {}; + +TEST_P(Absdiff, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::absdiff(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::absdiff(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::absdiff(gmat1, gmat2, gdst); + }; +#endif +} + +TEST_P(Absdiff, Mat_Scalar) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::absdiff(mat1_roi, val, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::absdiff(gmat1, val, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::absdiff(gmat1, val, gdst); + }; +#endif +} + + + +struct CartToPolar : ArithmTestBase {}; + +TEST_P(CartToPolar, angleInDegree) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + t2=(double)cvGetTickCount();//kernel + cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + cv::Mat cpu_dst1; + gdst1_whole.download(cpu_dst1); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1); + }; +#endif +} + +TEST_P(CartToPolar, angleInRadians) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + cv::Mat cpu_dst1; + gdst1_whole.download(cpu_dst1); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0); + }; +#endif +} + + +struct PolarToCart : ArithmTestBase {}; + +TEST_P(PolarToCart, angleInDegree) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + t2=(double)cvGetTickCount();//kernel + cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + cv::Mat cpu_dst1; + gdst1_whole.download(cpu_dst1); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1); + }; +#endif +} + +TEST_P(PolarToCart, angleInRadians) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + t2=(double)cvGetTickCount();//kernel + cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + cv::Mat cpu_dst1; + gdst1_whole.download(cpu_dst1); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0); + }; +#endif +} + + + +struct Magnitude : ArithmTestBase {}; + +TEST_P(Magnitude, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::magnitude(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::magnitude(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::magnitude(gmat1, gmat2, gdst); + }; +#endif +} + +struct Transpose : ArithmTestBase {}; + +TEST_P(Transpose, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::transpose(mat1_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::transpose(gmat1, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::transpose(gmat1, gdst); + }; +#endif +} + + +struct Flip : ArithmTestBase {}; + +TEST_P(Flip, X) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::flip(mat1_roi, dst_roi, 0); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::flip(gmat1, gdst, 0); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::flip(gmat1, gdst, 0); + }; +#endif +} + +TEST_P(Flip, Y) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::flip(mat1_roi, dst_roi, 1); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::flip(gmat1, gdst, 1); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::flip(gmat1, gdst, 1); + }; +#endif +} + +TEST_P(Flip, BOTH) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::flip(mat1_roi, dst_roi, -1); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::flip(gmat1, gdst, -1); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::flip(gmat1, gdst, -1); + }; +#endif +} + + + +struct MinMax : ArithmTestBase {}; + +TEST_P(MinMax, MAT) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + t0 = (double)cvGetTickCount();//cpu start + if (mat1.depth() != CV_8S) + { + cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc); + } + else + { + minVal = std::numeric_limits::max(); + maxVal = -std::numeric_limits::max(); + for (int i = 0; i < mat1_roi.rows; ++i) + for (int j = 0; j < mat1_roi.cols; ++j) + { + signed char val = mat1_roi.at(i, j); + if (val < minVal) minVal = val; + if (val > maxVal) maxVal = val; + } + } + + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + double minVal_, maxVal_; + t2=(double)cvGetTickCount();//kernel + cv::ocl::minMax(gmat1, &minVal_, &maxVal_); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + double minVal_, maxVal_; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::minMax(gmat1, &minVal_, &maxVal_); + }; +#endif +} + +TEST_P(MinMax, MASK) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + t0 = (double)cvGetTickCount();//cpu start + if (mat1.depth() != CV_8S) + { + cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc,mask_roi); + } + else + { + minVal = std::numeric_limits::max(); + maxVal = -std::numeric_limits::max(); + for (int i = 0; i < mat1_roi.rows; ++i) + for (int j = 0; j < mat1_roi.cols; ++j) + { + signed char val = mat1_roi.at(i, j); + unsigned char m = mask_roi.at(i, j); + if (val < minVal && m) minVal = val; + if (val > maxVal && m) maxVal = val; + } + } + + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + gmask = mask_roi; + double minVal_, maxVal_; + t2=(double)cvGetTickCount();//kernel + cv::ocl::minMax(gmat1, &minVal_, &maxVal_,gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + gmask = mask_roi; + double minVal_, maxVal_; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::minMax(gmat1, &minVal_, &maxVal_,gmask); + }; +#endif +} + + +struct MinMaxLoc : ArithmTestBase {}; + +TEST_P(MinMaxLoc, MAT) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + int depth = mat1.depth(); + t0 = (double)cvGetTickCount();//cpu start + if (depth != CV_8S) + { + cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc); + } + else + { + minVal = std::numeric_limits::max(); + maxVal = -std::numeric_limits::max(); + for (int i = 0; i < mat1_roi.rows; ++i) + for (int j = 0; j < mat1_roi.cols; ++j) + { + signed char val = mat1_roi.at(i, j); + if (val < minVal) { + minVal = val; + minLoc.x = j; + minLoc.y = i; + } + if (val > maxVal) { + maxVal = val; + maxLoc.x = j; + maxLoc.y = i; + } + } + } + + + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + t2=(double)cvGetTickCount();//kernel + cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, cv::ocl::oclMat()); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, cv::ocl::oclMat()); + }; +#endif + +} + + +TEST_P(MinMaxLoc, MASK) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + int depth = mat1.depth(); + t0 = (double)cvGetTickCount();//cpu start + if (depth != CV_8S) + { + cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc,mask_roi); + } + else + { + minVal = std::numeric_limits::max(); + maxVal = -std::numeric_limits::max(); + for (int i = 0; i < mat1_roi.rows; ++i) + for (int j = 0; j < mat1_roi.cols; ++j) + { + signed char val = mat1_roi.at(i, j); + unsigned char m = mask_roi.at(i ,j); + if (val < minVal && m) { + minVal = val; + minLoc.x = j; + minLoc.y = i; + } + if (val > maxVal && m) { + maxVal = val; + maxLoc.x = j; + maxLoc.y = i; + } + } + } + + + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + gmask = mask_roi; + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + t2=(double)cvGetTickCount();//kernel + cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + gmask = mask_roi; + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, gmask); + }; +#endif +} + + +struct Sum : ArithmTestBase {}; + +TEST_P(Sum, MAT) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + Scalar cpures =cv::sum(mat1_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + Scalar gpures=cv::ocl::sum(gmat1); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + Scalar gpures=cv::ocl::sum(gmat1); + }; +#endif +} + +//TEST_P(Sum, MASK) +//{ +// for(int j=0; j oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + cv::ocl::oclMat gdst1_whole; //bak + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdst1; //bak + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + //type = GET_PARAM(0); + type = CV_8UC1; + + cv::RNG& rng = TS::ptr()->get_rng(); + + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); + mat2 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dst1 = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + int devnums = getDevice(oclinfo); + CV_Assert(devnums>0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src2x = 1; + src1y = 1; + src2y = 1; + dstx = 1; + dsty =1; + maskx =1; + masky =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src2x = 0; + src1y = 0; + src2y = 0; + dstx = 0; + dsty = 0; + maskx =0; + masky =0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + //mat2_roi = mat2(Rect(src2x,src2y,256,1)); + mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); + mask_roi = 
mask(Rect(maskx,masky,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows)); + + //gdst_whole = dst; + //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + //gdst1_whole = dst1; + //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + + //gmat1 = mat1_roi; + //gmat2 = mat2_roi; + //gmask = mask_roi; + } + +}; +struct Compare : CompareTestBase {}; + +TEST_P(Compare, Mat) +{ + if(mat1.type()==CV_8SC1) + { + cout << "\tUnsupported type\t\n"; + } + + int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE}; + //const char* cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"}; + int cmp_num = sizeof(cmp_codes) / sizeof(int); + for (int i = 0; i < cmp_num; ++i) + { + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=1;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::compare(mat1_roi,mat2_roi,dst_roi,cmp_codes[i]); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::compare(gmat1,gmat2,gdst,cmp_codes[i]); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
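+            // Compare is timed once for each of the six codes (CMP_EQ ... CMP_NE); note that
+            // this ROI loop starts at k=1, so only the with-roi configuration is measured.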
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::compare(gmat1,gmat2,gdst,cmp_codes[i]); + }; +#endif + } + +} + +struct Pow : ArithmTestBase {}; + +TEST_P(Pow, Mat) +{ + if(mat1.depth()!=CV_32F && mat1.depth()!=CV_64F) + { + cout<<"\tUnsupported type\t\n"; + } + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + double p=4.5; + t0 = (double)cvGetTickCount();//cpu start + cv::pow(mat1_roi,p,dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::pow(gmat1,p,gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + double p=4.5; + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::pow(gmat1,p,gdst); + }; +#endif +} + + +struct MagnitudeSqr : ArithmTestBase {}; + +TEST_P(MagnitudeSqr, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + for(int i = 0;i < mat1.rows;++i) + for(int j = 0;j < mat1.cols;++j) + { + float val1 = mat1.at(i,j); + float val2 = mat2.at(i,j); + + ((float *)(dst.data))[i*dst.step/4 +j]= val1 * val1 +val2 * val2; + + } + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; + t2=(double)cvGetTickCount();//kernel + cv::ocl::magnitudeSqr(clmat1,clmat2, cldst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + cldst.download(cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::magnitudeSqr(clmat1,clmat2, cldst); + }; +#endif + +} + + +struct AddWeighted : ArithmTestBase {}; + +TEST_P(AddWeighted, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + double alpha=2.0,beta=1.0,gama=3.0; + + t0 = (double)cvGetTickCount();//cpu start + cv::addWeighted(mat1_roi,alpha,mat2_roi,beta,gama,dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + + t2=(double)cvGetTickCount();//kernel + cv::ocl::addWeighted(gmat1,alpha,gmat2,beta,gama, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download(cpu_dst); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; +} +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + double alpha=2.0,beta=1.0,gama=3.0; + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::addWeighted(gmat1,alpha, gmat2,beta,gama, gdst); + // double alpha=2.0,beta=1.0,gama=3.0; + // cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; + // if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + // cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst); + }; +#endif + +} +/* +struct AddWeighted : ArithmTestBase {}; + +TEST_P(AddWeighted, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + double alpha=2.0,beta=1.0,gama=3.0; + + t0 = (double)cvGetTickCount();//cpu start + cv::addWeighted(mat1,alpha,mat2,beta,gama,dst); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; + + t2=(double)cvGetTickCount();//kernel + cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + cldst.download(cpu_dst); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + +#else + //for(int j = 0; j < 2; j ++) + // { + double alpha=2.0,beta=1.0,gama=3.0; + cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; + //if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst); + // }; +#endif + +} + +*/ +//********test**************** + +INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine( + Values(CV_8UC1, CV_8UC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine( + Values(CV_32FC1, CV_64FC1), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine( + Values(CV_32FC1, CV_64FC1), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); + +INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + + +INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine( + Values(CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine( + Values(CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine( + Values(CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine( + Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine( + Values(CV_8UC1, CV_32FC1), + Values(false))); + +INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine( + Values(CV_8UC1, CV_32FC1), + Values(false))); + +INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine( + Values(CV_8U, CV_32S, CV_32F), + Values(false))); + +INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine( + Values(CV_8U, CV_32S, CV_32F), + Values(false))); + + +INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC4), Values(false))); +//Values(false) is the reserved parameter + + +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine( + Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false))); +//Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); +//Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine( + Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false))); +//Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine( + Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false))); +//Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1,CV_16UC1,CV_16SC1,CV_32SC1,CV_32FC1,CV_64FC1), Values(false))); +//Values(false) is the reserved parameter + 
+INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32FC1, CV_32FC4), Values(false))); +//Values(false) is the reserved parameter + + +INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine( + Values(CV_8UC1, CV_32SC1, CV_32FC1), + Values(false))); // Values(false) is the reserved parameter + + + + +#endif // HAVE_OPENCL diff --git a/modules/ocl/perf/test_filters.cpp b/modules/ocl/perf/test_filters.cpp new file mode 100644 index 0000000000..ac9a86573b --- /dev/null +++ b/modules/ocl/perf/test_filters.cpp @@ -0,0 +1,1096 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Niko Li, newlife20080214@gmail.com +// Jia Haipeng, jiahaipeng95@gmail.com +// Zero Lin, Zero.Lin@amd.com +// Zhang Ying, zhangying913@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace cvtest; +using namespace testing; +using namespace std; +//using namespace cv::ocl; + +PARAM_TEST_CASE(FilterTestBase, MatType, bool) +{ + int type; + cv::Scalar val; + + //src mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mask; + cv::Mat dst; + cv::Mat dst1; //bak, for two outputs + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int dstx; + int dsty; + int maskx; + int masky; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + cv::Mat dst1_roi; //bak + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + cv::ocl::oclMat gdst1_whole; //bak + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdst1; //bak + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + mat2 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dst1 = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + } + + void random_roi() + { + cv::RNG& rng = TS::ptr()->get_rng(); + + //randomize ROI + roicols = rng.uniform(1, mat1.cols); + roirows = rng.uniform(1, mat1.rows); + src1x = rng.uniform(0, mat1.cols - roicols); + src1y = rng.uniform(0, mat1.rows - roirows); + src2x = rng.uniform(0, mat2.cols - roicols); + src2y = rng.uniform(0, mat2.rows - roirows); + dstx = rng.uniform(0, dst.cols - roicols); + dsty = rng.uniform(0, dst.rows - roirows); + maskx = rng.uniform(0, mask.cols - roicols); + masky = rng.uniform(0, mask.rows - roirows); + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows)); + + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; + } + +}; + +///////////////////////////////////////////////////////////////////////////////////////////////// +// blur + +PARAM_TEST_CASE(Blur, MatType, cv::Size, int) +{ + int type; + cv::Size ksize; + int bordertype; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + ksize = GET_PARAM(1); + bordertype = GET_PARAM(2); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + + void 
Has_roi(int b) + { + if(b) + { + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + } + +}; + +TEST_P(Blur, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::blur(mat1_roi, dst_roi, ksize, Point(-1,-1), bordertype); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::blur(gmat1, gdst, ksize, Point(-1,-1), bordertype); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::blur(gmat1, gdst, ksize, Point(-1,-1), bordertype); + }; +#endif + +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +//Laplacian + +PARAM_TEST_CASE(LaplacianTestBase, MatType, int) +{ + int type; + int ksize; + + //src mat + cv::Mat mat; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + ksize = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(2560, 2560); + + mat = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat.cols-1; + roirows = mat.rows-1; + srcx = 1; + srcy = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + }; + + mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + } + +}; + +struct Laplacian : LaplacianTestBase {}; + +TEST_P(Laplacian, Accuracy) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::Laplacian(mat_roi, dst_roi, -1, ksize, 1); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat = mat_roi; + + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1); + }; +#endif +} + + +///////////////////////////////////////////////////////////////////////////////////////////////// +// erode & dilate + +PARAM_TEST_CASE(ErodeDilateBase, MatType, bool) +{ + int type; + //int iterations; + + //erode or dilate kernel + cv::Mat kernel; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + // iterations = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(2560, 2560); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + // rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3)); + kernel = randomMat(rng, Size(3,3), CV_8UC1, 0, 3, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + } + +}; + +// erode + +struct Erode : ErodeDilateBase{}; + +TEST_P(Erode, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::erode(mat1_roi, dst_roi, kernel); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + + t2=(double)cvGetTickCount();//kernel + cv::ocl::erode(gmat1, gdst, kernel); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::erode(gmat1, gdst, kernel); + }; +#endif + +} + +// dilate + +struct Dilate : ErodeDilateBase{}; + +TEST_P(Dilate, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + t0 = (double)cvGetTickCount();//cpu start + cv::dilate(mat1_roi, dst_roi, kernel); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::dilate(gmat1, gdst, kernel); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::dilate(gmat1, gdst, kernel); + }; +#endif + +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +// Sobel + +PARAM_TEST_CASE(Sobel, MatType, int, int, int, int) +{ + int type; + int dx, dy, ksize, bordertype; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + dx = GET_PARAM(1); + dy = GET_PARAM(2); + ksize = GET_PARAM(3); + bordertype = GET_PARAM(4); + dx = 2; dy=0; + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(2560, 2560); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + } + +}; + +TEST_P(Sobel, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME 
+ double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::Sobel(mat1_roi, dst_roi, -1, dx, dy, ksize, /*scale*/0.00001,/*delta*/0, bordertype); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::Sobel(gmat1, gdst,-1, dx,dy,ksize,/*scale*/0.00001,/*delta*/0, bordertype); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::Sobel(gmat1, gdst,-1, dx,dy,ksize,/*scale*/0.00001,/*delta*/0, bordertype); + }; +#endif + +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +// Scharr + +PARAM_TEST_CASE(Scharr, MatType, int, int, int) +{ + int type; + int dx, dy, bordertype; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + dx = GET_PARAM(1); + dy = GET_PARAM(2); + bordertype = GET_PARAM(3); + dx = 1; dy=0; + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(2560, 2560); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + } +}; + +TEST_P(Scharr, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = 
(double)cvGetTickCount();//cpu start + cv::Scharr(mat1_roi, dst_roi, -1, dx, dy, /*scale*/1,/*delta*/0, bordertype); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::Scharr(gmat1, gdst,-1, dx,dy,/*scale*/1,/*delta*/0, bordertype); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::Scharr(gmat1, gdst,-1, dx,dy,/*scale*/1,/*delta*/0, bordertype); + }; +#endif + +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +// GaussianBlur + +PARAM_TEST_CASE(GaussianBlur, MatType, cv::Size, int) +{ + int type; + cv::Size ksize; + int bordertype; + + double sigma1, sigma2; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + ksize = GET_PARAM(1); + bordertype = GET_PARAM(2); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(2560, 2560); + + sigma1 = rng.uniform(0.1, 1.0); + sigma2 = rng.uniform(0.1, 1.0); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + } + +}; + +TEST_P(GaussianBlur, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::GaussianBlur(mat1_roi, dst_roi, ksize, sigma1, sigma2, bordertype); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = 
dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype); + }; +#endif + +} + +//************test********** + +INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(cv::Size(3, 3)/*, cv::Size(5, 5), cv::Size(7, 7)*/), + Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); + + +INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(1/*, 3*/))); + +//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3))); + +INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); + +//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3))); + +INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); + + +INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(1, 2), Values(0, 1), Values(3, 5, 7), Values((MatType)cv::BORDER_CONSTANT, + (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); + + +INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1), + Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); + +INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)), + Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); + + +#endif // HAVE_OPENCL diff --git a/modules/ocl/perf/test_haar.cpp b/modules/ocl/perf/test_haar.cpp new file mode 100644 index 0000000000..8aabd67d64 --- /dev/null +++ b/modules/ocl/perf/test_haar.cpp @@ -0,0 +1,198 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+//
+//M*/
+
+#include "opencv2/objdetect/objdetect.hpp"
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+using namespace cv;
+
+struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
+
+PARAM_TEST_CASE(HaarTestBase, int, int)
+{
+    std::vector<cv::ocl::Info> oclinfo;
+    cv::ocl::OclCascadeClassifier cascade, nestedCascade;
+    cv::CascadeClassifier cpucascade, cpunestedCascade;
+//    Mat img;
+
+    double scale;
+    int index;
+
+    virtual void SetUp()
+    {
+        scale = 1.1;
+
+#if WIN32
+        string cascadeName="E:\\opencvbuffer\\trunk\\data\\haarcascades\\haarcascade_frontalface_alt.xml";
+#else
+        string cascadeName="../data/haarcascades/haarcascade_frontalface_alt.xml";
+#endif
+
+        if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
+        {
+            cout << "ERROR: Could not load classifier cascade" << endl;
+            cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
+                "   [--nested-cascade[=nested_cascade_path]]\n"
+                "   [--scale[=<image scale>]\n"
+                "   [filename|camera_index]\n" << endl ;
+
+            return;
+        }
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums>0);
+        //if you want to use undefault device, set it here
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath("E:\\");
+    }
+};
+
+////////////////////////////////faceDetect/////////////////////////////////////////////////
+
+struct Haar : HaarTestBase {};
+
+TEST_P(Haar, FaceDetect)
+{
+    for(int index = 1;index < 2; index++)
+    {
+        Mat img;
+        char buff[256];
+#if WIN32
+        sprintf(buff,"E:\\myDataBase\\%d.jpg",index);
+        img = imread( buff, 1 );
+#else
+        sprintf(buff,"%d.jpg",index);
+        img = imread( buff, 1 );
+        std::cout << "Now test " << index << ".jpg" << std::endl;
+#endif
+        if(img.empty())
+        {
+            continue;
+        }
+
+        int i = 0;
+        double t = 0;
+        vector<Rect> faces;
+
+        const static Scalar colors[] =  { CV_RGB(0,0,255),
+            CV_RGB(0,128,255),
+            CV_RGB(0,255,255),
+            CV_RGB(0,255,0),
+            CV_RGB(255,128,0),
+            CV_RGB(255,255,0),
+            CV_RGB(255,0,0),
+            CV_RGB(255,0,255)} ;
+
+        Mat gray, smallImg(cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
+        MemStorage storage(cvCreateMemStorage(0));
+        cvtColor( img, gray, CV_BGR2GRAY );
+        resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
+        equalizeHist( smallImg, smallImg );
+        CvMat _image = smallImg;
+
+        Mat tempimg(&_image, false);
+
+        cv::ocl::oclMat image(tempimg);
+        CvSeq* _objects;
+
+#if 1
+        for(int k= 0; k<10; k++)
+        {
+            t = (double)cvGetTickCount();
+            _objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
+                2, 0
+                |CV_HAAR_SCALE_IMAGE
+                , Size(30,30), Size(0, 0) );
+
+            t = (double)cvGetTickCount() - t ;
+            printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
+        }
+
+#else
+        cpucascade.detectMultiScale( image, faces,  1.1,
+            2, 0
+            |CV_HAAR_SCALE_IMAGE
+            , Size(30,30), Size(0, 0) );
+
+#endif
+        vector<CvAvgComp> vecAvgComp;
+        Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
+        faces.resize(vecAvgComp.size());
+        std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
+
+        for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
+        {
+            Mat smallImgROI;
+            vector<Rect> nestedObjects;
+            Point center;
+            Scalar color = colors[i%8];
+            int radius;
+            center.x = cvRound((r->x + r->width*0.5)*scale);
+            center.y = cvRound((r->y + r->height*0.5)*scale);
+            radius = cvRound((r->width + r->height)*0.25*scale);
+            circle( img, center, radius, color, 3, 8, 0 );
+        }
+
+#if WIN32
+        sprintf(buff,"E:\\result1\\%d.jpg",index);
+        imwrite(buff,img);
+#else
+        sprintf(buff,"testdet_%d.jpg",index);
+        imwrite(buff,img);
+#endif
+    }
+}
+
+
+//INSTANTIATE_TEST_CASE_P(HaarTestBase, Haar, Combine(Values(1),
+//            Values(1)));
+
+
+#endif // HAVE_OPENCL
diff --git a/modules/ocl/perf/test_imgproc.cpp b/modules/ocl/perf/test_imgproc.cpp
new file mode 100644
index 0000000000..e01e976817
--- /dev/null
+++ b/modules/ocl/perf/test_imgproc.cpp
@@ -0,0 +1,1551 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Niko Li, newlife20080214@gmail.com
+//    Jia Haipeng, jiahaipeng95@gmail.com
+//    Shengen Yan, yanshengen@gmail.com
+//    Jiang Liyuan, lyuan001.good@163.com
+//    Rock Li, Rock.Li@amd.com
+//    Zailong Wu, bullet@yeah.net
+//    Xu Pang, pangxu010@163.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace cvtest; +using namespace testing; +using namespace std; + + +MatType nulltype = -1; + +#define ONE_TYPE(type) testing::ValuesIn(typeVector(type)) +#define NULL_TYPE testing::ValuesIn(typeVector(nulltype)) + + +vector typeVector(MatType type) +{ + vector v; + v.push_back(type); + return v; +} + + +PARAM_TEST_CASE(ImgprocTestBase, MatType,MatType,MatType,MatType,MatType, bool) +{ + int type1,type2,type3,type4,type5; + cv::Scalar val; + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int dstx; + int dsty; + int dst1x; + int dst1y; + int maskx; + int masky; + + //mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mask; + cv::Mat dst; + cv::Mat dst1; //bak, for two outputs + + //mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + cv::Mat dst1_roi; //bak + std::vector oclinfo; + //ocl mat + cv::ocl::oclMat clmat1; + cv::ocl::oclMat clmat2; + cv::ocl::oclMat clmask; + cv::ocl::oclMat cldst; + cv::ocl::oclMat cldst1; //bak + + //ocl mat with roi + cv::ocl::oclMat clmat1_roi; + cv::ocl::oclMat clmat2_roi; + cv::ocl::oclMat clmask_roi; + cv::ocl::oclMat cldst_roi; + cv::ocl::oclMat cldst1_roi; + + virtual void SetUp() + { + type1 = GET_PARAM(0); + type2 = GET_PARAM(1); + type3 = GET_PARAM(2); + type4 = GET_PARAM(3); + type5 = GET_PARAM(4); + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + double min = 1,max = 20; + int devnums = getDevice(oclinfo); + CV_Assert(devnums>0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + if(type1!=nulltype) + { + mat1 = randomMat(rng, size, type1, min, max, false); + clmat1 = mat1; + } + if(type2!=nulltype) + { + mat2 = randomMat(rng, size, type2, min, max, false); + clmat2 = mat2; + } + if(type3!=nulltype) + { + dst = randomMat(rng, size, type3, min, max, false); + cldst = dst; + } + if(type4!=nulltype) + { + dst1 = randomMat(rng, size, type4, min, max, false); + cldst1 = dst1; + } + if(type5!=nulltype) + { + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + cv::threshold(mask, mask, 0.5, 255., type5); + clmask = mask; + } + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + } + + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols-1; //start + roirows = mat1.rows-1; + src1x = 1; + src2x = 1; + src1y = 1; + src2y = 1; + dstx = 1; + dsty =1; + dst1x = 1; + dst1y =1; + maskx =1; + masky =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src2x = 0; + src1y = 0; + src2y = 0; + dstx = 0; + dsty = 0; + dst1x =0; + dst1y =0; + maskx =0; + masky =0; + }; + + if(type1!=nulltype) + { + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows)); + } + if(type2!=nulltype) + { 
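+            // mat2 is only prepared when the test supplies a second input type (type2 != nulltype);
+            // it is cropped to the same roicols x roirows window as mat1 so that every operand
+            // covers a region of identical size.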
+ mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); + //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows)); + } + if(type3!=nulltype) + { + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows)); + } + if(type4!=nulltype) + { + dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows)); + //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows)); + } + if(type5!=nulltype) + { + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows)); + } + } + + void random_roi() + { + cv::RNG& rng = TS::ptr()->get_rng(); + + //randomize ROI + roicols = rng.uniform(1, mat1.cols); + roirows = rng.uniform(1, mat1.rows); + src1x = rng.uniform(0, mat1.cols - roicols); + src1y = rng.uniform(0, mat1.rows - roirows); + src2x = rng.uniform(0, mat2.cols - roicols); + src2y = rng.uniform(0, mat2.rows - roirows); + dstx = rng.uniform(0, dst.cols - roicols); + dsty = rng.uniform(0, dst.rows - roirows); + dst1x = rng.uniform(0, dst1.cols - roicols); + dst1y = rng.uniform(0, dst1.rows - roirows); + maskx = rng.uniform(0, mask.cols - roicols); + masky = rng.uniform(0, mask.rows - roirows); + + if(type1!=nulltype) + { + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows)); + } + if(type2!=nulltype) + { + mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); + //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows)); + } + if(type3!=nulltype) + { + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows)); + } + if(type4!=nulltype) + { + dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows)); + //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows)); + } + if(type5!=nulltype) + { + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows)); + } + } +}; +////////////////////////////////equalizeHist////////////////////////////////////////// + +struct equalizeHist : ImgprocTestBase {}; + +TEST_P(equalizeHist, MatType) +{ + if (mat1.type() != CV_8UC1 || mat1.type() != dst.type()) + { + cout<<"Unsupported type"< oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + //dsize = GET_PARAM(1); + interpolation = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + size = cv::Size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + src_roicols = mat1.cols-1; //start + src_roirows = mat1.rows-1; + dst_roicols=dst.cols-1; + dst_roirows=dst.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + + }else + { + src_roicols = mat1.cols; + src_roirows = mat1.rows; + dst_roicols=dst.cols; + dst_roirows=dst.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + + }; + mat1_roi = mat1(Rect(src1x,src1y,src_roicols,src_roirows)); + dst_roi = dst(Rect(dstx,dsty,dst_roicols,dst_roirows)); + + + } + +}; + +/////warpAffine + +struct WarpAffine : WarpTestBase{}; + +TEST_P(WarpAffine, Mat) +{ + static const double coeffs[2][3] = + { + {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, + {sin(3.14 / 6), cos(3.14 / 6), -100.0} + 
}; + Mat M(2, 3, CV_64F, (void*)coeffs); + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::warpAffine(mat1_roi, dst_roi, M, size, interpolation); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation); + }; +#endif + +} + + +// warpPerspective + +struct WarpPerspective : WarpTestBase{}; + +TEST_P(WarpPerspective, Mat) +{ + static const double coeffs[3][3] = + { + {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, + {sin(3.14 / 6), cos(3.14 / 6), -100.0}, + {0.0, 0.0, 1.0} + }; + Mat M(3, 3, CV_64F, (void*)coeffs); + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::warpPerspective(mat1_roi, dst_roi, M, size, interpolation); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation); + }; +#endif + +} + + +///////////////////////////////////////////////////////////////////////////////////////////////// +// resize + +PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int) +{ + int type; + cv::Size dsize; + double fx, fy; + int interpolation; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int src_roicols; + int src_roirows; + int dst_roicols; + int dst_roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + dsize = GET_PARAM(1); + fx = GET_PARAM(2); + fy = GET_PARAM(3); + interpolation = GET_PARAM(4); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + if(dsize == cv::Size() && !(fx > 0 && fy > 0)) + { + cout << "invalid dsize and fx fy" << endl; + return; + } + + if(dsize == cv::Size()) + { + dsize.width = (int)(size.width * fx); + dsize.height = (int)(size.height * fy); + } + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, dsize, type, 5, 16, false); + + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + src_roicols = mat1.cols-1; //start + src_roirows = mat1.rows-1; + dst_roicols=dst.cols-1; + dst_roirows=dst.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + + }else + { + src_roicols = mat1.cols; + src_roirows = mat1.rows; + dst_roicols=dst.cols; + dst_roirows=dst.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + + }; + mat1_roi = mat1(Rect(src1x,src1y,src_roicols,src_roirows)); + dst_roi = dst(Rect(dstx,dsty,dst_roicols,dst_roirows)); + + + } + +}; + +TEST_P(Resize, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); + }; +#endif + +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +//threshold + +PARAM_TEST_CASE(Threshold, MatType, ThreshOp) +{ + int type; + int threshOp; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + threshOp = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols-1; //start + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + + }; + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + + } +}; + +TEST_P(Threshold, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + double maxVal = randomDouble(20.0, 127.0); + double thresh = randomDouble(0.0, maxVal); + t0 = (double)cvGetTickCount();//cpu start + cv::threshold(mat1_roi, dst_roi, thresh, maxVal, threshOp); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp); + t2 = (double)cvGetTickCount() - t2;//kernel + + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + double maxVal = randomDouble(20.0, 127.0); + double thresh = randomDouble(0.0, maxVal); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp); + }; +#endif + +} +/////////////////////////////////////////////////////////////////////////////////////////////////// +//meanShift + +PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria) +{ + int type, typeCoor; + int sp, sr; + cv::TermCriteria crit; + //src mat + cv::Mat src; + cv::Mat dst; + cv::Mat dstCoor; + + //set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int dstx; + int dsty; + + //src mat with roi + cv::Mat src_roi; + cv::Mat dst_roi; + cv::Mat dstCoor_roi; + + //ocl dst mat + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdstCoor; + + std::vector oclinfo; + //ocl mat with roi + cv::ocl::oclMat gsrc_roi; + cv::ocl::oclMat gdst_roi; + cv::ocl::oclMat gdstCoor_roi; + + virtual void SetUp() + { + type = GET_PARAM(0); + typeCoor = GET_PARAM(1); + sp = GET_PARAM(2); + sr = GET_PARAM(3); + crit = GET_PARAM(4); + + cv::RNG &rng = TS::ptr()->get_rng(); + + // MWIDTH=256, MHEIGHT=256. defined in utility.hpp + cv::Size size = cv::Size(MWIDTH, MHEIGHT); + + src = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dstCoor = randomMat(rng, size, typeCoor, 5, 16, false); + + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + //randomize ROI + roicols = src.cols - 1; + roirows = src.rows - 1; + srcx = 1; + srcy = 1; + dstx = 1; + dsty = 1; + }else + { + roicols = src.cols; + roirows = src.rows; + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + }; + + src_roi = src(Rect(srcx, srcy, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + dstCoor_roi = dstCoor(Rect(dstx, dsty, roicols, roirows)); + + gdst = dst; + gdstCoor = dstCoor; + } +}; + +/////////////////////////meanShiftFiltering///////////////////////////// +struct meanShiftFiltering : meanShiftTestBase {}; + +TEST_P(meanShiftFiltering, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double t1=0; + double t2=0; + for(int k=0;k<2;k++) + { + double totalgputick=0; + double totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t1 = (double)cvGetTickCount();//gpu start1 + + gsrc_roi = src_roi; + gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi + + t2=(double)cvGetTickCount();//kernel + cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); + t2 = (double)cvGetTickCount() - t2;//kernel + + cv::Mat cpu_gdst; + gdst.download(cpu_gdst);//download + + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + + gsrc_roi = src_roi; + gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); + }; +#endif + +} + +///////////////////////////meanShiftProc////////////////////////////////// +struct meanShiftProc : meanShiftTestBase {}; + +TEST_P(meanShiftProc, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double t1=0; + double t2=0; + for(int k=0;k<2;k++) + { + double totalgputick=0; + double totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t1 = (double)cvGetTickCount();//gpu start1 + + gsrc_roi = src_roi; + gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi + gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); + + t2=(double)cvGetTickCount();//kernel + cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); + t2 = (double)cvGetTickCount() - t2;//kernel + + cv::Mat cpu_gdstCoor; + gdstCoor.download(cpu_gdstCoor);//download + + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + + gsrc_roi = src_roi; + gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi + gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); + }; +#endif + +} + + + +//************test******************* + +INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( + ONE_TYPE(CV_8UC1), + NULL_TYPE, + ONE_TYPE(CV_8UC1), + NULL_TYPE, + NULL_TYPE, + Values(false))); // Values(false) is the reserved parameter + +//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine( +// ONE_TYPE(CV_8UC1), +// NULL_TYPE, +// ONE_TYPE(CV_8UC1), +// NULL_TYPE, +// NULL_TYPE, +// Values(false))); // Values(false) is the reserved parameter +// +// +//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine( +// Values(CV_8UC1, CV_8UC4/*, CV_32SC1*/), +// NULL_TYPE, +// Values(CV_8UC1,CV_8UC4/*,CV_32SC1*/), +// NULL_TYPE, +// NULL_TYPE, +// Values(false))); // Values(false) is the reserved parameter + +//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine( +// Values(CV_8UC1,CV_32FC1), +// NULL_TYPE, +// ONE_TYPE(CV_32FC1), +// NULL_TYPE, +// NULL_TYPE, +// Values(false))); // Values(false) is the reserved parameter +// +//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine( +// Values(CV_8UC1,CV_32FC1), +// NULL_TYPE, +// ONE_TYPE(CV_32FC1), +// NULL_TYPE, +// NULL_TYPE, +// Values(false))); // Values(false) is the reserved parameter + + +INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine( + ONE_TYPE(CV_8UC1), + NULL_TYPE, + ONE_TYPE(CV_32SC1), + ONE_TYPE(CV_32FC1), + NULL_TYPE, + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values((MatType)cv::INTER_NEAREST, 
(MatType)cv::INTER_LINEAR, + (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), + (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); + + +INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine + (Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, + (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), + (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); + + +INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()), + Values(0.5/*, 1.5, 2*/), Values(0.5/*, 1.5, 2*/), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR))); + + +INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine( + Values(CV_8UC1, CV_32FC1), Values(ThreshOp(cv::THRESH_BINARY), + ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), + ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV)))); + +INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine( + ONE_TYPE(CV_8UC4), + ONE_TYPE(CV_16SC2),//it is no use in meanShiftFiltering + Values(5), + Values(6), + Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1)) + )); + +INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine( + ONE_TYPE(CV_8UC4), + ONE_TYPE(CV_16SC2), + Values(5), + Values(6), + Values(cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS, 5, 1)) + )); + + +#endif // HAVE_OPENCL diff --git a/modules/ocl/perf/test_matrix_operation.cpp b/modules/ocl/perf/test_matrix_operation.cpp new file mode 100644 index 0000000000..cc9a142a6e --- /dev/null +++ b/modules/ocl/perf/test_matrix_operation.cpp @@ -0,0 +1,616 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace cvtest; +using namespace testing; +using namespace std; +using namespace cv::ocl; +////////////////////////////////converto///////////////////////////////////////////////// +PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType) +{ + int type; + int dst_type; + + //src mat + cv::Mat mat; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + dst_type = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat.cols-1; //start + roirows = mat.rows-1; + srcx = 1; + srcy = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + }; + + mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + //gdst_whole = dst; + //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + //gmat = mat_roi; + } +}; + + +struct ConvertTo :ConvertToTestBase {}; + +TEST_P(ConvertTo, Accuracy) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + mat_roi.convertTo(dst_roi, dst_type); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + t2=(double)cvGetTickCount();//kernel + gmat.convertTo(gdst, dst_type); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + gmat.convertTo(gdst, dst_type); + }; +#endif + +} + + +///////////////////////////////////////////copyto///////////////////////////////////////////////////////////// + +PARAM_TEST_CASE(CopyToTestBase, MatType, bool) +{ + int type; + + cv::Mat mat; + cv::Mat mask; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int dstx; + int dsty; + int maskx; + int masky; + + //src mat with roi + cv::Mat mat_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat.cols-1; //start + roirows = mat.rows-1; + srcx = 1; + srcy = 1; + dstx = 1; + dsty =1; + maskx = 1; + masky = 1; + }else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + maskx = 0; + masky = 0; + }; + + mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + //gdst_whole = dst; + //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + //gmat = mat_roi; + //gmask = mask_roi; + } +}; + +struct CopyTo :CopyToTestBase {}; + +TEST_P(CopyTo, Without_mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + mat_roi.copyTo(dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + t2=(double)cvGetTickCount();//kernel + gmat.copyTo(gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + gmat.copyTo(gdst); + }; +#endif +} + +TEST_P(CopyTo, With_mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + mat_roi.copyTo(dst_roi,mask_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + gmask = mask_roi; + t2=(double)cvGetTickCount();//kernel + gmat.copyTo(gdst, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + gmask = mask_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + gmat.copyTo(gdst, gmask); + }; +#endif +} + +///////////////////////////////////////////copyto///////////////////////////////////////////////////////////// + +PARAM_TEST_CASE(SetToTestBase, MatType, bool) +{ + int type; + cv::Scalar val; + + cv::Mat mat; + cv::Mat mask; + + // set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int maskx; + int masky; + + //src mat with roi + cv::Mat mat_roi; + cv::Mat mask_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gmat_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat.cols-1; //start + roirows = mat.rows-1; + srcx = 1; + srcy = 1; + maskx = 1; + masky = 1; + }else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + maskx = 0; + masky = 0; + }; + + mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + + //gmat_whole = mat; + //gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); + + //gmask = mask_roi; + } +}; + +struct SetTo :SetToTestBase {}; + +TEST_P(SetTo, Without_mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + mat_roi.setTo(val); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat_whole = mat; + gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); + t2=(double)cvGetTickCount();//kernel + gmat.setTo(val); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gmat_whole.download(cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat_whole = mat; + gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + gmat.setTo(val); + }; +#endif +} + +TEST_P(SetTo, With_mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + mat_roi.setTo(val, mask_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat_whole = mat; + gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); + + gmask = mask_roi; + t2=(double)cvGetTickCount();//kernel + gmat.setTo(val, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gmat_whole.download(cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat_whole = mat; + gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); + + gmask = mask_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + gmat.setTo(val, gmask); + }; +#endif +} + +//**********test************ + +INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4))); + +INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter +#endif diff --git a/modules/ocl/perf/test_split_merge.cpp b/modules/ocl/perf/test_split_merge.cpp new file mode 100644 index 0000000000..e3e8ee445c --- /dev/null +++ b/modules/ocl/perf/test_split_merge.cpp @@ -0,0 +1,455 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace cvtest; +using namespace testing; +using namespace std; +using namespace cv::ocl; +PARAM_TEST_CASE(MergeTestBase, MatType, int) +{ + int type; + int channels; + + //src mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mat3; + cv::Mat mat4; + + //dst mat + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int src3x; + int src3y; + int src4x; + int src4y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mat3_roi; + cv::Mat mat4_roi; + + //dst mat with roi + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gmat3; + cv::ocl::oclMat gmat4; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + channels = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + mat2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols-1; //start + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + src2x = 1; + src2y = 1; + src3x = 1; + src3y = 1; + src4x = 1; + src4y = 1; + dstx = 1; + dsty =1; + + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + src2x = 0; + src2y = 0; + src3x = 0; + src3y = 0; + src4x = 0; + src4y = 0; + 
dstx = 0;
+            dsty = 0;
+        };
+
+        mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+        mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
+        mat3_roi = mat3(Rect(src3x, src3y, roicols, roirows));
+        mat4_roi = mat4(Rect(src4x, src4y, roicols, roirows));
+
+        dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+    }
+
+};
+
+struct Merge : MergeTestBase {};
+
+TEST_P(Merge, Accuracy)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    double totalcputick = 0;
+    double totalgputick = 0;
+    double totalgputick_kernel = 0;
+    double t0 = 0;
+    double t1 = 0;
+    double t2 = 0;
+    for(int k = 0; k < 2; k++)
+    {
+        totalcputick = 0;
+        totalgputick = 0;
+        totalgputick_kernel = 0;
+        for(int j = 0; j < LOOP_TIMES + 1; j++)
+        {
+            Has_roi(k);
+            std::vector<cv::Mat> dev_src;
+            dev_src.push_back(mat1_roi);
+            dev_src.push_back(mat2_roi);
+            dev_src.push_back(mat3_roi);
+            dev_src.push_back(mat4_roi);
+            t0 = (double)cvGetTickCount();//cpu start
+            cv::merge(dev_src, dst_roi);
+            t0 = (double)cvGetTickCount() - t0;//cpu end
+
+            t1 = (double)cvGetTickCount();//gpu start1
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gmat3 = mat3_roi;
+            gmat4 = mat4_roi;
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            std::vector<cv::ocl::oclMat> dev_gsrc;
+            dev_gsrc.push_back(gmat1);
+            dev_gsrc.push_back(gmat2);
+            dev_gsrc.push_back(gmat3);
+            dev_gsrc.push_back(gmat4);
+            t2 = (double)cvGetTickCount();//kernel
+            cv::ocl::merge(dev_gsrc, gdst);
+            t2 = (double)cvGetTickCount() - t2;//kernel
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);//download
+            t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+            if(j == 0) // first iteration is a warm-up run and is not counted
+                continue;
+
+            totalgputick = t1 + totalgputick;
+            totalcputick = t0 + totalcputick;
+            totalgputick_kernel = t2 + totalgputick_kernel;
+
+        }
+        if(k == 0){cout << "no roi\n";}else{cout << "with roi\n";};
+        cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency() * LOOP_TIMES * 1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency() * LOOP_TIMES * 1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency() * LOOP_TIMES * 1000.) << "ms" << endl;
+    }
+#else
+    for(int j = 0; j < 2; j++)
+    {
+        Has_roi(j);
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        gmat3 = mat3_roi;
+        gmat4 = mat4_roi;
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        std::vector<cv::ocl::oclMat> dev_gsrc;
+        dev_gsrc.push_back(gmat1);
+        dev_gsrc.push_back(gmat2);
+        dev_gsrc.push_back(gmat3);
+        dev_gsrc.push_back(gmat4);
+
+        if(j == 0){cout << "no roi:";}else{cout << "\nwith roi:";};
+        cv::ocl::merge(dev_gsrc, gdst);
+    };
+#endif
+}
+
+
+PARAM_TEST_CASE(SplitTestBase, MatType, int)
+{
+    int type;
+    int channels;
+
+    //src mat
+    cv::Mat mat;
+
+    //dst mat
+    cv::Mat dst1;
+    cv::Mat dst2;
+    cv::Mat dst3;
+    cv::Mat dst4;
+
+    // set up roi
+    int roicols;
+    int roirows;
+    int srcx;
+    int srcy;
+    int dst1x;
+    int dst1y;
+    int dst2x;
+    int dst2y;
+    int dst3x;
+    int dst3y;
+    int dst4x;
+    int dst4y;
+
+    //src mat with roi
+    cv::Mat mat_roi;
+
+    //dst mat with roi
+    cv::Mat dst1_roi;
+    cv::Mat dst2_roi;
+    cv::Mat dst3_roi;
+    cv::Mat dst4_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+    //ocl dst mat for testing
+    cv::ocl::oclMat gdst1_whole;
+    cv::ocl::oclMat gdst2_whole;
+    cv::ocl::oclMat gdst3_whole;
+    cv::ocl::oclMat gdst4_whole;
+
+    //ocl mat with roi
+    cv::ocl::oclMat gmat;
+    cv::ocl::oclMat gdst1;
+    cv::ocl::oclMat gdst2;
+    cv::ocl::oclMat gdst3;
+    cv::ocl::oclMat gdst4;
+
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        channels = GET_PARAM(1);
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        cv::Size size(MWIDTH, MHEIGHT);
+
+        mat  = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
+        dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+        dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+        dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+        dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        //if you want to use undefault device, set it here
+        //setDevice(oclinfo[0]);
+        setBinpath(CLBINPATH);
+    }
+
+    void Has_roi(int b)
+    {
+        //cv::RNG& rng = TS::ptr()->get_rng();
+        if(b)
+        {
+            //randomize ROI
+            roicols = mat.cols - 1; //start
+            roirows = mat.rows - 1;
+            srcx = 1;
+            srcy = 1;
+            dst1x = 1;
+            dst1y = 1;
+            dst2x = 1;
+            dst2y = 1;
+            dst3x = 1;
+            dst3y = 1;
+            dst4x = 1;
+            dst4y = 1;
+        }else
+        {
+            roicols = mat.cols;
+            roirows = mat.rows;
+            srcx = 0;
+            srcy = 0;
+            dst1x = 0;
+            dst1y = 0;
+            dst2x = 0;
+            dst2y = 0;
+            dst3x = 0;
+            dst3y = 0;
+            dst4x = 0;
+            dst4y = 0;
+        };
+
+        mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
+
+        dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows));
+        dst2_roi = dst2(Rect(dst2x, dst2y, roicols, roirows));
+        dst3_roi = dst3(Rect(dst3x, dst3y, roicols, roirows));
+        dst4_roi = dst4(Rect(dst4x, dst4y, roicols, roirows));
+    }
+
+};
+
+struct Split : SplitTestBase {};
+
+TEST_P(Split, Accuracy)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    double totalcputick = 0;
+    double totalgputick = 0;
+    double totalgputick_kernel = 0;
+    double t0 = 0;
+    double t1 = 0;
+    double t2 = 0;
+    for(int k = 0; k < 2; k++)
+    {
+        totalcputick = 0;
+        totalgputick = 0;
+        totalgputick_kernel = 0;
+        for(int j = 0; j < LOOP_TIMES + 1; j++)
+        {
+            Has_roi(k);
+            cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
+            cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
+            t0 = (double)cvGetTickCount();//cpu start
+            cv::split(mat_roi, dev_dst);
+            t0 = (double)cvGetTickCount() - t0;//cpu end
+
+            t1 = (double)cvGetTickCount();//gpu start1
+            gdst1_whole = dst1;
+            gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows));
+
+            gdst2_whole = dst2;
+            gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows));
+
+
gdst3_whole = dst3; + gdst3 = gdst3_whole(Rect(dst3x,dst3y,roicols,roirows)); + + gdst4_whole = dst4; + gdst4 = gdst4_whole(Rect(dst4x,dst4y,roicols,roirows)); + + gmat = mat_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::split(gmat, dev_gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst1; + cv::Mat cpu_dst2; + cv::Mat cpu_dst3; + cv::Mat cpu_dst4; + gdst1_whole.download(cpu_dst1); + gdst2_whole.download(cpu_dst2); + gdst3_whole.download(cpu_dst3); + gdst4_whole.download(cpu_dst4); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi}; + cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4}; + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dst1x,dst1y,roicols,roirows)); + + gdst2_whole = dst2; + gdst2 = gdst2_whole(Rect(dst2x,dst2y,roicols,roirows)); + + gdst3_whole = dst3; + gdst3 = gdst3_whole(Rect(dst3x,dst3y,roicols,roirows)); + + gdst4_whole = dst4; + gdst4 = gdst4_whole(Rect(dst4x,dst4y,roicols,roirows)); + gmat = mat_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::split(gmat, dev_gdst); + }; +#endif +} + +//*************test***************** +INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine( + Values(CV_8UC4, CV_32FC4), Values(1, 4))); + +INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine( + Values(CV_8U, CV_32S, CV_32F), Values(1, 4))); + +#endif // HAVE_OPENCL diff --git a/modules/ocl/perf/utility.cpp b/modules/ocl/perf/utility.cpp new file mode 100644 index 0000000000..417f72f056 --- /dev/null +++ b/modules/ocl/perf/utility.cpp @@ -0,0 +1,265 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#define VARNAME(A) #A +using namespace std; +using namespace cv; +using namespace cv::gpu; +using namespace cvtest; + + +//std::string generateVarList(int first,...) +//{ +// vector varname; +// +// va_list argp; +// string s; +// stringstream ss; +// va_start(argp,first); +// int i=first; +// while(i!=-1) +// { +// ss<get_rng(); + return rng.uniform(minVal, maxVal); +} + +double randomDouble(double minVal, double maxVal) +{ + RNG& rng = TS::ptr()->get_rng(); + return rng.uniform(minVal, maxVal); +} + +Size randomSize(int minVal, int maxVal) +{ + return cv::Size(randomInt(minVal, maxVal), randomInt(minVal, maxVal)); +} + +Scalar randomScalar(double minVal, double maxVal) +{ + return Scalar(randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal)); +} + +Mat randomMat(Size size, int type, double minVal, double maxVal) +{ + return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false); +} + + + + + + + +/* +void showDiff(InputArray gold_, InputArray actual_, double eps) +{ + Mat gold; + if (gold_.kind() == _InputArray::MAT) + gold = gold_.getMat(); + else + gold_.getGpuMat().download(gold); + + Mat actual; + if (actual_.kind() == _InputArray::MAT) + actual = actual_.getMat(); + else + actual_.getGpuMat().download(actual); + + Mat diff; + absdiff(gold, actual, diff); + threshold(diff, diff, eps, 255.0, cv::THRESH_BINARY); + + namedWindow("gold", WINDOW_NORMAL); + namedWindow("actual", WINDOW_NORMAL); + namedWindow("diff", WINDOW_NORMAL); + + imshow("gold", gold); + imshow("actual", actual); + imshow("diff", diff); + + waitKey(); +} +*/ + +/* +bool supportFeature(const DeviceInfo& info, FeatureSet feature) +{ + return TargetArchs::builtWith(feature) && info.supports(feature); +} + +const vector& devices() +{ + static vector devs; + static bool first = true; + + if (first) + { + int deviceCount = getCudaEnabledDeviceCount(); + + devs.reserve(deviceCount); + + for (int i = 0; i < deviceCount; ++i) + { + DeviceInfo info(i); + if (info.isCompatible()) + devs.push_back(info); + } + + first = false; + } + + return devs; +} + +vector devices(FeatureSet feature) +{ + const vector& d = devices(); + + vector devs_filtered; + + if (TargetArchs::builtWith(feature)) + { + devs_filtered.reserve(d.size()); + + for (size_t i = 0, size = d.size(); i < size; ++i) + { + const DeviceInfo& info = d[i]; + + if (info.supports(feature)) + devs_filtered.push_back(info); + } + } + + return devs_filtered; +} +*/ + +vector types(int depth_start, int depth_end, int cn_start, int cn_end) +{ + vector v; + + v.reserve((depth_end - depth_start + 1) * (cn_end - cn_start + 1)); + + for (int depth = depth_start; depth <= depth_end; 
++depth) + { + for (int cn = cn_start; cn <= cn_end; ++cn) + { + v.push_back(CV_MAKETYPE(depth, cn)); + } + } + + return v; +} + +const vector& all_types() +{ + static vector v = types(CV_8U, CV_64F, 1, 4); + + return v; +} + +Mat readImage(const string& fileName, int flags) +{ + return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags); +} + +Mat readImageType(const string& fname, int type) +{ + Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR); + if (CV_MAT_CN(type) == 4) + { + Mat temp; + cvtColor(src, temp, cv::COLOR_BGR2BGRA); + swap(src, temp); + } + src.convertTo(src, CV_MAT_DEPTH(type)); + return src; +} + +double checkNorm(const Mat& m) +{ + return norm(m, NORM_INF); +} + +double checkNorm(const Mat& m1, const Mat& m2) +{ + return norm(m1, m2, NORM_INF); +} + +double checkSimilarity(const Mat& m1, const Mat& m2) +{ + Mat diff; + matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED); + return std::abs(diff.at(0, 0) - 1.f); +} + +/* +void cv::ocl::PrintTo(const DeviceInfo& info, ostream* os) +{ + (*os) << info.name(); +} +*/ + +void PrintTo(const Inverse& inverse, std::ostream* os) +{ + if (inverse) + (*os) << "inverse"; + else + (*os) << "direct"; +} diff --git a/modules/ocl/perf/utility.hpp b/modules/ocl/perf/utility.hpp new file mode 100644 index 0000000000..0a0bfba6d7 --- /dev/null +++ b/modules/ocl/perf/utility.hpp @@ -0,0 +1,177 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef __OPENCV_TEST_UTILITY_HPP__ +#define __OPENCV_TEST_UTILITY_HPP__ +//#define PRINT_KERNEL_RUN_TIME +#ifdef PRINT_KERNEL_RUN_TIME +#define LOOP_TIMES 1 +#else +#define LOOP_TIMES 1 +#endif +#define MWIDTH 2557 +#define MHEIGHT 2579 +#define CLBINPATH ".\\" +int randomInt(int minVal, int maxVal); +double randomDouble(double minVal, double maxVal); + +//std::string generateVarList(int first,...); +std::string generateVarList(int& p1,int& p2); +cv::Size randomSize(int minVal, int maxVal); +cv::Scalar randomScalar(double minVal, double maxVal); +cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0); + +void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); + +//! return true if device supports specified feature and gpu module was built with support the feature. +//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature); + +//! return all devices compatible with current gpu module build. +//const std::vector& devices(); +//! return all devices compatible with current gpu module build which support specified feature. +//std::vector devices(cv::gpu::FeatureSet feature); + +//! read image from testdata folder. +cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR); +cv::Mat readImageType(const std::string& fname, int type); + +double checkNorm(const cv::Mat& m); +double checkNorm(const cv::Mat& m1, const cv::Mat& m2); +double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2); + +#define EXPECT_MAT_NORM(mat, eps) \ +{ \ + EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \ +} + +//#define EXPECT_MAT_NEAR(mat1, mat2, eps) \ +//{ \ +// ASSERT_EQ(mat1.type(), mat2.type()); \ +// ASSERT_EQ(mat1.size(), mat2.size()); \ +// EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps); \ +//} + +#define EXPECT_MAT_NEAR(mat1, mat2, eps,s) \ +{ \ + ASSERT_EQ(mat1.type(), mat2.type()); \ + ASSERT_EQ(mat1.size(), mat2.size()); \ + EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps)< types(int depth_start, int depth_end, int cn_start, int cn_end); + +//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4). 
+const std::vector& all_types(); + +class Inverse +{ + public: + inline Inverse(bool val = false) : val_(val) {} + + inline operator bool() const { return val_; } + + private: + bool val_; +}; + +void PrintTo(const Inverse& useRoi, std::ostream* os); + +CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE) + +CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX) + + enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; +CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y) + +CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN) + + CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T); + +CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT) + +CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV) + +CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC) + +CV_ENUM(Border, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP) + +CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP) + +CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED) + +CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT) + +void run_perf_test(); + +#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > + +#define GET_PARAM(k) std::tr1::get< k >(GetParam()) + +#define ALL_DEVICES testing::ValuesIn(devices()) +#define DEVICES(feature) testing::ValuesIn(devices(feature)) + +#define ALL_TYPES testing::ValuesIn(all_types()) +#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end)) + +#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113)) + +#define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true)) + +#endif // __OPENCV_TEST_UTILITY_HPP__ diff --git a/modules/ocl/src/blend.cpp b/modules/ocl/src/blend.cpp new file mode 100644 index 0000000000..a9df907d3c --- /dev/null +++ b/modules/ocl/src/blend.cpp @@ -0,0 +1,98 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Nathan, liujun@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +#if !defined (HAVE_OPENCL) +void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, + oclMat& result){throw_nogpu();} +#else +namespace cv +{ + namespace ocl + { + ////////////////////////////////////OpenCL kernel strings////////////////////////// + extern const char *blend_linear; + } +} + +void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, + oclMat& result) +{ + cv::ocl::Context *ctx = img1.clCxt; + assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt); + int channels = img1.channels(); + int depth = img1.depth(); + int rows = img1.rows; + int cols = img1.cols; + int istep = img1.step; + int wstep = weights1.step; + size_t globalSize[] = {cols * channels, rows, 1}; + size_t localSize[] = {16, 16, 1}; + + vector< pair > args; + + if(globalSize[0]!=0) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&istep )); + args.push_back( make_pair( sizeof(cl_int), (void *)&wstep )); + std::string kernelName = "BlendLinear"; + + openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth); + } +} +#endif \ No newline at end of file diff --git a/modules/ocl/src/columnsum.cpp b/modules/ocl/src/columnsum.cpp new file mode 100644 index 0000000000..e789d38b09 --- /dev/null +++ b/modules/ocl/src/columnsum.cpp @@ -0,0 +1,91 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Chunpeng Zhang, chunpeng@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; +using namespace std; + + +#if !defined(HAVE_OPENCL) + +void cv::ocl::columnSum(const oclMat& src,oclMat& dst){ throw_nogpu(); } + +#else /*!HAVE_OPENCL */ + +namespace cv +{ + namespace ocl + { + extern const char* imgproc_columnsum; + } +} + +void cv::ocl::columnSum(const oclMat& src,oclMat& dst) +{ + CV_Assert(src.type() == CV_32FC1 && dst.type() == CV_32FC1 && src.size() == dst.size()); + + Context *clCxt = src.clCxt; + + const std::string kernelName = "columnSum"; + + std::vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); + + size_t globalThreads[3] = {dst.cols, dst.rows, 1}; + size_t localThreads[3] = {16, 16, 1}; + + openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + +} +#endif \ No newline at end of file diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp new file mode 100644 index 0000000000..b3eda35c18 --- /dev/null +++ b/modules/ocl/src/fft.cpp @@ -0,0 +1,302 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
+// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#include +#include "precomp.hpp" + +#ifdef HAVE_CLAMDFFT + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +#if !defined (HAVE_OPENCL) +void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); } +#else + +#include + +namespace cv{ namespace ocl { + enum FftType + { + C2R = 1, // opencl HERMITIAN_INTERLEAVED to real + R2C = 2, // real to opencl HERMITIAN_INTERLEAVED + C2C = 3 // complex to complex + }; + struct FftPlan + { + friend void fft_setup(); + friend void fft_teardown(); + ~FftPlan(); + protected: + FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); + const Size dft_size; + const int src_step, dst_step; + const int flags; + const FftType type; + clAmdFftPlanHandle plHandle; + static vector planStore; + static bool started; + static clAmdFftSetupData * setupData; + public: + // return a baked plan. + // if there is one matched plan, return it + // if not, bake a new one, put it into the planStore and return it.
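+ // The plan cache key is (dft_size, src_step, dst_step, flags, type): getPlan first scans planStore for a plan baked with identical parameters and only creates a new clAmdFft plan when no match is found; cached plans are released in fft_teardown.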
+ static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); + }; +}} +bool cv::ocl::FftPlan::started = false; +vector cv::ocl::FftPlan::planStore = vector(); +clAmdFftSetupData * cv::ocl::FftPlan::setupData = 0; + +void cv::ocl::fft_setup() +{ + if(FftPlan::started) + { + return; + } + FftPlan::setupData = new clAmdFftSetupData; + openCLSafeCall(clAmdFftInitSetupData( FftPlan::setupData )); + FftPlan::started = true; +} +void cv::ocl::fft_teardown() +{ + if(!FftPlan::started) + { + return; + } + delete FftPlan::setupData; + for(int i = 0; i < FftPlan::planStore.size(); i ++) + { + delete FftPlan::planStore[i]; + } + FftPlan::planStore.clear(); + openCLSafeCall( clAmdFftTeardown( ) ); + FftPlan::started = false; +} + +// bake a new plan +cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) + : dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type), plHandle(0) +{ + if(!FftPlan::started) + { + // implicitly do fft setup + fft_setup(); + } + + bool is_1d_input = (_dft_size.height == 1); + int is_row_dft = flags & DFT_ROWS; + int is_scaled_dft = flags & DFT_SCALE; + int is_inverse = flags & DFT_INVERSE; + + clAmdFftResultLocation place; + clAmdFftLayout inLayout; + clAmdFftLayout outLayout; + clAmdFftDim dim = is_1d_input||is_row_dft ? CLFFT_1D : CLFFT_2D; + + size_t batchSize = is_row_dft?dft_size.height : 1; + size_t clLengthsIn[ 3 ] = {1, 1, 1}; + size_t clStridesIn[ 3 ] = {1, 1, 1}; + size_t clLengthsOut[ 3 ] = {1, 1, 1}; + size_t clStridesOut[ 3 ] = {1, 1, 1}; + clLengthsIn[0] = dft_size.width; + clLengthsIn[1] = is_row_dft ? 1 : dft_size.height; + clStridesIn[0] = 1; + clStridesOut[0] = 1; + + switch(_type) + { + case C2C: + inLayout = CLFFT_COMPLEX_INTERLEAVED; + outLayout = CLFFT_COMPLEX_INTERLEAVED; + clStridesIn[1] = src_step / sizeof(std::complex); + clStridesOut[1] = clStridesIn[1]; + break; + case R2C: + CV_Assert(!is_row_dft); // this is not supported yet + inLayout = CLFFT_REAL; + outLayout = CLFFT_HERMITIAN_INTERLEAVED; + clStridesIn[1] = src_step / sizeof(float); + clStridesOut[1] = dst_step / sizeof(std::complex); + break; + case C2R: + CV_Assert(!is_row_dft); // this is not supported yet + inLayout = CLFFT_HERMITIAN_INTERLEAVED; + outLayout = CLFFT_REAL; + clStridesIn[1] = src_step / sizeof(std::complex); + clStridesOut[1] = dst_step / sizeof(float); + break; + default: + //std::runtime_error("does not support this convertion!"); + cout << "Does not support this convertion!" << endl; + throw exception(); + break; + } + + clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1]; + clStridesOut[2] = is_row_dft ? 
clStridesOut[1] : dft_size.width * clStridesOut[1]; + + openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, Context::getContext()->impl->clContext, dim, clLengthsIn ) ); + + openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) ); + openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) ); + openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) ); + + openCLSafeCall( clAmdFftSetPlanInStride ( plHandle, dim, clStridesIn ) ); + openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) ); + openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesIn[ dim ]) ); + openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) ); +} +cv::ocl::FftPlan::~FftPlan() +{ + for(int i = 0; i < planStore.size(); i ++) + { + if(planStore[i]->plHandle == plHandle) + { + planStore.erase(planStore.begin()+ i); + } + } + openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) ); +} + +clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) +{ + // go through search + for(int i = 0; i < planStore.size(); i ++) + { + FftPlan * plan = planStore[i]; + if( + plan->dft_size.width == _dft_size.width && + plan->dft_size.height == _dft_size.height && + plan->flags == _flags && + plan->src_step == _src_step && + plan->dst_step == _dst_step && + plan->type == _type + ) + { + return plan->plHandle; + } + } + // no baked plan is found + FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type); + planStore.push_back(newPlan); + return newPlan->plHandle; +} + +void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags) +{ + if(dft_size == Size(0,0)) + { + dft_size = src.size(); + } + // check if the given dft size is of optimal dft size + CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area())); + + // similar assertions with cuda module + CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2); + + // we don't support DFT_SCALE flag + CV_Assert(!(DFT_SCALE & flags)); + + bool is_1d_input = (src.rows == 1); + int is_row_dft = flags & DFT_ROWS; + int is_scaled_dft = flags & DFT_SCALE; + int is_inverse = flags & DFT_INVERSE; + bool is_complex_input = src.channels() == 2; + bool is_complex_output = !(flags & DFT_REAL_OUTPUT); + + // We don't support real-to-real transform + CV_Assert(is_complex_input || is_complex_output); + FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1); + + switch(type) + { + case C2C: + dst.create(src.rows, src.cols, CV_32FC2); + break; + case R2C: + CV_Assert(!is_row_dft); // this is not supported yet + dst.create(src.rows, src.cols/2 + 1, CV_32FC2); + break; + case C2R: + CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows); + CV_Assert(!is_row_dft); // this is not supported yet + dst.create(src.rows, dft_size.width, CV_32FC1); + break; + default: + //std::runtime_error("does not support this convertion!"); + cout << "Does not support this convertion!" 
<< endl; + throw exception(); + break; + } + clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type); + + //get the buffersize + size_t buffersize=0; + openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) ); + + //allocate the intermediate buffer + cl_mem clMedBuffer=NULL; + if (buffersize) + { + cl_int medstatus; + clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus); + openCLSafeCall( medstatus ); + } + openCLSafeCall( clAmdFftEnqueueTransform( plHandle, + is_inverse?CLFFT_BACKWARD:CLFFT_FORWARD, + 1, + &src.clCxt->impl->clCmdQueue, + 0, NULL, NULL, + (cl_mem*)&src.data, (cl_mem*)&dst.data, clMedBuffer ) ); + openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) ); + if(clMedBuffer) + { + openCLFree(clMedBuffer); + } +} + +#endif +#endif //HAVE_CLAMDFFT diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp new file mode 100644 index 0000000000..c35e061826 --- /dev/null +++ b/modules/ocl/src/gemm.cpp @@ -0,0 +1,161 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include +#include "precomp.hpp" + +#ifdef HAVE_CLAMDBLAS + +#include "clAmdBlas.h" + +#if !defined (HAVE_OPENCL) +void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); } +#else + +using namespace cv; + + void cv::ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha, + const oclMat& src3, double beta, oclMat& dst, int flags) + { + CV_Assert(src1.cols == src2.rows && + (src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols)); + CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported + if(!src3.empty()) + { + src3.copyTo(dst); + } + else + { + dst.create(src1.rows, src2.cols, src1.type()); + dst.setTo(Scalar::all(0)); + } + openCLSafeCall( clAmdBlasSetup() ); + + const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans; + const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans; + const clAmdBlasOrder order = clAmdBlasRowMajor; + + const int M = src1.rows; + const int N = src2.cols; + const int K = src1.cols; + int lda = src1.step; + int ldb = src2.step; + int ldc = dst.step; + int offa = src1.offset; + int offb = src2.offset; + int offc = dst.offset; + + + switch(src1.type()) + { + case CV_32FC1: + lda /= sizeof(float); + ldb /= sizeof(float); + ldc /= sizeof(float); + offa /= sizeof(float); + offb /= sizeof(float); + offc /= sizeof(float); + openCLSafeCall + ( + clAmdBlasSgemmEx(order, transA, transB, M, N, K, + alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, + beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) + ); + break; + case CV_64FC1: + lda /= sizeof(double); + ldb /= sizeof(double); + ldc /= sizeof(double); + offa /= sizeof(double); + offb /= sizeof(double); + offc /= sizeof(double); + openCLSafeCall + ( + clAmdBlasDgemmEx(order, transA, transB, M, N, K, + alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, + beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) + ); + break; + case CV_32FC2: + { + lda /= sizeof(std::complex); + ldb /= sizeof(std::complex); + ldc /= sizeof(std::complex); + offa /= sizeof(std::complex); + offb /= sizeof(std::complex); + offc /= sizeof(std::complex); + cl_float2 alpha_2 = {{alpha, 0}}; + cl_float2 beta_2 = {{beta, 0}}; + openCLSafeCall + ( + clAmdBlasCgemmEx(order, transA, transB, M, N, K, + alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, + beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) + ); + } + break; + case CV_64FC2: + { + lda /= sizeof(std::complex); + ldb /= sizeof(std::complex); + ldc /= sizeof(std::complex); + offa /= sizeof(std::complex); + offb /= sizeof(std::complex); + offc /= sizeof(std::complex); + cl_double2 alpha_2 = {{alpha, 0}}; + cl_double2 beta_2 = {{beta, 0}}; + openCLSafeCall + ( + clAmdBlasZgemmEx(order, transA, transB, M, N, K, + alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, + beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) + ); + } + break; + } + clAmdBlasTeardown(); + } +#endif +#endif diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp new file mode 100644 index 0000000000..1f8a92541b --- /dev/null +++ b/modules/ocl/src/hog.cpp @@ -0,0 +1,1787 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
+// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Wenju He, wenju@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage.
+// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +#if !defined (HAVE_OPENCL) + +cv::ocl::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int) { throw_nogpu(); } +size_t cv::ocl::HOGDescriptor::getDescriptorSize() const { throw_nogpu(); return 0; } +size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const { throw_nogpu(); return 0; } +double cv::ocl::HOGDescriptor::getWinSigma() const { throw_nogpu(); return 0; } +bool cv::ocl::HOGDescriptor::checkDetectorSize() const { throw_nogpu(); return false; } +void cv::ocl::HOGDescriptor::setSVMDetector(const vector&) { throw_nogpu(); } +void cv::ocl::HOGDescriptor::detect(const oclMat&, vector&, double, Size, Size) { throw_nogpu(); } +void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat&, vector&, double, Size, Size, double, int) { throw_nogpu(); } +void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat&) { throw_nogpu(); } +void cv::ocl::HOGDescriptor::getDescriptors(const oclMat&, Size, oclMat&, int) { throw_nogpu(); } +std::vector cv::ocl::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector(); } +std::vector cv::ocl::HOGDescriptor::getPeopleDetector48x96() { throw_nogpu(); return std::vector(); } +std::vector cv::ocl::HOGDescriptor::getPeopleDetector64x128() { throw_nogpu(); return std::vector(); } + +#else + +#define CELL_WIDTH 8 +#define CELL_HEIGHT 8 +#define CELLS_PER_BLOCK_X 2 +#define CELLS_PER_BLOCK_Y 2 +#define NTHREADS 256 + +namespace cv { namespace ocl +{ + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *objdetect_hog; +}} + +namespace cv { namespace ocl { namespace device +{ + namespace hog + { + int cnbins; + int cblock_stride_x; + int cblock_stride_y; + int cnblocks_win_x; + int cnblocks_win_y; + int cblock_hist_size; + int cblock_hist_size_2up; + int cdescr_size; + int cdescr_width; + + void set_up_constants(int nbins, int block_stride_x, int block_stride_y, + int nblocks_win_x, int nblocks_win_y); + + void compute_hists(int nbins, int block_stride_x, int blovck_stride_y, + int height, int width, const cv::ocl::oclMat& grad, + const cv::ocl::oclMat& qangle, float sigma, cv::ocl::oclMat& block_hists); + + void normalize_hists(int nbins, int block_stride_x, int block_stride_y, + int height, int width, cv::ocl::oclMat& block_hists, float threshold); + + void classify_hists(int win_height, int win_width, int block_stride_y, + int block_stride_x, int win_stride_y, int win_stride_x, int height, + int width, const cv::ocl::oclMat& block_hists, const cv::ocl::oclMat& coefs, float free_coef, + float threshold, cv::ocl::oclMat& labels); + + void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat& block_hists, + cv::ocl::oclMat& descriptors); + void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat& block_hists, + cv::ocl::oclMat& descriptors); + + void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat& img, + float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma); + void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img, + float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma); + } +}}} + +using namespace 
::cv::ocl::device; + +cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_, + int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_) + : win_size(win_size_), + block_size(block_size_), + block_stride(block_stride_), + cell_size(cell_size_), + nbins(nbins_), + win_sigma(win_sigma_), + threshold_L2hys(threshold_L2hys_), + gamma_correction(gamma_correction_), + nlevels(nlevels_) +{ + CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 && + (win_size.height - block_size.height) % block_stride.height == 0); + + CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0); + + CV_Assert(block_stride == cell_size); + + CV_Assert(cell_size == Size(8, 8)); + + Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); + CV_Assert(cells_per_block == Size(2, 2)); + + cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); + hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height); +} + +size_t cv::ocl::HOGDescriptor::getDescriptorSize() const +{ + return numPartsWithin(win_size, block_size, block_stride).area() * getBlockHistogramSize(); +} + +size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const +{ + Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); + return (size_t)(nbins * cells_per_block.area()); +} + +double cv::ocl::HOGDescriptor::getWinSigma() const +{ + return win_sigma >= 0 ? win_sigma : (block_size.width + block_size.height) / 8.0; +} + +bool cv::ocl::HOGDescriptor::checkDetectorSize() const +{ + size_t detector_size = detector.rows * detector.cols; + size_t descriptor_size = getDescriptorSize(); + return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1; +} + +void cv::ocl::HOGDescriptor::setSVMDetector(const vector& _detector) +{ + std::vector detector_reordered(_detector.size()); + + size_t block_hist_size = getBlockHistogramSize(); + cv::Size blocks_per_img = numPartsWithin(win_size, block_size, block_stride); + + for (int i = 0; i < blocks_per_img.height; ++i) + for (int j = 0; j < blocks_per_img.width; ++j) + { + const float* src = &_detector[0] + (j * blocks_per_img.height + i) * block_hist_size; + float* dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size; + for (size_t k = 0; k < block_hist_size; ++k) + dst[k] = src[k]; + } + + this->detector.upload(Mat(detector_reordered).reshape(1, 1)); + + size_t descriptor_size = getDescriptorSize(); + free_coef = _detector.size() > descriptor_size ? 
_detector[descriptor_size] : 0; + + CV_Assert(checkDetectorSize()); +} + +void cv::ocl::HOGDescriptor::computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle) +{ + CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); + + grad.create(img.size(), CV_32FC2); + + qangle.create(img.size(), CV_8UC2); + + float angleScale = (float)(nbins / CV_PI); + switch (img.type()) + { + case CV_8UC1: + hog::compute_gradients_8UC1(img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction); + break; + case CV_8UC4: + hog::compute_gradients_8UC4(img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction); + break; + } +} + + +void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat& img) +{ + computeGradient(img, grad, qangle); + + size_t block_hist_size = getBlockHistogramSize(); + Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride); + + block_hists.create(1, static_cast(block_hist_size * blocks_per_img.area()), CV_32F); + + hog::compute_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, grad, qangle, (float)getWinSigma(), block_hists); + + hog::normalize_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, block_hists, (float)threshold_L2hys); +} + + +void cv::ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride, oclMat& descriptors, int descr_format) +{ + CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + + computeBlockHistograms(img); + + const size_t block_hist_size = getBlockHistogramSize(); + Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); + Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); + + descriptors.create(wins_per_img.area(), static_cast(blocks_per_win.area() * block_hist_size), CV_32F); + + switch (descr_format) + { + case DESCR_FORMAT_ROW_BY_ROW: + hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, + win_stride.height, win_stride.width, img.rows, img.cols, block_hists, descriptors); + break; + case DESCR_FORMAT_COL_BY_COL: + hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, + win_stride.height, win_stride.width, img.rows, img.cols, block_hists, descriptors); + break; + default: + CV_Error(CV_StsBadArg, "Unknown descriptor format"); + } +} + + +void cv::ocl::HOGDescriptor::detect(const oclMat& img, vector& hits, double hit_threshold, Size win_stride, Size padding) +{ + CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); + CV_Assert(padding == Size(0, 0)); + + hits.clear(); + if (detector.empty()) + return; + + computeBlockHistograms(img); + + if (win_stride == Size()) + win_stride = block_stride; + else + CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + + Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); + labels.create(1, wins_per_img.area(), CV_8U); + + hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, + win_stride.height, win_stride.width, img.rows, img.cols, block_hists, + detector, (float)free_coef, (float)hit_threshold, labels); + + labels.download(labels_host); + unsigned char* vec = labels_host.ptr(); + for (int i = 0; i < wins_per_img.area(); i++) + { + int y = i / wins_per_img.width; + int x = i - wins_per_img.width * y; + if (vec[i]) + hits.push_back(Point(x * win_stride.width, y * win_stride.height)); + } +} + + + +void 
cv::ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector& found_locations, double hit_threshold, + Size win_stride, Size padding, double scale0, int group_threshold) +{ + CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); + + vector level_scale; + double scale = 1.; + int levels = 0; + + for (levels = 0; levels < nlevels; levels++) + { + level_scale.push_back(scale); + if (cvRound(img.cols/scale) < win_size.width || + cvRound(img.rows/scale) < win_size.height || scale0 <= 1) + break; + scale *= scale0; + } + levels = std::max(levels, 1); + level_scale.resize(levels); + image_scales.resize(levels); + + std::vector all_candidates; + vector locations; + + for (size_t i = 0; i < level_scale.size(); i++) + { + scale = level_scale[i]; + Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale)); + oclMat smaller_img; + + if (sz == img.size()) + smaller_img = img; + else + { + image_scales[i].create(sz, img.type()); + resize(img, image_scales[i], image_scales[i].size(), 0, 0, INTER_LINEAR); + smaller_img = image_scales[i]; + } + + detect(smaller_img, locations, hit_threshold, win_stride, padding); + Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale)); + for (size_t j = 0; j < locations.size(); j++) + all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size)); + } + + found_locations.assign(all_candidates.begin(), all_candidates.end()); + groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/); +} + +int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) +{ + return (size - part_size + stride) / stride; +} + +cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride) +{ + return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height)); +} + +std::vector cv::ocl::HOGDescriptor::getDefaultPeopleDetector() +{ + return getPeopleDetector64x128(); +} + +std::vector cv::ocl::HOGDescriptor::getPeopleDetector48x96() +{ + static const float detector[] = { + 0.294350f, -0.098796f, -0.129522f, 0.078753f, 0.387527f, 0.261529f, + 0.145939f, 0.061520f, 0.328699f, 0.227148f, -0.066467f, -0.086723f, + 0.047559f, 0.106714f, 0.037897f, 0.111461f, -0.024406f, 0.304769f, + 0.254676f, -0.069235f, 0.082566f, 0.147260f, 0.326969f, 0.148888f, + 0.055270f, -0.087985f, 0.261720f, 0.143442f, 0.026812f, 0.238212f, + 0.194020f, 0.056341f, -0.025854f, -0.034444f, -0.156631f, 0.205174f, + 0.089008f, -0.139811f, -0.100147f, -0.037830f, -0.029230f, -0.055641f, + 0.033248f, -0.016512f, 0.155244f, 0.247315f, -0.124694f, -0.048414f, + -0.062219f, 0.193683f, 0.004574f, 0.055089f, 0.093565f, 0.167712f, + 0.167581f, 0.018895f, 0.215258f, 0.122609f, 0.090520f, -0.067219f, + -0.049029f, -0.099615f, 0.241804f, -0.094893f, -0.176248f, 0.001727f, + -0.134473f, 0.104442f, 0.050942f, 0.081165f, 0.072156f, 0.121646f, + 0.002656f, -0.297974f, -0.133587f, -0.060121f, -0.092515f, -0.048974f, + -0.084754f, -0.180111f, -0.038590f, 0.086283f, -0.134636f, -0.107249f, + 0.132890f, 0.141556f, 0.249425f, 0.130273f, -0.030031f, 0.073212f, + -0.008155f, 0.019931f, 0.071688f, 0.000300f, -0.019525f, -0.021725f, + -0.040993f, -0.086841f, 0.070124f, 0.240033f, 0.265350f, 0.043208f, + 0.166754f, 0.091453f, 0.060916f, -0.036972f, -0.091043f, 0.079873f, + 0.219781f, 0.158102f, -0.140618f, -0.043016f, 0.124802f, 0.093668f, + 0.103208f, 0.094872f, 0.080541f, 0.137711f, 0.160566f, -0.169231f, + 0.013983f, 
0.309508f, -0.004217f, -0.057200f, -0.064489f, 0.014066f, + 0.361009f, 0.251328f, -0.080983f, -0.044183f, 0.061436f, -0.037381f, + -0.078786f, 0.030993f, 0.066314f, 0.037683f, 0.152325f, -0.091683f, + 0.070203f, 0.217856f, 0.036435f, -0.076462f, 0.006254f, -0.094431f, + 0.154829f, -0.023038f, -0.196961f, -0.024594f, 0.178465f, -0.050139f, + -0.045932f, -0.000965f, 0.109112f, 0.046165f, -0.159373f, -0.008713f, + 0.041307f, 0.097129f, -0.057211f, -0.064599f, 0.077165f, 0.176167f, + 0.138322f, 0.065753f, -0.104950f, 0.017933f, 0.136255f, -0.011598f, + 0.047007f, 0.080550f, 0.068619f, 0.084661f, -0.035493f, -0.091314f, + -0.041411f, 0.060971f, -0.101912f, -0.079870f, -0.085977f, -0.022686f, + 0.079788f, -0.098064f, -0.054603f, 0.040383f, 0.300794f, 0.128603f, + 0.094844f, 0.047407f, 0.101825f, 0.061832f, -0.162160f, -0.204553f, + -0.035165f, 0.101450f, -0.016641f, -0.027140f, -0.134392f, -0.008743f, + 0.102331f, 0.114853f, 0.009644f, 0.062823f, 0.237339f, 0.167843f, + 0.053066f, -0.012592f, 0.043158f, 0.002305f, 0.065001f, -0.038929f, + -0.020356f, 0.152343f, 0.043469f, -0.029967f, -0.042948f, 0.032481f, + 0.068488f, -0.110840f, -0.111083f, 0.111980f, -0.002072f, -0.005562f, + 0.082926f, 0.006635f, -0.108153f, 0.024242f, -0.086464f, -0.189884f, + -0.017492f, 0.191456f, -0.007683f, -0.128769f, -0.038017f, -0.132380f, + 0.091926f, 0.079696f, -0.106728f, -0.007656f, 0.172744f, 0.011576f, + 0.009883f, 0.083258f, -0.026516f, 0.145534f, 0.153924f, -0.130290f, + -0.108945f, 0.124490f, -0.003186f, -0.100485f, 0.015024f, -0.060512f, + 0.026288f, -0.086713f, -0.169012f, 0.076517f, 0.215778f, 0.043701f, + -0.131642f, -0.012585f, -0.045181f, -0.118183f, -0.241544f, -0.167293f, + -0.020107f, -0.019917f, -0.101827f, -0.107096f, -0.010503f, 0.044938f, + 0.189680f, 0.217119f, -0.046086f, 0.044508f, 0.199716f, -0.036004f, + -0.148927f, 0.013355f, -0.078279f, 0.030451f, 0.056301f, -0.024609f, + 0.083224f, 0.099533f, -0.039432f, -0.138880f, 0.005482f, -0.024120f, + -0.140468f, -0.066381f, -0.017057f, 0.009260f, -0.058004f, -0.028486f, + -0.061610f, 0.007483f, -0.158309f, -0.150687f, -0.044595f, -0.105121f, + -0.045763f, -0.006618f, -0.024419f, -0.117713f, -0.119366f, -0.175941f, + -0.071542f, 0.119027f, 0.111362f, 0.043080f, 0.034889f, 0.093003f, + 0.007842f, 0.057368f, -0.108834f, -0.079968f, 0.230959f, 0.020205f, + 0.011470f, 0.098877f, 0.101310f, -0.030215f, -0.018018f, -0.059552f, + -0.106157f, 0.021866f, -0.036471f, 0.080051f, 0.041165f, -0.082101f, + 0.117726f, 0.030961f, -0.054763f, -0.084102f, -0.185778f, -0.061305f, + -0.038089f, -0.110728f, -0.264010f, 0.076675f, -0.077111f, -0.137644f, + 0.036232f, 0.277995f, 0.019116f, 0.107738f, 0.144003f, 0.080304f, + 0.215036f, 0.228897f, 0.072713f, 0.077773f, 0.120168f, 0.075324f, + 0.062730f, 0.122478f, -0.049008f, 0.164912f, 0.162450f, 0.041246f, + 0.009891f, -0.097827f, -0.038700f, -0.023027f, -0.120020f, 0.203364f, + 0.248474f, 0.149810f, -0.036276f, -0.082814f, -0.090343f, -0.027143f, + -0.075689f, -0.320310f, -0.000500f, -0.143334f, -0.065077f, -0.186936f, + 0.129372f, 0.116431f, 0.181699f, 0.170436f, 0.418854f, 0.460045f, + 0.333719f, 0.230515f, 0.047822f, -0.044954f, -0.068086f, 0.140179f, + -0.044821f, 0.085550f, 0.092483f, -0.107296f, -0.130670f, -0.206629f, + 0.114601f, -0.317869f, -0.076663f, 0.038680f, 0.212753f, -0.016059f, + -0.126526f, -0.163602f, 0.210154f, 0.099887f, -0.126366f, 0.118453f, + 0.019309f, -0.021611f, -0.096499f, -0.111809f, -0.200489f, 0.142854f, + 0.228840f, -0.353346f, -0.179151f, 0.116834f, 0.252389f, -0.031728f, + -0.188135f, 
-0.158998f, 0.386523f, 0.122315f, 0.209944f, 0.394023f, + 0.359030f, 0.260717f, 0.170335f, 0.013683f, -0.142596f, -0.026138f, + -0.011878f, -0.150519f, 0.047159f, -0.107062f, -0.147347f, -0.187689f, + -0.186027f, -0.208048f, 0.058468f, -0.073026f, -0.236556f, -0.079788f, + -0.146216f, -0.058563f, -0.101361f, -0.071294f, -0.071093f, 0.116919f, + 0.234304f, 0.306781f, 0.321866f, 0.240000f, 0.073261f, -0.012173f, + 0.026479f, 0.050173f, 0.166127f, 0.228955f, 0.061905f, 0.156460f, + 0.205990f, 0.120672f, 0.037350f, 0.167884f, 0.290099f, 0.420900f, + -0.012601f, 0.189839f, 0.306378f, 0.118383f, -0.095598f, -0.072360f, + -0.132496f, -0.224259f, -0.126021f, 0.022714f, 0.284039f, 0.051369f, + -0.000927f, -0.058735f, -0.083354f, -0.141254f, -0.187578f, -0.202669f, + 0.048902f, 0.246597f, 0.441863f, 0.342519f, 0.066979f, 0.215286f, + 0.188191f, -0.072240f, -0.208142f, -0.030196f, 0.178141f, 0.136985f, + -0.043374f, -0.181098f, 0.091815f, 0.116177f, -0.126690f, -0.386625f, + 0.368165f, 0.269149f, -0.088042f, -0.028823f, 0.092961f, 0.024099f, + 0.046112f, 0.176756f, 0.135849f, 0.124955f, 0.195467f, -0.037218f, + 0.167217f, 0.188938f, 0.053528f, -0.066561f, 0.133721f, -0.070565f, + 0.115898f, 0.152435f, -0.116993f, -0.110592f, -0.179005f, 0.026668f, + 0.080530f, 0.075084f, -0.070401f, 0.012497f, 0.021849f, -0.139764f, + -0.022020f, -0.096301f, -0.064954f, -0.127446f, -0.013806f, -0.108315f, + 0.156285f, 0.149867f, -0.011382f, 0.064532f, 0.029168f, 0.027393f, + 0.069716f, 0.153735f, 0.038459f, 0.230714f, 0.253840f, 0.059522f, + -0.045053f, 0.014083f, 0.071103f, 0.068747f, 0.095887f, 0.005832f, + 0.144887f, 0.026357f, -0.067359f, -0.044151f, -0.123283f, -0.019911f, + 0.005318f, 0.109208f, -0.003201f, -0.021734f, 0.142025f, -0.066907f, + -0.120070f, -0.188639f, 0.012472f, -0.048704f, -0.012366f, -0.184828f, + 0.168591f, 0.267166f, 0.058208f, -0.044101f, 0.033500f, 0.178558f, + 0.104550f, 0.122418f, 0.080177f, 0.173246f, 0.298537f, 0.064173f, + 0.053397f, 0.174341f, 0.230984f, 0.117025f, 0.166242f, 0.227781f, + 0.120623f, 0.176952f, -0.011393f, -0.086483f, -0.008270f, 0.051700f, + -0.153369f, -0.058837f, -0.057639f, -0.060115f, 0.026349f, -0.160745f, + -0.037894f, -0.048575f, 0.041052f, -0.022112f, 0.060365f, 0.051906f, + 0.162657f, 0.138519f, -0.050185f, -0.005938f, 0.071301f, 0.127686f, + 0.062342f, 0.144400f, 0.072600f, 0.198436f, 0.246219f, -0.078185f, + -0.036169f, 0.075934f, 0.047328f, -0.013601f, 0.087205f, 0.019900f, + 0.022606f, -0.015365f, -0.092506f, 0.075275f, -0.116375f, 0.050500f, + 0.045118f, 0.166567f, 0.072073f, 0.060371f, 0.131747f, -0.169863f, + -0.039352f, -0.047486f, -0.039797f, -0.204312f, 0.021710f, 0.129443f, + -0.021173f, 0.173416f, -0.070794f, -0.063986f, 0.069689f, -0.064099f, + -0.123201f, -0.017372f, -0.206870f, 0.065863f, 0.113226f, 0.024707f, + -0.071341f, -0.066964f, -0.098278f, -0.062927f, 0.075840f, 0.014716f, + 0.019378f, 0.132699f, -0.074191f, -0.089557f, -0.078446f, -0.197488f, + -0.173665f, 0.052583f, 0.044361f, 0.113549f, 0.098492f, 0.077379f, + -0.011146f, -0.192593f, -0.164435f, 0.045568f, 0.205699f, 0.049187f, + -0.082281f, 0.134874f, 0.185499f, 0.034968f, -0.119561f, -0.112372f, + -0.115091f, -0.054042f, -0.183816f, -0.078100f, 0.190695f, 0.091617f, + 0.004257f, -0.041135f, -0.061453f, -0.141592f, -0.194809f, -0.120638f, + 0.020168f, 0.109672f, 0.067398f, -0.015238f, -0.239145f, -0.264671f, + -0.185176f, 0.050472f, 0.020793f, 0.035678f, 0.022839f, -0.052055f, + -0.127968f, -0.113049f, -0.228416f, -0.258281f, -0.053437f, 0.076424f, + 0.061450f, 0.237478f, 
0.003618f, -0.055865f, -0.108087f, -0.028937f, + 0.045585f, 0.052829f, -0.001471f, 0.022826f, 0.059565f, -0.104430f, + -0.077266f, -0.211882f, -0.212078f, 0.028074f, 0.075846f, 0.016265f, + 0.161879f, 0.134477f, 0.008935f, -0.048041f, 0.074692f, 0.004928f, + -0.025156f, 0.192874f, 0.074410f, 0.308732f, 0.267400f, 0.094208f, + -0.005251f, 0.042041f, -0.032148f, 0.015588f, 0.252869f, 0.175302f, + 0.022892f, 0.081673f, 0.063208f, 0.162626f, 0.194426f, 0.233890f, + 0.262292f, 0.186930f, 0.084079f, -0.286388f, -0.213034f, -0.048867f, + -0.207669f, -0.170050f, 0.011673f, -0.092958f, -0.192786f, -0.273536f, + 0.230904f, 0.266732f, 0.320519f, 0.297155f, 0.548169f, 0.304922f, + 0.132687f, 0.247333f, 0.212488f, -0.271472f, -0.142105f, -0.002627f, + -0.119215f, 0.128383f, 0.100079f, -0.057490f, -0.121902f, -0.228892f, + 0.202292f, -0.399795f, -0.371326f, -0.095836f, -0.063626f, -0.161375f, + -0.311180f, -0.294797f, 0.242122f, 0.011788f, 0.095573f, 0.322523f, + 0.511840f, 0.322880f, 0.313259f, 0.173331f, 0.002542f, -0.029802f, + 0.324766f, -0.326170f, -0.340547f, -0.138288f, -0.002963f, -0.114060f, + -0.377312f, -0.442570f, 0.212446f, -0.007759f, -0.011576f, 0.169711f, + 0.308689f, 0.317348f, 0.539390f, 0.332845f, 0.057331f, -0.068180f, + 0.101994f, 0.266995f, 0.209570f, 0.355730f, 0.091635f, 0.170238f, + 0.125215f, 0.274154f, 0.070223f, 0.025515f, 0.049946f, -0.000550f, + 0.043715f, -0.141843f, 0.020844f, 0.129871f, 0.256588f, 0.105015f, + 0.148339f, 0.170682f, 0.028792f, 0.074037f, 0.160042f, 0.405137f, + 0.246187f, 0.352160f, 0.168951f, 0.222263f, 0.264439f, 0.065945f, + 0.021963f, -0.075084f, 0.093105f, 0.027318f, 0.098864f, 0.057566f, + -0.080282f, 0.185032f, 0.314419f, 0.333727f, 0.125798f, 0.294919f, + 0.386002f, 0.217619f, -0.183517f, -0.278622f, -0.002342f, -0.027821f, + -0.134266f, -0.331843f, -0.008296f, 0.124564f, 0.053712f, -0.369016f, + -0.095036f, 0.209381f, 0.423760f, 0.371760f, 0.106397f, 0.369408f, + 0.485608f, 0.231201f, -0.138685f, -0.349208f, -0.070083f, 0.028991f, + -0.081630f, -0.395992f, -0.146791f, -0.027354f, 0.063396f, -0.272484f, + 0.058299f, 0.338207f, 0.110767f, -0.052642f, -0.233848f, -0.027448f, + 0.030328f, 0.155572f, -0.093826f, 0.019331f, 0.120638f, 0.006292f, + -0.106083f, -0.236290f, -0.140933f, -0.088067f, -0.025138f, -0.208395f, + -0.025502f, 0.144192f, -0.048353f, -0.106144f, -0.305121f, -0.114147f, + 0.090963f, 0.327727f, 0.035606f, -0.093779f, 0.002651f, -0.171081f, + -0.188131f, -0.216571f, -0.209101f, -0.054402f, 0.157147f, -0.057127f, + 0.066584f, 0.008988f, 0.041191f, 0.034456f, -0.078255f, 0.052099f, + -0.022239f, 0.066981f, -0.117520f, -0.072637f, 0.062512f, 0.037570f, + -0.057544f, -0.312359f, 0.034357f, -0.031549f, 0.002566f, -0.207375f, + -0.070654f, -0.018786f, -0.044815f, -0.012814f, -0.076320f, 0.078183f, + 0.023877f, 0.117078f, 0.022292f, -0.205424f, -0.060430f, -0.017296f, + -0.004827f, -0.321036f, -0.092155f, 0.038837f, 0.073190f, -0.067513f, + 0.026521f, 0.171945f, 0.087318f, 0.034495f, -0.034089f, 0.154410f, + -0.061431f, 0.007435f, -0.111094f, -0.095976f, 0.014741f, -0.132324f, + -0.029517f, -0.192160f, 0.098667f, 0.020762f, 0.177050f, -0.064510f, + -0.054437f, -0.058678f, -0.001858f, 0.167602f, 0.015735f, 0.054338f, + 0.016477f, 0.186381f, -0.010667f, 0.054692f, 0.126742f, 0.013140f, + 0.090353f, -0.133608f, -0.018017f, -0.152619f, 0.027600f, -0.138700f, + -0.050274f, 0.045141f, -0.118731f, 0.094797f, -0.167605f, 0.097461f, + -0.009131f, 0.199920f, -0.052976f, 0.158194f, 0.178568f, -0.107600f, + 0.009671f, -0.084072f, -0.040258f, -0.205673f, 
0.102891f, 0.223511f, + 0.042699f, 0.118548f, -0.021274f, 0.110997f, -0.155121f, 0.027696f, + -0.149968f, 0.051552f, -0.129219f, 0.173524f, 0.073972f, -0.189045f, + -0.034523f, -0.106655f, -0.011843f, -0.197381f, 0.219413f, 0.183197f, + -0.054920f, 0.144955f, 0.036517f, -0.085412f, -0.229070f, -0.143710f, + -0.049486f, 0.156634f, -0.008673f, -0.064778f, 0.082344f, 0.145673f, + 0.002912f, -0.210121f, -0.116564f, 0.078425f, 0.220908f, -0.067594f, + 0.048610f, 0.084912f, -0.066202f, -0.112515f, -0.217767f, -0.082640f, + -0.017414f, 0.230265f, -0.070735f, 0.066073f, 0.215256f, 0.071157f, + -0.087220f, -0.202235f, -0.011918f, 0.099562f, 0.174716f, -0.063845f, + -0.121055f, 0.014367f, 0.132709f, -0.005060f, -0.244606f, -0.179693f, + -0.134690f, 0.023239f, -0.193116f, -0.076975f, -0.021164f, -0.001938f, + -0.163799f, -0.111437f, -0.210362f, -0.166376f, 0.034754f, 0.010036f, + -0.021917f, 0.068014f, -0.086893f, -0.251746f, -0.267171f, 0.037383f, + 0.003966f, 0.033571f, -0.151506f, 0.025437f, -0.020626f, -0.308454f, + -0.343143f, -0.092263f, -0.026261f, -0.028345f, 0.036036f, 0.035169f, + 0.129470f, 0.122205f, 0.015661f, -0.070612f, -0.094333f, -0.066055f, + -0.041083f, 0.159146f, 0.073184f, 0.110044f, 0.174471f, 0.078069f, + -0.014881f, 0.008116f, 0.013209f, 0.075857f, 0.195605f, 0.062714f, + 0.067955f, 0.056544f, -0.153908f, -0.141749f, -0.072550f, 0.033523f, + -0.024665f, 0.134487f, 0.079076f, 0.133562f, 0.227130f, 0.018054f, + 0.004928f, 0.169162f, 0.065152f, 0.072160f, 0.131631f, 0.096303f, + 0.054288f, 0.106256f, 0.114632f, 0.119038f, 0.515200f, 0.247429f, + 0.199134f, 0.211957f, 0.127558f, -0.294684f, -0.194890f, -0.049988f, + -0.112247f, -0.008122f, -0.006176f, 0.037035f, -0.110881f, -0.249989f, + 0.152434f, 0.234621f, 0.153340f, 0.349283f, 0.683049f, 0.157174f, + 0.124844f, 0.099136f, 0.064407f, -0.248400f, -0.155323f, -0.026498f, + -0.023450f, 0.049051f, -0.114187f, 0.007195f, -0.176825f, -0.376926f, + 0.366159f, -0.179938f, -0.148508f, 0.006043f, 0.170048f, 0.097866f, + -0.102658f, -0.260430f, 0.248868f, 0.037019f, -0.118111f, 0.078176f, + 0.194171f, 0.211328f, 0.368612f, 0.361213f, 0.130013f, 0.094650f, + 0.227396f, -0.178058f, -0.114782f, -0.008093f, 0.231080f, -0.011843f, + -0.097917f, -0.325788f, 0.141879f, 0.119738f, -0.230427f, -0.117419f, + -0.114153f, 0.037903f, 0.116383f, 0.218773f, -0.101884f, 0.059466f, + 0.119255f, 0.010874f, -0.031449f, 0.045996f, 0.119931f, 0.273760f, + 0.311700f, 0.261794f, 0.194809f, 0.339829f, 0.239449f, 0.064140f, + 0.077597f, 0.098996f, 0.143534f, 0.184602f, 0.037507f, 0.225494f, + 0.096142f, -0.147370f, -0.207833f, -0.174742f, -0.086391f, -0.038942f, + 0.159577f, -0.088492f, -0.000989f, 0.108154f, -0.025890f, -0.072713f, + 0.025997f, -0.006803f, -0.086879f, -0.011290f, -0.269200f, -0.103450f, + -0.124910f, -0.116340f, 0.141459f, 0.208800f, 0.042268f, 0.265034f, + 0.516474f, 0.217591f, -0.018843f, -0.313328f, -0.168363f, 0.047129f, + 0.090480f, -0.109852f, -0.018761f, 0.210669f, 0.281269f, -0.043591f, + -0.034147f, -0.237772f, -0.134843f, -0.072481f, -0.103831f, 0.038355f, + 0.308619f, 0.148023f, -0.045867f, -0.123950f, -0.210860f, -0.064973f, + -0.036308f, -0.046731f, -0.022099f, 0.095776f, 0.409423f, 0.060635f, + -0.065196f, 0.051828f, 0.027981f, -0.009609f, -0.137681f, -0.095011f, + -0.019045f, 0.177278f, 0.009759f, -0.092119f, -0.016958f, -0.133860f, + -0.118421f, -0.032039f, -0.006214f, -0.084541f, 0.063971f, -0.073642f, + 0.165676f, 0.110443f, 0.044131f, 0.046568f, 0.053292f, -0.055466f, + 0.015512f, 0.371947f, 0.232102f, -0.016923f, 0.103979f, 
-0.091758f, + 0.005907f, 0.209100f, 0.157433f, 0.030518f, 0.250366f, 0.062322f, + 0.036720f, 0.094676f, 0.017306f, -0.010328f, -0.079012f, 0.016781f, + -0.112435f, 0.061795f, 0.042543f, -0.126799f, -0.009975f, -0.056760f, + 0.046424f, -0.194712f, -0.139399f, -0.037731f, 0.157989f, -0.016261f, + 0.123345f, 0.230563f, 0.083300f, -0.016392f, 0.059567f, -0.016035f, + -0.064767f, 0.231945f, 0.156629f, 0.034602f, 0.145628f, 0.041315f, + 0.034535f, 0.019967f, -0.089188f, -0.012091f, 0.307857f, 0.211405f, + -0.025091f, -0.148249f, -0.129384f, 0.063536f, -0.068603f, -0.067941f, + -0.035104f, 0.210832f, 0.063810f, 0.062764f, -0.089889f, -0.030554f, + 0.014791f, -0.053362f, -0.037818f, -0.196640f, 0.008388f, -0.082654f, + 0.143056f, 0.064221f, 0.069795f, 0.191040f, 0.097321f, -0.028679f, + 0.075794f, 0.313154f, 0.086240f, 0.207643f, 0.017809f, 0.122867f, + 0.224586f, 0.167403f, -0.023884f, 0.047434f, 0.344091f, 0.187745f, + 0.136177f, 0.141738f, 0.063799f, 0.045233f, -0.077342f, -0.003525f, + -0.165041f, -0.025616f, -0.073745f, 0.164439f, 0.011200f, -0.145896f, + -0.027954f, -0.061987f, -0.039874f, -0.142775f, 0.151042f, -0.038238f, + 0.053152f, 0.078615f, 0.086061f, 0.100593f, 0.128046f, -0.071006f, + -0.116558f, 0.208445f, 0.051086f, 0.076843f, 0.023191f, -0.084781f, + -0.011790f, 0.147807f, -0.048554f, -0.113932f, 0.283322f, 0.190934f, + 0.092789f, 0.033018f, -0.142428f, -0.142480f, -0.099023f, -0.041020f, + -0.042760f, 0.203295f, -0.053475f, 0.042424f, 0.222839f, -0.019167f, + -0.133176f, -0.276216f, -0.031998f, 0.117290f, 0.177827f, -0.059973f, + -0.064744f, -0.117040f, -0.155482f, -0.099531f, 0.164121f, -0.026682f, + -0.093810f, 0.238993f, -0.006506f, 0.007830f, 0.065819f, -0.203643f, + -0.100925f, -0.053652f, -0.130770f, 0.026277f, 0.131796f, 0.032742f, + 0.127186f, 0.116694f, -0.161122f, -0.279773f, -0.252515f, -0.002638f, + 0.042812f, 0.096776f, -0.123280f, 0.064858f, -0.010455f, -0.219760f, + -0.239331f, -0.104363f, -0.058022f, -0.053584f, 0.025611f, 0.005129f, + -0.100418f, -0.045712f, -0.194418f, -0.126366f, -0.030530f, 0.051168f, + 0.215959f, 0.172402f, -0.054700f, -0.185995f, -0.278360f, -0.193693f, + -0.040309f, 0.003735f, -0.007770f, 0.123556f, 0.190179f, -0.077315f, + 0.117403f, 0.212942f, 0.012160f, 0.000113f, 0.027331f, 0.040202f, + 0.033293f, 0.219438f, 0.184174f, 0.259349f, 0.311206f, 0.082547f, + -0.047875f, -0.078417f, 0.010746f, 0.082620f, 0.311931f, 0.307605f, + 0.003863f, 0.021405f, -0.026388f, -0.019572f, 0.020582f, -0.059353f, + 0.025199f, 0.261319f, 0.086316f, 0.143614f, 0.107780f, 0.003900f, + -0.188397f, -0.038563f, -0.106045f, -0.125154f, -0.010509f, 0.054021f, + 0.242130f, 0.279152f, 0.215546f, 0.346995f, 0.440856f, 0.237452f, + 0.234154f, 0.301646f, 0.168929f, -0.208358f, -0.126848f, 0.010260f, + 0.121018f, -0.062975f, -0.052848f, 0.050341f, -0.061103f, -0.266482f, + 0.107186f, 0.140221f, 0.280065f, 0.287889f, 0.373198f, 0.151596f, + 0.013593f, 0.115616f, 0.014616f, -0.281710f, -0.237597f, -0.117305f, + -0.000034f, -0.136739f, -0.196275f, -0.095225f, -0.125310f, -0.250514f, + 0.236804f, -0.071805f, -0.037421f, 0.048230f, 0.321596f, 0.063632f, + 0.024039f, -0.029133f, 0.230983f, 0.160593f, -0.154355f, -0.013086f, + -0.079929f, 0.094692f, 0.160391f, 0.180239f, 0.053895f, 0.100759f, + 0.288631f, 0.038191f, 0.181692f, 0.229682f, 0.440166f, 0.063401f, + 0.006273f, 0.020865f, 0.338695f, 0.256244f, -0.043927f, 0.115617f, + 0.003296f, 0.173965f, 0.021318f, -0.040936f, -0.118932f, 0.182380f, + 0.235922f, -0.053233f, -0.015053f, -0.101057f, 0.095341f, 0.051111f, + 0.161831f, 
0.032614f, 0.159496f, 0.072375f, 0.025089f, 0.023748f, + 0.029151f, 0.161284f, -0.117717f, -0.036191f, -0.176822f, -0.162006f, + 0.226542f, -0.078329f, 0.043079f, -0.119172f, 0.054614f, -0.101365f, + -0.064541f, -0.115304f, 0.135170f, 0.298872f, 0.098060f, 0.089428f, + -0.007497f, 0.110391f, -0.028824f, 0.020835f, -0.036804f, 0.125411f, + 0.192105f, -0.048931f, 0.003086f, -0.010681f, 0.074698f, -0.016263f, + 0.096063f, 0.060267f, -0.007277f, 0.139139f, -0.080635f, 0.036628f, + 0.086058f, 0.131979f, 0.085707f, 0.025301f, 0.226094f, 0.194759f, + 0.042193f, -0.157846f, -0.068402f, -0.141450f, -0.112659f, -0.076305f, + -0.069085f, -0.114332f, -0.102005f, 0.132193f, -0.067042f, 0.106643f, + 0.198964f, 0.171616f, 0.167237f, -0.033730f, -0.026755f, 0.083621f, + 0.149459f, -0.002799f, -0.000318f, 0.011753f, 0.065889f, -0.089375f, + -0.049610f, 0.224579f, 0.216548f, -0.034908f, -0.017851f, -0.088144f, + 0.007530f, 0.240268f, 0.073270f, 0.013263f, 0.175323f, 0.012082f, + 0.093993f, 0.015282f, 0.105854f, 0.107990f, 0.077798f, -0.096166f, + -0.079607f, 0.177820f, 0.142392f, 0.033337f, -0.078100f, -0.081616f, + -0.046993f, 0.139459f, 0.020272f, -0.123161f, 0.175269f, 0.105217f, + 0.057328f, 0.080909f, -0.012612f, -0.097081f, 0.082060f, -0.096716f, + -0.063921f, 0.201884f, 0.128166f, -0.035051f, -0.032227f, -0.068139f, + -0.115915f, 0.095080f, -0.086007f, -0.067543f, 0.030776f, 0.032712f, + 0.088937f, 0.054336f, -0.039329f, -0.114022f, 0.171672f, -0.112321f, + -0.217646f, 0.065186f, 0.060223f, 0.192174f, 0.055580f, -0.131107f, + -0.144338f, 0.056730f, -0.034707f, -0.081616f, -0.135298f, -0.000614f, + 0.087189f, 0.014614f, 0.067709f, 0.107689f, 0.225780f, 0.084361f, + -0.008544f, 0.051649f, -0.048369f, -0.037739f, -0.060710f, 0.002654f, + 0.016935f, 0.085563f, -0.015961f, -0.019265f, 0.111788f, 0.062376f, + 0.202019f, 0.047713f, 0.042261f, 0.069716f, 0.242913f, 0.021052f, + -0.072812f, -0.155920f, -0.026436f, 0.035621f, -0.079300f, -0.028787f, + -0.048329f, 0.084718f, -0.060565f, -0.083750f, -0.164075f, -0.040742f, + -0.086219f, 0.015271f, -0.005204f, -0.016038f, 0.045816f, -0.050433f, + -0.077652f, 0.117109f, 0.009611f, -0.009045f, -0.008634f, -0.055373f, + -0.085968f, 0.028527f, -0.054736f, -0.168089f, 0.175839f, 0.071205f, + -0.023603f, 0.037907f, -0.004561f, -0.022634f, 0.123831f, 0.094469f, + -0.072920f, -0.133642f, -0.014032f, -0.142754f, -0.026999f, -0.199409f, + 0.013268f, 0.226989f, 0.048650f, -0.170988f, -0.050141f, 0.007880f, + 0.061880f, 0.019078f, -0.043578f, -0.038139f, 0.134814f, 0.054097f, + -0.081670f, 0.176838f, 0.047920f, -0.038176f, 0.050406f, -0.107181f, + -0.036279f, 0.027060f, 0.081594f, -0.002820f, 0.090507f, -0.033338f, + -0.059571f, 0.013404f, -0.099860f, 0.073371f, 0.342805f, 0.098305f, + -0.150910f, -0.020822f, -0.056960f, 0.046262f, -0.043413f, -0.149405f, + -0.129105f, -0.010899f, -0.014229f, -0.179949f, -0.113044f, -0.049468f, + -0.065513f, 0.090269f, -0.011919f, 0.087846f, 0.095796f, 0.146127f, + 0.101599f, 0.078066f, -0.084348f, -0.100002f, -0.020134f, -0.050169f, + 0.062122f, 0.014640f, 0.019143f, 0.036543f, 0.180924f, -0.013976f, + -0.066768f, -0.001090f, -0.070419f, -0.004839f, -0.001504f, 0.034483f, + -0.044954f, -0.050336f, -0.088638f, -0.174782f, -0.116082f, -0.205507f, + 0.015587f, -0.042839f, -0.096879f, -0.144097f, -0.050268f, -0.196796f, + 0.109639f, 0.271411f, 0.173732f, 0.108070f, 0.156437f, 0.124255f, + 0.097242f, 0.238693f, 0.083941f, 0.109105f, 0.223940f, 0.267188f, + 0.027385f, 0.025819f, 0.125070f, 0.093738f, 0.040353f, 0.038645f, + -0.012730f, 0.144063f, 
0.052931f, -0.009138f, 0.084193f, 0.160272f, + -0.041366f, 0.011951f, -0.121446f, -0.106713f, -0.047566f, 0.047984f, + -0.255224f, -0.076116f, 0.098685f, -0.150845f, -0.171513f, -0.156590f, + 0.058331f, 0.187493f, 0.413018f, 0.554265f, 0.372242f, 0.237943f, + 0.124571f, 0.110829f, 0.010322f, -0.174477f, -0.067627f, -0.001979f, + 0.142913f, 0.040597f, 0.019907f, 0.025963f, -0.043585f, -0.120732f, + 0.099937f, 0.091059f, 0.247307f, 0.204226f, -0.042753f, -0.068580f, + -0.119002f, 0.026722f, 0.034853f, -0.060934f, -0.025054f, -0.093026f, + -0.035372f, -0.233209f, -0.049869f, -0.039151f, -0.022279f, -0.065380f, + -9.063785f }; + return vector(detector, detector + sizeof(detector)/sizeof(detector[0])); +} + + + + +std::vector cv::ocl::HOGDescriptor::getPeopleDetector64x128() +{ + static const float detector[] = { + 0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f, + 0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f, + 0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f, + 0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f, + -0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f, + -0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f, + -3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f, + 0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f, + 0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f, + 0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f, + 0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f, + 0.09879354f, 0.05362710f, -0.06745391f, -7.01260753e-003f, + 5.24702156e-003f, 0.03236255f, 0.01407916f, 0.02207983f, 0.02537322f, + 0.04547948f, 0.07200756f, 0.03129894f, -0.06274468f, 0.02107014f, + 0.06035208f, 0.08636236f, 4.53164103e-003f, 0.02193363f, 0.02309801f, + 0.05568166f, -0.02645093f, 0.04448695f, 0.02837519f, 0.08975694f, + 0.04461516f, 0.08975355f, 0.07514391f, 0.02306982f, 0.10410084f, + 0.06368385f, 0.05943464f, 4.58420580e-003f, 0.05220337f, 0.06675851f, + 0.08358569f, 0.06712101f, 0.06559004f, -0.03930482f, -9.15936660e-003f, + -0.05897915f, 0.02816453f, 0.05032348f, 0.06780671f, 0.03377650f, + -6.09417039e-004f, -0.01795146f, -0.03083684f, -0.01302475f, + -0.02972313f, 7.88706727e-003f, -0.03525961f, -2.50397739e-003f, + 0.05245084f, 0.11791293f, -0.02167498f, 0.05299332f, 0.06640524f, + 0.05190265f, -8.27316567e-003f, 0.03033127f, 0.05842173f, + -4.01050318e-003f, -6.25105947e-003f, 0.05862958f, -0.02465461f, + 0.05546781f, -0.08228195f, -0.07234028f, 0.04640540f, -0.01308254f, + -0.02506191f, 0.03100746f, -0.04665651f, -0.04591486f, 0.02949927f, + 0.06035462f, 0.02244646f, -0.01698639f, 0.01040041f, 0.01131170f, + 0.05419579f, -0.02130277f, -0.04321722f, -0.03665198f, 0.01126490f, + -0.02606488f, -0.02228328f, -0.02255680f, -0.03427236f, + -7.75165204e-003f, -0.06195229f, 8.21638294e-003f, 0.09535975f, + -0.03709979f, -0.06942501f, 0.14579427f, -0.05448192f, -0.02055904f, + 0.05747357f, 0.02781788f, -0.07077577f, -0.05178314f, -0.10429011f, + -0.11235505f, 0.07529039f, -0.07559302f, -0.08786739f, 0.02983843f, + 0.02667585f, 0.01382199f, -0.01797496f, -0.03141199f, -0.02098101f, + 0.09029204f, 0.04955018f, 0.13718739f, 0.11379953f, 1.80019124e-003f, + -0.04577610f, -1.11108483e-003f, -0.09470536f, -0.11596080f, + 0.04489342f, 0.01784211f, 3.06850672e-003f, 0.10781866f, + 3.36498418e-003f, -0.10842580f, -0.07436839f, -0.10535070f, + -0.01866805f, 0.16057891f, -5.07316366e-003f, -0.04295658f, + -5.90488780e-003f, 8.82003549e-003f, 
-0.01492646f, -0.05029279f, + -0.12875880f, 8.78831954e-004f, -0.01297184f, -0.07592774f, + -0.02668831f, -6.93787413e-004f, 0.02406698f, -0.01773298f, + -0.03855745f, -0.05877856f, 0.03259695f, 0.12826584f, 0.06292590f, + -4.10733931e-003f, 0.10996531f, 0.01332991f, 0.02088735f, 0.04037504f, + -0.05210760f, 0.07760046f, 0.06399347f, -0.05751930f, -0.10053057f, + 0.07505023f, -0.02139782f, 0.01796176f, 2.34400877e-003f, -0.04208319f, + 0.07355055f, 0.05093350f, -0.02996780f, -0.02219072f, 0.03355330f, + 0.04418742f, -0.05580705f, -0.05037573f, -0.04548179f, 0.01379514f, + 0.02150671f, -0.02194211f, -0.13682702f, 0.05464972f, 0.01608082f, + 0.05309116f, 0.04701022f, 1.33690401e-003f, 0.07575664f, 0.09625306f, + 8.92647635e-003f, -0.02819123f, 0.10866830f, -0.03439325f, + -0.07092371f, -0.06004780f, -0.02712298f, -7.07467366e-003f, + -0.01637020f, 0.01336790f, -0.10313606f, 0.04906582f, -0.05732445f, + -0.02731079f, 0.01042235f, -0.08340668f, 0.03686501f, 0.06108340f, + 0.01322748f, -0.07809529f, 0.03774724f, -0.03413248f, -0.06096525f, + -0.04212124f, -0.07982176f, -1.25973229e-003f, -0.03045501f, + -0.01236493f, -0.06312395f, 0.04789570f, -0.04602066f, 0.08576570f, + 0.02521080f, 0.02988098f, 0.10314583f, 0.07060035f, 0.04520544f, + -0.04426654f, 0.13146530f, 0.08386490f, 0.02164590f, -2.12280243e-003f, + -0.03686353f, -0.02074944f, -0.03829959f, -0.01530596f, 0.02689708f, + 0.11867401f, -0.06043470f, -0.02785023f, -0.04775074f, 0.04878745f, + 0.06350956f, 0.03494788f, 0.01467400f, 1.17890188e-003f, 0.04379614f, + 2.03681854e-003f, -0.03958609f, -0.01072688f, 6.43705716e-003f, + 0.02996500f, -0.03418507f, -0.01960307f, -0.01219154f, + -4.37000440e-003f, -0.02549453f, 0.02646318f, -0.01632513f, + 6.46516960e-003f, -0.01929734f, 4.78711911e-003f, 0.04962371f, + 0.03809111f, 0.07265724f, 0.05758125f, -0.03741554f, 0.01648608f, + -8.45285598e-003f, 0.03996826f, -0.08185477f, 0.02638875f, + -0.04026615f, -0.02744674f, -0.04071517f, 1.05096330e-003f, + -0.04741232f, -0.06733172f, 8.70434940e-003f, -0.02192543f, + 1.35350740e-003f, -0.03056974f, -0.02975521f, -0.02887780f, + -0.01210713f, -0.04828526f, -0.09066251f, -0.09969629f, -0.03665164f, + -8.88111943e-004f, -0.06826669f, -0.01866150f, -0.03627640f, + -0.01408288f, 0.01874239f, -0.02075835f, 0.09145175f, -0.03547291f, + 0.05396780f, 0.04198981f, 0.01301925f, -0.03384354f, -0.12201976f, + 0.06830920f, -0.03715654f, 9.55848210e-003f, 5.05685573e-003f, + 0.05659294f, 3.90764466e-003f, 0.02808490f, -0.05518097f, -0.03711621f, + -0.02835565f, -0.04420464f, -0.01031947f, 0.01883466f, + -8.49525444e-003f, -0.09419250f, -0.01269387f, -0.02133371f, + -0.10190815f, -0.07844430f, 2.43644323e-003f, -4.09610150e-003f, + 0.01202551f, -0.06452291f, -0.10593818f, -0.02464746f, -0.02199699f, + -0.07401930f, 0.07285886f, 8.87513801e-004f, 9.97662079e-003f, + 8.46779719e-003f, 0.03730333f, -0.02905126f, 0.03573337f, -0.04393689f, + -0.12014472f, 0.03176554f, -2.76015815e-003f, 0.10824566f, 0.05090732f, + -3.30179278e-003f, -0.05123822f, 5.04784798e-003f, -0.05664124f, + -5.99415926e-003f, -0.05341901f, -0.01221393f, 0.01291318f, + 9.91760660e-003f, -7.56987557e-003f, -0.06193124f, -2.24549137e-003f, + 0.01987562f, -0.02018840f, -0.06975540f, -0.06601523f, -0.03349112f, + -0.08910118f, -0.03371435f, -0.07406893f, -0.02248047f, -0.06159951f, + 2.77751544e-003f, -0.05723337f, -0.04792468f, 0.07518548f, + 2.77279224e-003f, 0.04211938f, 0.03100502f, 0.05278448f, 0.03954679f, + -0.03006846f, -0.03851741f, -0.02792403f, -0.02875333f, 0.01531280f, + 0.02186953f, 
-0.01989829f, 2.50679464e-003f, -0.10258728f, + -0.04785743f, -0.02887216f, 3.85063468e-003f, 0.01112236f, + 8.29218887e-003f, -0.04822981f, -0.04503597f, -0.03713100f, + -0.06988008f, -0.11002295f, -2.69209221e-003f, 1.85383670e-003f, + -0.05921049f, -0.06105053f, -0.08458050f, -0.04527602f, + 8.90329306e-004f, -0.05875023f, -2.68602883e-003f, -0.01591195f, + 0.03631859f, 0.05493166f, 0.07300330f, 5.53333294e-003f, 0.06400407f, + 0.01847740f, -5.76280477e-003f, -0.03210877f, 4.25160583e-003f, + 0.01166520f, -1.44864211e-003f, 0.02253744f, -0.03367080f, 0.06983195f, + -4.22323542e-003f, -8.89401045e-003f, -0.07943393f, 0.05199728f, + 0.06065201f, 0.04133492f, 1.44032843e-003f, -0.09585235f, -0.03964731f, + 0.04232114f, 0.01750465f, -0.04487902f, -7.59733608e-003f, 0.02011171f, + 0.04673622f, 0.09011173f, -0.07869188f, -0.04682482f, -0.05080139f, + -3.99383716e-003f, -0.05346331f, 0.01085723f, -0.03599333f, + -0.07097908f, 0.03551549f, 0.02680387f, 0.03471529f, 0.01790393f, + 0.05471273f, 9.62048303e-003f, -0.03180215f, 0.05864431f, 0.02330614f, + 0.01633144f, -0.05616681f, -0.10245429f, -0.08302189f, 0.07291322f, + -0.01972590f, -0.02619633f, -0.02485327f, -0.04627592f, + 1.48853404e-003f, 0.05514185f, -0.01270860f, -0.01948900f, 0.06373586f, + 0.05002292f, -0.03009798f, 8.76216311e-003f, -0.02474238f, + -0.05504891f, 1.74034527e-003f, -0.03333667f, 0.01524987f, 0.11663762f, + -1.32344989e-003f, -0.06608453f, 0.05687166f, -6.89525274e-004f, + -0.04402352f, 0.09450210f, -0.04222684f, -0.05360983f, 0.01779531f, + 0.02561388f, -0.11075410f, -8.77790991e-003f, -0.01099504f, + -0.10380266f, 0.03103457f, -0.02105741f, -0.07371717f, 0.05146710f, + 0.10581432f, -0.08617968f, -0.02892107f, 0.01092199f, 0.14551543f, + -2.24320893e-003f, -0.05818033f, -0.07390742f, 0.05701261f, + 0.12937020f, -0.04986651f, 0.10182415f, 0.05028650f, 0.12515625f, + 0.09175041f, 0.06404983f, 0.01523394f, 0.09460562f, 0.06106631f, + -0.14266998f, -0.02926703f, 0.02762171f, 0.02164151f, + -9.58488265e-004f, -0.04231362f, -0.09866509f, 0.04322244f, + 0.05872034f, -0.04838847f, 0.06319253f, 0.02443798f, -0.03606876f, + 9.38737206e-003f, 0.04289991f, -0.01027411f, 0.08156885f, 0.08751175f, + -0.13191354f, 8.16054735e-003f, -0.01452161f, 0.02952677f, 0.03615945f, + -2.09128903e-003f, 0.02246693f, 0.09623287f, 0.09412123f, -0.02924758f, + -0.07815186f, -0.02203079f, -2.02566991e-003f, 0.01094733f, + -0.01442332f, 0.02838561f, 0.11882371f, 7.28798332e-003f, -0.10345965f, + 0.07561217f, -0.02049661f, 4.44177445e-003f, 0.01609347f, -0.04893158f, + -0.08758243f, -7.67420698e-003f, 0.08862378f, 0.06098121f, 0.06565887f, + 7.32981879e-003f, 0.03558407f, -0.03874352f, -0.02490055f, + -0.06771075f, 0.09939223f, -0.01066077f, 0.01382995f, -0.07289080f, + 7.47184316e-003f, 0.10621431f, -0.02878659f, 0.02383525f, -0.03274646f, + 0.02137008f, 0.03837290f, 0.02450992f, -0.04296818f, -0.02895143f, + 0.05327370f, 0.01499020f, 0.04998732f, 0.12938657f, 0.09391870f, + 0.04292390f, -0.03359194f, -0.06809492f, 0.01125796f, 0.17290455f, + -0.03430733f, -0.06255233f, -0.01813114f, 0.11726857f, -0.06127599f, + -0.08677909f, -0.03429872f, 0.04684938f, 0.08161420f, 0.03538774f, + 0.01833884f, 0.11321855f, 0.03261845f, -0.04826299f, 0.01752407f, + -0.01796414f, -0.10464549f, -3.30041884e-003f, 2.29343961e-004f, + 0.01457292f, -0.02132982f, -0.02602923f, -9.87351313e-003f, + 0.04273872f, -0.02103316f, -0.07994065f, 0.02614958f, -0.02111666f, + -0.06964913f, -0.13453490f, -0.06861878f, -6.09341264e-003f, + 0.08251446f, 0.15612499f, 2.46531400e-003f, 
8.88424646e-003f, + -0.04152999f, 0.02054853f, 0.05277953f, -0.03087788f, 0.02817579f, + 0.13939077f, 0.07641046f, -0.03627627f, -0.03015098f, -0.04041540f, + -0.01360690f, -0.06227205f, -0.02738223f, 0.13577610f, 0.15235767f, + -0.05392922f, -0.11175954f, 0.02157129f, 0.01146481f, -0.05264937f, + -0.06595174f, -0.02749175f, 0.11812254f, 0.17404149f, -0.06137035f, + -0.11003478f, -0.01351621f, -0.01745916f, -0.08577441f, -0.04469909f, + -0.06106115f, 0.10559758f, 0.20806813f, -0.09174948f, 7.09621934e-004f, + 0.03579374f, 0.07215115f, 0.02221742f, 0.01827742f, -7.90785067e-003f, + 0.01489554f, 0.14519960f, -0.06425831f, 0.02990399f, -1.80181325e-003f, + -0.01401528f, -0.04171134f, -3.70530109e-003f, -0.09090481f, + 0.09520713f, 0.08845516f, -0.02651753f, -0.03016730f, 0.02562448f, + 0.03563816f, -0.03817881f, 0.01433385f, 0.02256983f, 0.02872120f, + 0.01001934f, -0.06332260f, 0.04338406f, 0.07001807f, -0.04705722f, + -0.07318907f, 0.02630457f, 0.03106382f, 0.06648342f, 0.10913180f, + -0.01630815f, 0.02910308f, 0.02895109f, 0.08040254f, 0.06969310f, + 0.06797734f, 6.08639978e-003f, 4.16588830e-003f, 0.08926726f, + -0.03123648f, 0.02700146f, 0.01168734f, -0.01631594f, 4.61015804e-003f, + 8.51359498e-003f, -0.03544224f, 0.03571994f, 4.29766066e-003f, + -0.01970077f, -8.79793242e-003f, 0.09607988f, 0.01544222f, + -0.03923707f, 0.07308586f, 0.06061262f, 1.31683104e-004f, + -7.98222050e-003f, 0.02399261f, -0.06084389f, -0.02743429f, + -0.05475523f, -0.04131311f, 0.03559756f, 0.03055342f, 0.02981433f, + 0.14860515f, 0.01766787f, 0.02945257f, 0.04898238f, 0.01026922f, + 0.02811658f, 0.08267091f, 0.02732154f, -0.01237693f, 0.11760156f, + 0.03802063f, -0.03309754f, 5.24957618e-003f, -0.02460510f, 0.02691451f, + 0.05399988f, -0.10133506f, 0.06385437f, -0.01818005f, 0.02259503f, + 0.03573135f, 0.01042848f, -0.04153402f, -0.04043029f, 0.01643575f, + 0.08326677f, 4.61383024e-004f, -0.05308095f, -0.08536223f, + -1.61011645e-003f, -0.02163720f, -0.01783352f, 0.03859637f, + 0.08498885f, -0.01725216f, 0.08625131f, 0.10995087f, 0.09177644f, + 0.08498347f, 0.07646490f, 0.05580502f, 0.02693516f, 0.09996913f, + 0.09070327f, 0.06667200f, 0.05873008f, -0.02247842f, 0.07772321f, + 0.12408436f, 0.12629253f, -8.41997913e-004f, 0.01477783f, 0.09165990f, + -2.98401713e-003f, -0.06466447f, -0.07057302f, 2.09516948e-004f, + 0.02210209f, -0.02158809f, -0.08602506f, -0.02284836f, + 4.01876355e-003f, 9.56660323e-003f, -0.02073978f, -0.04635138f, + -7.59423291e-003f, -0.01377393f, -0.04559359f, -0.13284740f, + -0.08671406f, -0.03654395f, 0.01142869f, 0.03287891f, -0.04392983f, + 0.06142959f, 0.17710890f, 0.10385257f, 0.01329137f, 0.10067633f, + 0.12450829f, -0.04476709f, 0.09049144f, 0.04589312f, 0.11167907f, + 0.08587538f, 0.04767583f, 1.67188141e-003f, 0.02359802f, -0.03808852f, + 0.03126272f, -0.01919029f, -0.05698918f, -0.02365112f, -0.06519032f, + -0.05599358f, -0.07097308f, -0.03301812f, -0.04719102f, -0.02566297f, + 0.01324074f, -0.09230672f, -0.05518232f, -0.04712864f, -0.03380903f, + -0.06719479f, 0.01183908f, -0.09326738f, 0.01642865f, 0.03789867f, + -6.61567831e-003f, 0.07796386f, 0.07246574f, 0.04706347f, -0.02523437f, + -0.01696830f, -0.08068866f, 0.06030888f, 0.10527060f, -0.06611756f, + 0.02977346f, 0.02621830f, 0.01913855f, -0.08479366f, -0.06322418f, + -0.13570616f, -0.07644490f, 9.31900274e-003f, -0.08095149f, + -0.10197903f, -0.05204025f, 0.01413151f, -0.07800411f, -0.01885122f, + -0.07509381f, -0.10136326f, -0.05212355f, -0.09944065f, + -1.33606605e-003f, -0.06342617f, -0.04178550f, -0.12373723f, + 
-0.02832736f, -0.06057501f, 0.05830070f, 0.07604282f, -0.06462587f, + 8.02447461e-003f, 0.11580125f, 0.12332212f, 0.01978462f, + -2.72378162e-003f, 0.05850752f, -0.04674481f, 0.05148062f, + -2.62542837e-003f, 0.11253355f, 0.09893716f, 0.09785093f, -0.04659257f, + -0.01102429f, -0.07002308f, 0.03088913f, -0.02565549f, -0.07671449f, + 3.17443861e-003f, -0.10783514f, -0.02314270f, -0.11089555f, + -0.01024768f, 0.03116021f, -0.04964825f, 0.02281825f, 5.50005678e-003f, + -0.08427856f, -0.14685495f, -0.07719755f, -0.13342668f, -0.04525511f, + -0.09914210f, 0.02588859f, 0.03469279f, 0.04664020f, 0.11688190f, + 0.09647275f, 0.10857815f, -0.01448726f, 0.04299758f, -0.06763151f, + 1.33257592e-003f, 0.14331576f, 0.07574340f, 0.09166205f, 0.05674926f, + 0.11325553f, -0.01106494f, 0.02062161f, -0.11484840f, -0.07492137f, + -0.02864293f, -0.01275638f, -0.06946032f, -0.10101652f, -0.04113498f, + -0.02214783f, -0.01273942f, -0.07480393f, -0.10556041f, -0.07622112f, + -0.09988393f, -0.11453961f, -0.12073903f, -0.09412795f, -0.07146588f, + -0.04054537f, -0.06127083f, 0.04221122f, 0.07688113f, 0.04099256f, + 0.12663734f, 0.14683802f, 0.21761774f, 0.12525328f, 0.18431792f, + -1.66402373e-003f, 2.37777247e-003f, 0.01445475f, 0.03509416f, + 0.02654697f, 0.01716739f, 0.05374011f, 0.02944174f, 0.11323927f, + -0.01485456f, -0.01611330f, -1.85554172e-003f, -0.01708549f, + -0.05435753f, -0.05302101f, 0.05260378f, -0.03582945f, + -3.42867890e-004f, 1.36076682e-003f, -0.04436073f, -0.04228432f, + 0.03281291f, -0.05480836f, -0.10197772f, -0.07206279f, -0.10741059f, + -0.02366946f, 0.10278475f, -2.74783419e-003f, -0.03242477f, + 0.02308955f, 0.02835869f, 0.10348799f, 0.19580358f, 0.10252027f, + 0.08039929f, 0.05525554f, -0.13250865f, -0.14395352f, 3.13586881e-003f, + -0.03387071f, 8.94669443e-003f, 0.05406157f, -4.97324532e-003f, + -0.01189114f, 2.82919413e-004f, -0.03901557f, -0.04898705f, + 0.02164520f, -0.01382906f, -0.01850416f, 0.01869347f, -0.02450060f, + 0.02291678f, 0.08196463f, 0.03309153f, -0.10629974f, 0.02473924f, + 0.05344394f, -0.02404823f, -0.03243643f, -5.55244600e-003f, + -0.08009996f, 0.02811539f, 0.04235742f, 0.01859004f, 0.04902123f, + -0.01438252f, -0.01526853f, 0.02044195f, -0.05008660f, 0.04244113f, + 0.07611816f, 0.04950470f, -0.06020549f, -4.26026015e-003f, 0.13133512f, + -0.01438738f, -0.01958807f, -0.04044152f, -0.12425045f, + 2.84353318e-003f, -0.05042776f, -0.09121484f, 7.34345755e-003f, + 0.09388847f, 0.11800314f, 4.72295098e-003f, 4.44378285e-003f, + -0.07984917f, -0.03613737f, 0.04490915f, -0.02246483f, 0.04681071f, + 0.05240871f, 0.02157206f, -0.04603431f, -0.01197929f, -0.02748779f, + 0.13621049f, 0.08812155f, -0.07802048f, 4.86458559e-003f, -0.01598836f, + 0.01024450f, -0.03463517f, -0.02304239f, -0.08692665f, 0.06655128f, + 0.05785803f, -0.12640759f, 0.02307472f, 0.07337402f, 0.07525434f, + 0.04943763f, -0.02241034f, -0.09978238f, 0.14487994f, -0.06570521f, + -0.07855482f, 0.02830222f, -5.29603509e-004f, -0.04669895f, + -0.11822784f, -0.12246452f, -0.15365660f, -0.02969127f, 0.08078201f, + 0.13512598f, 0.11505685f, 0.04740673f, 0.01376022f, -0.05852978f, + -0.01537809f, -0.05541119f, 0.02491065f, -0.02870786f, 0.02760978f, + 0.23836176f, 0.22347429f, 0.10306466f, -0.06919070f, -0.10132039f, + -0.20198342f, -0.05040560f, 0.27163076f, 0.36987007f, 0.34540465f, + 0.29095781f, 0.05649706f, 0.04125737f, 0.07505883f, -0.02737836f, + -8.43431335e-003f, 0.07368195f, 0.01653876f, -0.09402955f, + -0.09574359f, 0.01474337f, -0.07128561f, -0.03460737f, 0.11438941f, + 0.13752601f, -0.06385452f, 
-0.06310338f, 8.19548313e-003f, 0.11622470f, + 5.05133113e-003f, -0.07602754f, 0.06695660f, 0.25723928f, 0.09037900f, + 0.28826267f, 0.13165380f, -0.05312614f, -0.02137198f, -0.03442232f, + -0.06255679f, 0.03899667f, 0.18391028f, 0.26016650f, 0.03374462f, + 0.01860465f, 0.19077586f, 0.18160543f, 3.43634398e-003f, -0.03036782f, + 0.19683038f, 0.35378191f, 0.24968483f, -0.03222649f, 0.28972381f, + 0.43091634f, 0.30778357f, 0.02335266f, -0.09877399f, -6.85245218e-003f, + 0.08945240f, -0.08150686f, 0.02792493f, 0.24806842f, 0.17338486f, + 0.06231801f, -0.10432383f, -0.16653322f, -0.13197899f, -0.08531576f, + -0.19271527f, -0.13536365f, 0.22240199f, 0.39219588f, 0.26597717f, + -0.01231649f, 0.01016179f, 0.13379875f, 0.12018334f, -0.04852953f, + -0.07915270f, 0.07036012f, 3.87723115e-003f, -0.06126805f, + -0.15015170f, -0.11406515f, -0.08556531f, -0.07429333f, -0.16115491f, + 0.13214062f, 0.25691369f, 0.05697750f, 0.06861912f, -6.02903729e-003f, + -7.94562511e-003f, 0.04799571f, 0.06695165f, -0.01926842f, 0.06206308f, + 0.13450983f, -0.06381495f, -2.98370165e-003f, -0.03482971f, + 7.53991678e-003f, 0.03895611f, 0.11464261f, 0.01669971f, + 8.27818643e-003f, -7.49160210e-003f, -0.11712562f, -0.10650621f, + -0.10353880f, -0.04994106f, -7.65618810e-004f, 0.03023767f, + -0.04759270f, -0.07302686f, -0.05825012f, -0.13156348f, -0.10639747f, + -0.19393684f, -0.09973683f, -0.07918908f, 4.63177625e-004f, + -6.61382044e-004f, 0.15853868f, 0.08561199f, -0.07660093f, + -0.08015265f, -0.06164073f, 0.01882577f, -7.29908410e-004f, + 0.06840892f, 0.03843764f, 0.20274927f, 0.22028814f, -5.26101235e-003f, + 0.01452435f, -0.06331623f, 0.02865064f, 0.05673740f, 0.12171564f, + 0.03837196f, 0.03555467f, -0.02662914f, -0.10280123f, -0.06526285f, + -0.11066351f, -0.08988424f, -0.10103678f, 8.10526591e-003f, + 5.95238712e-003f, 0.02617721f, -0.01705742f, -0.10897956f, + -0.08004991f, -0.11271993f, -0.06185647f, -0.06103712f, 0.01597041f, + -0.05923606f, 0.09410726f, 0.22858568f, 0.03263380f, 0.06772990f, + -0.09003516f, 0.01017870f, 0.01931688f, 0.08628357f, -0.01430009f, + 0.10954945f, 0.16612452f, -0.02434544f, -0.03310068f, -0.04236627f, + 0.01212392f, -6.15046406e-003f, 0.06954194f, 0.03015283f, 0.01787957f, + 0.02781667f, -0.05561153f, -8.96244217e-003f, -0.04971489f, + 0.07510284f, 0.01775282f, 0.05889897f, -0.07981427f, 0.03647643f, + -3.73833324e-003f, -0.08894575f, -0.06429435f, -0.08068276f, + 0.03567704f, -0.07131936f, -7.21910037e-003f, -0.09566668f, + 0.17886090f, 0.14911725f, 0.02070032f, -0.05017120f, -0.04992622f, + 0.01570143f, -0.09906903f, 0.06456193f, 0.15329507f, 0.18820767f, + 0.11689861f, -0.01178513f, -0.02225163f, -0.01905318f, 0.10271224f, + -7.27029052e-003f, 0.11664233f, 0.14796902f, 0.07771893f, 0.02400013f, + -0.05361797f, -0.01972888f, 0.01376177f, 0.06740040f, -0.06525395f, + 0.05726178f, -0.02404981f, -0.14018567f, -0.02074987f, -0.04621970f, + -0.04688627f, -0.01842059f, 0.07722727f, -0.04852883f, 0.01529004f, + -0.19639495f, 0.10817073f, 0.03795860f, -0.09435206f, -0.07984378f, + -0.03383440f, 0.11081333f, 0.02237366f, 0.12703256f, 0.21613893f, + 0.02918790f, 4.66472283e-003f, -0.10274266f, -0.04854131f, + -3.46305710e-003f, 0.08652268f, 0.02251546f, 0.09636052f, 0.17180754f, + -0.09272388f, 4.59174305e-004f, -0.11723048f, -0.12210111f, + -0.15547538f, 0.07218186f, -0.05297846f, 0.03779940f, 0.05150875f, + -0.03802310f, 0.03870645f, -0.15250699f, -0.08696499f, -0.02021560f, + 0.04118926f, -0.15177974f, 0.01577647f, 0.10249301f, 7.50041893e-003f, + 0.01721806f, -0.06828983f, 
-0.02397596f, -0.06598977f, -0.04317593f, + -0.08064980f, 6.66632550e-003f, 0.03333484f, 0.07093620f, 0.08231064f, + -0.06577903f, -0.06698844f, -0.06984019f, -0.06508023f, -0.14145090f, + -0.02393239f, 0.06485303f, 8.83263443e-003f, 0.09251080f, -0.07557579f, + -0.05067699f, -0.09798748f, -0.06703258f, -0.14056294f, 0.03245994f, + 0.12554143f, 0.01761621f, 0.12980327f, -0.04081950f, -0.11906909f, + -0.14813015f, -0.08376863f, -0.12200681f, 0.04988137f, 0.05424247f, + -3.90952639e-003f, 0.03255733f, -0.12717837f, -0.07461493f, + -0.05703964f, -0.01736189f, -0.08026433f, -0.05433894f, -0.01719359f, + 0.02886275f, 0.01772653f, -0.09163518f, 3.57789593e-003f, -0.10129993f, + -0.02653764f, -0.08131415f, -0.03847986f, -7.62157550e-004f, + 0.06486648f, 0.19675669f, -0.04919156f, -0.07059129f, -0.04857785f, + -0.01042383f, -0.08328653f, 0.03660302f, -0.03696846f, 0.04969259f, + 0.08241162f, -0.12514858f, -0.06122676f, -0.03750202f, + 6.52989605e-003f, -0.10247213f, 0.02568346f, 4.51781414e-003f, + -0.03734229f, -0.01131264f, -0.05412074f, 8.89345480e-004f, + -0.12388977f, -0.05959237f, -0.12418608f, -0.06151643f, -0.07310260f, + 0.02441575f, 0.07023528f, -0.07548289f, -7.57147965e-004f, + -0.09061348f, -0.08112976f, -0.06920306f, 9.54394229e-003f, + -0.01219902f, 1.21273217e-003f, -8.88989680e-003f, -0.08309301f, + -0.04552661f, -0.10739882f, -0.05691034f, -0.13928030f, 0.09027749f, + 0.15123098f, 0.03175976f, 0.17763577f, 3.29913251e-004f, 0.05151888f, + -0.09844074f, -0.09475287f, -0.08571247f, 0.16241577f, 0.19336018f, + 8.57454538e-003f, 0.11474732f, -0.01493934f, 0.03352379f, -0.08966240f, + -0.02322310f, 0.02663568f, 0.05448750f, -0.03536883f, -0.07210463f, + -0.06807277f, -0.03121621f, -0.05932408f, -0.17282860f, -0.15873498f, + -0.04956378f, 0.01603377f, -0.12385946f, 0.13878587f, 0.21468069f, + 0.13510075f, 0.20992437f, 0.08845878f, 0.08104013f, 0.03754176f, + 0.12173114f, 0.11103114f, 0.10643122f, 0.13941477f, 0.11640384f, + 0.14786847f, 0.01218238f, 0.01160753f, 0.03547940f, 0.08794311f, + -0.01695384f, -0.07692261f, -0.08236158f, 6.79194089e-003f, + -0.02458403f, 0.13022894f, 0.10953187f, 0.09857773f, 0.04735930f, + -0.04353498f, -0.15173385f, -0.17904443f, -0.10450364f, -0.13418166f, + -0.06633098f, -0.03170381f, -0.06839000f, -0.11350126f, -0.06983913f, + 0.19083543f, 0.17604128f, 0.07730632f, 0.10022651f, 0.36428109f, + 0.28291923f, 0.12688625f, 0.15942036f, 0.14064661f, -0.11201853f, + -0.13969108f, -0.09088077f, -0.14107047f, 0.05117374f, + -2.63348082e-003f, -0.10794610f, -0.09715455f, -0.05284977f, + 0.01565668f, 0.05031200f, 0.07021113f, -0.02963028f, 0.01766960f, + 0.08333644f, -0.03211382f, 4.90096770e-003f, 0.05186674f, -0.05045737f, + -0.09624767f, -0.02525997f, 0.06916669f, 0.01213916f, 0.05333899f, + -0.03443280f, -0.10055527f, -0.06291115f, 5.42851724e-003f, + -6.30360236e-003f, 0.02270257f, -0.01769792f, 0.03273688f, 0.07746078f, + 7.77099328e-003f, 0.05041346f, 0.01648103f, -0.02321534f, -0.09930186f, + -0.02293853f, 0.02034990f, -0.08324204f, 0.08510064f, -0.03732836f, + -0.06465405f, -0.06086946f, 0.13680504f, -0.11469388f, -0.03896406f, + -0.07142810f, 2.67581246e-003f, -0.03639632f, -0.09849060f, + -0.11014334f, 0.17489147f, 0.17610909f, -0.16091567f, -0.07248894f, + 0.01567141f, 0.23742996f, 0.07552249f, -0.06270349f, -0.07303379f, + 0.25442186f, 0.16903116f, -0.08168741f, -0.05913896f, -0.03954096f, + 6.81776879e-003f, -0.05615319f, -0.07303037f, -0.12176382f, + 0.12385108f, 0.22084464f, -0.05543206f, -0.03310431f, 0.05731593f, + 0.19481890f, 0.04016430f, 
-0.06480758f, -0.12353460f, 0.18733442f, + -0.09631214f, -0.11192076f, 0.12404587f, 0.15671748f, 0.19256128f, + 0.10895617f, 0.03391477f, -0.13032004f, -0.05626907f, -0.09025607f, + 0.23485197f, 0.27812332f, 0.26725492f, 0.07255980f, 0.16565137f, + 0.22388470f, 0.07441066f, -0.21003133f, -0.08075339f, -0.15031935f, + 0.07023834f, 0.10872041f, 0.18156518f, 0.20037253f, 0.13571967f, + -0.11915682f, -0.11131983f, -0.18878011f, 0.06074620f, 0.20578890f, + 0.12413109f, 0.03930207f, 0.29176015f, 0.29502738f, 0.27856228f, + -0.01803601f, 0.16646385f, 0.19268319f, 0.01900682f, 0.06026287f, + 2.35868432e-003f, 0.01558199f, 0.02707230f, 0.11383014f, 0.12103992f, + 0.03907350f, 0.04637353f, 0.09020995f, 0.11919726f, -3.63007211e-003f, + 0.02220155f, 0.10336831f, 0.17351882f, 0.12259731f, 0.18983354f, + 0.15736865f, 0.01160725f, -0.01690723f, -9.69582412e-004f, 0.07213813f, + 0.01161613f, 0.17864859f, 0.24486147f, 0.18208991f, 0.20177495f, + 0.05972528f, -8.93934630e-003f, -0.02316955f, 0.14436610f, 0.14114498f, + 0.05520950f, 0.06353590f, -0.19124921f, 0.10174713f, 0.29414919f, + 0.26448128f, 0.09344960f, 0.15284036f, 0.19797507f, 0.11369792f, + -0.12722753f, -0.21396367f, -0.02008235f, -0.06566695f, -0.01662150f, + -0.03937003f, 0.04778343f, 0.05017274f, -0.02299062f, -0.20208496f, + -0.06395898f, 0.13721776f, 0.22544557f, 0.14888357f, 0.08687132f, + 0.27088094f, 0.32206613f, 0.09782200f, -0.18523243f, -0.17232181f, + -0.01041531f, 0.04008654f, 0.04199702f, -0.08081299f, -0.03755421f, + -0.04809646f, -0.05222081f, -0.21709201f, -0.06622940f, 0.02945281f, + -0.04600435f, -0.05256077f, -0.08432942f, 0.02848100f, 0.03490564f, + 8.28621630e-003f, -0.11051246f, -0.11210597f, -0.01998289f, + -0.05369405f, -0.08869293f, -0.18799506f, -0.05436598f, -0.05011634f, + -0.05419716f, -0.06151857f, -0.10827805f, 0.04346735f, 0.04016083f, + 0.01520820f, -0.12173316f, -0.04880285f, -0.01101406f, 0.03250847f, + -0.06009551f, -0.03082932f, -0.02295134f, -0.06856834f, -0.08775249f, + -0.23793389f, -0.09174541f, -0.05538322f, -0.04321031f, -0.11874759f, + -0.04221844f, -0.06070468f, 0.01194489f, 0.02608565f, -0.03892140f, + -0.01643151f, -0.02602034f, -0.01305472f, 0.03920100f, -0.06514261f, + 0.01126918f, -6.27710763e-003f, -0.02720047f, -0.11133634f, + 0.03300330f, 0.02398472f, 0.04079665f, -0.10564448f, 0.05966159f, + 0.01195221f, -0.03179441f, -0.01692590f, -0.06177841f, 0.01841576f, + -5.51078189e-003f, -0.06821765f, -0.03191888f, -0.09545476f, + 0.03030550f, -0.04896152f, -0.02914624f, -0.13283344f, -0.04783419f, + 6.07836898e-003f, -0.01449538f, -0.13358212f, -0.09687774f, + -0.02813793f, 0.01213498f, 0.06650011f, -0.02039067f, 0.13356198f, + 0.05986415f, -9.12760664e-003f, -0.18780160f, -0.11992817f, + -0.06342237f, 0.01229534f, 0.07143231f, 0.10713009f, 0.11085765f, + 0.06569190f, -0.02956399f, -0.16288325f, -0.13993549f, -0.01292515f, + 0.03833013f, 0.09130384f, -0.05086257f, 0.05617329f, -0.03896667f, + -0.06282311f, -0.11490010f, -0.14264110f, -0.04530499f, 0.01598189f, + 0.09167797f, 0.08663294f, 0.04885277f, -0.05741219f, -0.07565769f, + -0.17136464f, -0.02619422f, -0.02477579f, 0.02679587f, 0.11621952f, + 0.08788391f, 0.15520640f, 0.04709549f, 0.04504483f, -0.10214074f, + -0.12293372f, -0.04820546f, -0.05484834f, 0.05473754f, 0.07346445f, + 0.05577277f, -0.08209965f, 0.03462975f, -0.20962234f, -0.09324598f, + 3.79481679e-003f, 0.03617633f, 0.16742408f, 0.07058107f, 0.10204960f, + -0.06795346f, 3.22807301e-003f, -0.12589309f, -0.17496960f, + 0.02078314f, -0.07694324f, 0.12184640f, 0.08997164f, 0.04793497f, 
+ -0.11383379f, -0.08046359f, -0.25716835f, -0.08080962f, + 6.80711539e-003f, -0.02930280f, -3.04938294e-003f, -0.11106286f, + -0.04628860f, -0.07821649f, 7.70127494e-003f, -0.10247706f, + 1.21042714e-003f, 0.20573859f, -0.03241005f, 8.42972286e-003f, + 0.01946464f, -0.01197973f, -0.14579976f, 0.04233614f, + -4.14096704e-003f, -0.06866436f, -0.02431862f, -0.13529138f, + 1.25891645e-003f, -0.11425111f, -0.04303651f, -0.01694815f, + 0.05720210f, -0.16040207f, 0.02772896f, 0.05498345f, -0.15010567f, + 0.01450866f, 0.02350303f, -0.04301004f, -0.04951802f, 0.21702233f, + -0.03159155f, -0.01963303f, 0.18232647f, -0.03263875f, + -2.88476888e-003f, 0.01587562f, -1.94303901e-003f, -0.07789494f, + 0.04674156f, -6.25576358e-003f, 0.08925962f, 0.21353747f, 0.01254677f, + -0.06999976f, -0.05931328f, -0.01884327f, -0.04306272f, 0.11794136f, + 0.03842728f, -0.03907030f, 0.05636114f, -0.09766009f, -0.02104000f, + 8.72711372e-003f, -0.02736877f, -0.05112274f, 0.16996814f, 0.02955785f, + 0.02094014f, 0.08414304f, -0.03335762f, -0.03617457f, -0.05808248f, + -0.08872101f, 0.02927705f, 0.27077839f, 0.06075108f, 0.07478261f, + 0.15282831f, -0.03908454f, -0.05101782f, -9.51998029e-003f, + -0.03272416f, -0.08735625f, 0.07633440f, -0.07185312f, 0.13841286f, + 0.07812646f, -0.12901451f, -0.05488589f, -0.05644578f, -0.03290703f, + -0.11184757f, 0.03751570f, -0.05978153f, -0.09155276f, 0.05657315f, + -0.04328186f, -0.03047933f, -0.01413135f, -0.10181040f, -0.01384013f, + 0.20132534f, -0.01536873f, -0.07641169f, 0.05906778f, -0.07833145f, + -0.01523801f, -0.07502609f, -0.09461885f, -0.15013233f, 0.16050665f, + 0.09021381f, 0.08473236f, 0.03386267f, -0.09147339f, -0.09170618f, + -0.08498498f, -0.05119187f, -0.10431040f, 0.01041618f, -0.03064913f, + 0.09340212f, 0.06448522f, -0.03881054f, -0.04985436f, -0.14794017f, + -0.05200112f, -0.02144495f, 0.04000821f, 0.12420804f, -0.01851651f, + -0.04116732f, -0.11951703f, -0.04879033f, -0.08722515f, -0.08454733f, + -0.10549165f, 0.11251976f, 0.10766345f, 0.19201984f, 0.06128913f, + -0.02734615f, -0.08834923f, -0.16999826f, -0.03548348f, + -5.36092324e-003f, 0.08297954f, 0.07226378f, 0.04194529f, 0.04668673f, + 8.73902347e-003f, 0.06980139f, 0.05652480f, 0.05879445f, 0.02477076f, + 0.02451423f, 0.12433673f, 0.05600227f, 0.06886370f, 0.03863076f, + 0.07459056f, 0.02264139f, 0.01495469f, 0.06344220f, 0.06945208f, + 0.02931899f, 0.11719371f, 0.04527427f, 0.03248192f, 2.08271481e-003f, + 0.02044626f, 0.11403449f, 0.04303892f, 0.06444661f, 0.04959024f, + 0.08174094f, 0.09240247f, 0.04894639f, 0.02252937f, -0.01652530f, + 0.07587013f, 0.06064249f, 0.13954395f, 0.02772832f, 0.07093039f, + 0.08501238f, 0.01701301f, 0.09055722f, 0.33421436f, 0.20163782f, + 0.09821030f, 0.07951369f, 0.08695120f, -0.12757730f, -0.13865978f, + -0.06610068f, -0.10985506f, 0.03406816f, -0.01116336f, -0.07281768f, + -0.13525715f, -0.12844718f, 0.08956250f, 0.09171610f, 0.10092317f, + 0.23385370f, 0.34489515f, 0.09901748f, 0.02002922f, 0.12335990f, + 0.07606190f, -0.14899330f, -0.15634622f, -0.06494618f, -0.01760547f, + 0.03404277f, -0.13208845f, -0.12101169f, -0.18294574f, -0.16560709f, + 0.02183887f, -0.02752613f, 0.01813638f, 0.02000757f, 0.01319924f, + 0.08030242f, 0.01220535f, 2.98233377e-003f, -0.01307070f, 0.05970297f, + -0.05345284f, -0.03381982f, -9.87543724e-003f, -0.06869387f, + 0.03956730f, -0.03108176f, -0.05732809f, 0.02172386f, 0.04159765f, + 2.62783933e-003f, 0.04813229f, 0.09358983f, -8.18389002e-003f, + 0.01724574f, -0.02547474f, -0.04967288f, -0.02390376f, 0.06640504f, + -0.06306566f, 
0.01137518f, 0.05589378f, -0.08237787f, 0.02455001f, + -0.03059422f, -0.08953978f, 0.06851497f, 0.07190268f, -0.07610799f, + 7.87237938e-003f, -7.85830803e-003f, 0.06006952f, -0.01126728f, + -2.85743061e-003f, -0.04772895f, 0.01884944f, 0.15005857f, + -0.06268821f, -0.01989072f, 0.01138399f, 0.08760451f, 0.03879007f, + -9.66926850e-003f, -0.08012961f, 0.06414555f, -0.01362950f, + -0.09135523f, 0.01755159f, 0.04459474f, 0.09650917f, 0.05219948f, + -2.19440833e-003f, -0.07037939f, -0.01599054f, 0.13103317f, + -0.02492603f, -0.01032540f, -0.02903307f, 0.04489160f, 0.05148086f, + 0.01858173f, -0.02919228f, 0.08299296f, -0.04590359f, -0.15745632f, + -0.09068198f, -0.02972453f, 0.12985018f, 0.22320485f, 0.24261914f, + 0.03642650f, -0.05506422f, 2.67413049e-003f, -0.03834032f, 0.06449424f, + 0.03834866f, 0.03816991f, 0.25039271f, 0.34212017f, 0.32433882f, + 0.18824573f, -0.08599839f, -0.17599408f, -0.15317015f, -0.09913155f, + -0.02856072f, -0.05304699f, -1.06437842e-003f, -0.06641813f, + -0.07509298f, 0.01463361f, -0.07551918f, -0.04510373f, + -8.44620075e-003f, 0.01772176f, 0.04068235f, 0.20295307f, 0.15719447f, + 0.05712103f, 0.26296997f, 0.14657754f, 0.01547317f, -0.05052776f, + -0.03881342f, -0.01437883f, -0.04930177f, 0.11719568f, 0.24098417f, + 0.26468599f, 0.31698579f, 0.10103608f, -0.01096375f, -0.01367013f, + 0.17104232f, 0.20065314f, 2.67622480e-003f, -0.01190034f, 0.18301608f, + 0.09459770f, -0.06357619f, -0.06473801f, 0.01377906f, -0.10032775f, + -0.06388740f, 3.80393048e-003f, 0.06206078f, 0.10349120f, 0.26804337f, + 8.17918684e-003f, -0.02314351f, 9.34422202e-003f, 0.09198381f, + 0.03681326f, -8.77339672e-003f, -0.09662418f, -0.02715708f, + 0.13503517f, 0.08962728f, -6.57071499e-003f, -0.03201199f, 0.28510824f, + 0.32095715f, 0.18512695f, -0.14230858f, -0.14048551f, -0.07181299f, + -0.08575408f, -0.08661680f, -0.17416079f, 7.54326640e-004f, + 0.05601677f, 0.13585392f, -0.04960437f, -0.07708392f, 0.10676333f, + -0.04407546f, -0.07209078f, 0.03663663f, 0.28949317f, 0.41127121f, + 0.27431169f, -0.06900328f, -0.21474190f, -0.15578632f, -0.19555484f, + -0.15209621f, -0.11269179f, 0.07416003f, 0.18991330f, 0.26858172f, + 0.01952259f, 0.01017922f, 0.02159843f, -4.95165400e-003f, -0.04368168f, + -0.12721671f, -0.06673957f, -0.11275250f, 0.04413409f, 0.05578312f, + 0.03896771f, 0.03566417f, -0.05871816f, -0.07388090f, -0.17965563f, + -0.08570268f, -0.15273231f, -0.06022318f, -0.06999847f, + -6.81510568e-003f, 0.06294262f, -6.54901436e-004f, -0.01128654f, + -0.02289657f, 0.04849290f, 0.04140804f, 0.23681939f, 0.14545733f, + 0.01989965f, 0.12032662f, 3.87463090e-003f, -6.02597650e-003f, + -0.05919775f, -0.03067224f, -0.07787777f, 0.10834727f, 0.02153730f, + 0.02765649f, 0.03975543f, -0.12182906f, -0.04900113f, -0.09940100f, + -0.06453611f, -0.13757215f, -0.03721382f, 0.02827376f, -0.04351249f, + 0.01907038f, -0.10284120f, -0.05671160f, -0.10760647f, -0.09624009f, + -0.09565596f, -0.01303654f, 0.03080539f, 0.01416511f, 0.05846142f, + -5.42971538e-003f, 0.06221476f, -0.03320325f, -0.06791797f, + -0.05791342f, 0.12851369f, 0.14990346f, 0.03634374f, 0.14262885f, + 0.04330391f, 0.05032569f, -0.05631914f, 0.01606137f, 0.04387223f, + 0.22344995f, 0.15722635f, -0.04693628f, 0.03006579f, -2.52882647e-003f, + 0.05717621f, -0.07529724f, -0.02848588f, -0.06868757f, + -4.51729307e-003f, 0.06466042f, -0.05935378f, -0.04704857f, + -0.07363959f, 0.04843248f, -0.13421375f, -0.09789340f, -0.10255270f, + 0.03509852f, 0.04751543f, -0.03822323f, 0.09740467f, 0.04762916f, + 0.03940146f, -0.08283259f, 0.09552965f, 
0.05038739f, 0.21258622f, + 0.09646992f, 0.03241193f, 0.05167701f, 0.04614570f, 0.04330090f, + -0.02671840f, -0.06259909f, -0.02301898f, 0.18829170f, 0.10522786f, + 0.04313190f, 0.01670948f, -0.08421925f, 0.05911417f, -0.10582602f, + -0.04855484f, -0.08373898f, 0.07775915f, 0.03723533f, -0.12047344f, + 4.86345543e-003f, -0.10520902f, 0.06571782f, -0.07528137f, + -0.03245651f, -0.09869066f, -0.02917477f, -0.18293270f, 0.14810945f, + 9.24033765e-003f, -0.04354914f, 0.02266885f, -0.11872729f, + -0.04016589f, 0.02830229f, 0.22539048f, 0.20565644f, 0.16701797f, + 0.09019924f, 0.01300652f, 0.09760600f, -0.03675831f, -0.01935448f, + -0.06894835f, 0.08077277f, 0.19047537f, 0.11312226f, 0.04106043f, + -0.11187182f, 0.04312806f, -0.18548580f, -0.11287174f, -0.08794551f, + 0.02078281f, -0.15295486f, 0.11806386f, -0.01103218f, -0.15971117f, + 0.02153538f, -0.05232147f, -0.10835317f, -0.13910367f, 0.05920752f, + -0.10122602f, 0.20174250f, 0.09105796f, -0.01881348f, 0.09559010f, + -0.03725745f, -0.09442931f, -0.09763174f, 0.05854454f, 0.08287182f, + 0.12919849f, 0.08594352f, -2.49806582e-003f, 0.02398440f, + 5.67950122e-003f, -0.06296340f, -0.12993270f, 0.03855852f, 0.05186560f, + 0.10839908f, -0.03380463f, -0.12654832f, -0.05399339f, -0.07456800f, + -0.04736232f, -0.10164231f, 0.07496139f, 0.08125214f, 0.07656177f, + -0.04999603f, -0.12823077f, -0.07692395f, -0.11317524f, -0.09118655f, + -0.05695669f, 0.10477209f, 0.07468581f, 0.01630048f, -8.00961629e-003f, + -0.06582128f, -0.04019095f, -0.04682907f, -0.01907842f, -0.10997720f, + 0.04911406f, 0.02931030f, 0.04197735f, -0.05773980f, -0.09670641f, + -0.03594951f, -0.03402121f, -0.07149299f, -0.10566200f, 0.10601286f, + 0.06340689f, -0.01518632f, -5.96402306e-003f, -0.07628012f, + -3.52779147e-003f, -0.02683854f, -0.10265494f, -0.02680815f, + 0.16338381f, 0.03103515f, 0.02296976f, 0.01624348f, -0.10831620f, + -0.02314233f, -0.04789969f, -0.05530700f, -0.06461314f, 0.10494506f, + 0.04642856f, -0.07592955f, -0.06197905f, -0.09042154f, -0.01445521f, + -0.04297818f, -0.11262015f, -0.11430512f, 0.03174541f, -0.03677487f, + -0.02963996f, -0.06610169f, -0.13292049f, -0.07059067f, -0.08444111f, + -0.02640536f, -0.07136250f, 0.04559967f, 0.01459980f, 0.17989251f, + 0.04435328f, -0.12464730f, -0.02871115f, -0.10752209f, -0.03393742f, + -0.03791408f, 0.02548251f, 0.01956050f, 0.19245651f, 0.13963254f, + -0.05904696f, -0.07424626f, -0.10411884f, 1.54176133e-003f, + 0.01797429f, 0.13025844f, 0.04547642f, -0.05710349f, -0.10697161f, + -0.13489437f, -0.06515755f, -0.06406886f, -4.08572936e-003f, + -0.01336483f, 0.04368737f, -0.11259720f, -0.05701635f, -0.06469971f, + -0.08346602f, -0.04166770f, -0.05795543f, -0.08247511f, -0.05742628f, + 0.08452254f, -0.03350224f, 0.13980860f, 0.13252275f, 0.07589617f, + 0.07539988f, 0.12155797f, 0.19087289f, 0.15050751f, 0.21250245f, + 0.14206800f, 0.01298489f, 0.07450245f, 0.06559097f, 0.01700557f, + 0.04512971f, 0.16950700f, 0.10261577f, 0.16389982f, 0.05505059f, + -0.03453077f, 0.08622462f, 0.07935954f, 0.03976260f, 0.02036091f, + 3.95744899e-003f, 0.03267065f, 0.15235919f, 0.01297494f, -0.08109194f, + 0.01407558f, 4.40693414e-003f, -0.15157418f, -0.11390478f, + -0.07487597f, -7.81322457e-003f, -0.02749545f, -0.10181408f, + 0.13755716f, 0.14007211f, 0.13482562f, 0.27517235f, 0.34251109f, + 0.07639657f, 0.07268607f, 0.19823882f, 0.16135791f, -0.04186463f, + -0.12784107f, -0.09846287f, 0.03169041f, 0.10974082f, -0.15051922f, + -0.08916726f, -0.07138767f, -0.04153349f, 6.25418453e-003f, + 0.01266654f, 0.10533249f, 0.12749144f, 
0.15148053f, 0.01498513f, + 0.06305949f, -0.01247123f, -0.08778401f, -0.08551880f, -0.11955146f, + -0.08493572f, -0.02901620f, -0.02394859f, -0.13427313f, -0.11053200f, + -0.14413260f, -0.15203285f, 0.03972760f, -3.72127310e-004f, + -0.04200919f, 0.06105104f, 0.01904975f, -0.01106191f, + -7.27445772e-003f, -0.01520341f, 1.10228511e-003f, -0.04949187f, + -0.08013099f, 5.72071038e-003f, 0.08415454f, -0.06523152f, 0.03664081f, + -0.02673042f, -0.12066154f, -0.03702074f, 0.06006580f, 0.01628682f, + -6.17772620e-003f, 0.08192339f, -3.41629819e-003f, 0.02870512f, + 0.05807141f, 0.04959986f, 0.04618251f, -0.04901629f, -0.10579574f, + 0.02274442f, 0.12070961f, 2.23597488e-003f, 0.09831765f, -0.03019848f, + -0.11181970f, -0.04961075f, 0.02498928f, -0.03714991f, -0.01619653f, + 0.02643486f, -7.62964319e-003f, -0.02882290f, -0.06242594f, + -0.08439861f, 0.07220893f, 0.07263952f, 0.01561574f, 0.03091968f, + 0.01708712f, -0.03797151f, -3.18561122e-003f, 0.01624021f, + -0.02828573f, 0.11284444f, -1.32280716e-003f, -0.07784860f, + -0.07209100f, 0.03372242f, 0.12154529f, 0.02278104f, -0.05275500f, + -0.01918484f, 0.12989293f, 0.05424401f, 0.02333086f, 0.04029022f, + 0.12392918f, 0.09495489f, 0.09190340f, 0.07935889f, 8.76816828e-003f, + 0.17148446f, -8.51302687e-003f, -0.08011249f, -0.06796283f, + 0.04884845f, 0.01112272f, -0.07835306f, -1.14811445e-003f, + -0.03440760f, 0.02845243f, 0.07695542f, -0.07069533f, -0.01151784f, + -8.53884313e-003f, -0.01662786f, -0.04163864f, 0.05400505f, + 0.02859163f, 0.02921852f, 0.05003135f, -6.85718050e-003f, -0.01632611f, + 0.07780217f, 0.04042810f, -0.01216440f, 3.60914599e-003f, -0.06322435f, + 0.09516726f, 0.12877031f, -9.69162490e-003f, 0.01031179f, 0.05180895f, + -9.34659224e-003f, -0.01644533f, -0.04849347f, -0.04343236f, + 0.10514783f, 0.08046635f, -0.04615205f, -0.03975486f, -0.01485525f, + 0.13096830f, -0.01517950f, -0.06571898f, -0.04016372f, 0.01849786f, + 0.02439670f, 0.08067258f, 1.74824719e-003f, 0.07053747f, 0.08819518f, + -5.08352555e-003f, -0.06550863f, -0.08266170f, -0.07780605f, + 0.01453450f, -0.08756890f, 0.01096501f, -8.71319138e-003f, 0.10110464f, + 0.02420769f, -0.06708383f, 0.02007811f, 5.93133038e-003f, 0.05398923f, + 0.07538138f, 0.02049227f, 0.02242589f, 0.04011070f, -1.44875818e-003f, + -4.19115182e-003f, 0.06367654f, 0.02506934f, 0.02434536f, 0.05879405f, + -8.22952855e-003f, -0.01242441f, 0.04224926f, -0.01754923f, + 0.05958161f, 0.03818886f, -0.01830363f, -0.04308917f, -0.04422197f, + -0.02432721f, 0.02264866f, 2.03751423e-003f, 0.01197031f, 0.04439203f, + 0.12169247f, 0.03602713f, -0.02599251f, -1.98226492e-003f, 0.02046336f, + -0.02639058f, -1.91242550e-003f, -0.09334669f, -0.03595153f, + -9.88179818e-003f, -0.06848445f, -0.04666303f, -0.09955736f, + -0.04206430f, 0.02609075f, 9.09005292e-003f, -0.07138551f, + -4.22313227e-004f, 0.01766645f, 0.02756404f, 0.01308276f, 0.04052891f, + 0.02387515f, 0.05337298f, 0.02500631f, -0.04970853f, -0.12467445f, + 0.17604403f, 0.12256411f, -0.07512254f, 8.70451052e-003f, -0.05697548f, + -0.03626474f, -8.76623299e-003f, -0.01210897f, -0.09451522f, + 0.07490732f, -0.02008001f, -0.02681278f, -0.06463405f, -0.01517507f, + 7.33757764e-003f, 6.07147906e-003f, -0.09316964f, -0.04575328f, + 0.13261597f, 0.15424870f, -0.01655918f, -0.02772390f, -0.05243644f, + -0.02356456f, -0.02351753f, -0.10211615f, -0.12873036f, 0.14549787f, + 0.12519856f, 4.38762689e-003f, 0.02795992f, 0.05170322f, 0.09223596f, + 0.05890015f, 0.02376701f, -0.02777346f, 0.09506908f, 0.02328936f, + -0.02319928f, -0.03218696f, -0.01527841f, 
-0.01016694f, -0.02674719f, + 0.05137179f, 0.01980666f, 0.06544447f, -0.01746171f, 0.01026380f, + 0.01561806f, 7.97004555e-004f, 0.07601810f, 0.01907250f, -0.03083035f, + -0.05987392f, 0.09242783f, 0.14555025f, 0.01035827f, 0.03092401f, + -0.09562709f, -0.03802354f, 0.02531144f, 0.03079449f, -0.07100715f, + 0.03330721f, -2.69116857e-003f, 0.03167490f, 0.05744999f, 0.03259895f, + 1.91266940e-003f, 0.03194578f, 0.07389776f, 0.02198060f, 0.07633314f, + 0.03293105f, -0.09103648f, 0.04718142f, 0.06102672f, -0.01003063f, + 5.85481385e-003f, -0.01522574f, 0.02323526f, 0.10584345f, + 4.35879454e-003f, 0.06107873f, 0.05868603f, -0.03115531f, 0.01214679f, + 0.08567052f, 3.93926632e-003f, -0.02521488f, -1.88425183e-003f, + 0.02038053f, -6.26854831e-004f, 0.04897438f, -0.04280585f, + -0.04819689f, -0.04812867f, -0.01451186f, 0.05101469f, + -9.01125465e-003f, -0.03333859f, 0.03917955f, 0.04196448f, 0.04292135f, + 0.02809529f, 0.02999715f, 0.04081348f, 9.10039060e-003f, 0.09703232f, + 0.10379741f, 0.02348725f, -4.72756615e-003f, 0.01027325f, 0.10402658f, + 0.12071823f, 0.09817299f, -0.02612033f, 0.03638414f, 0.05896405f, + 0.04865025f, 0.04793910f, -0.03882321f, -0.02962117f, -0.01222268f, + 0.04071597f, 0.01922777f, -0.02287866f, 0.03328381f, 0.01859092f, + 0.09024994f, 0.03804455f, -0.01424510f, 0.01953739f, 0.02509617f, + -0.03390914f, -0.05663941f, -0.01641979f, 0.05848591f, 0.04639670f, + 0.02092116f, 0.12911791f, 0.19918139f, 0.07739855f, -7.25806039e-003f, + 0.04074838f, 0.03183993f, 1.39251316e-003f, -0.01428625f, 0.01865480f, + 0.08529541f, 0.13547510f, 0.11189661f, 0.03998901f, 0.09575938f, + -0.02631102f, -0.03458253f, -0.04749985f, -0.06070716f, + 4.71884012e-003f, 0.06445789f, -0.02450038f, -0.05483776f, + -0.04657237f, -0.02030717f, -0.03480766f, -0.09397731f, -0.06399718f, + -0.01804585f, 5.62348310e-003f, -6.64811488e-003f, -0.06517869f, + 6.96210237e-003f, -0.01860148f, -0.04245830f, -0.05850367f, + -3.24417115e-003f, 0.07700698f, 0.11290991f, 0.09923030f, -0.02970599f, + 0.05592411f, 0.04813979f, -0.09811195f, -0.09357996f, -0.03276114f, + 0.05218338f, 0.04141375f, 3.92977800e-003f, -0.05047480f, 0.15960084f, + 0.04612800f, -0.03114098f, -0.04650044f, -0.03249795f, -0.02425641f, + -0.04311355f, 0.04307659f, -0.09401883f, -0.04742785f, -0.01254499f, + -0.06598741f, 3.41369561e-003f, -0.05620445f, -7.28127593e-003f, + -0.05998361f, -0.03274450f, -0.07376868f, 3.19015374e-003f, + -0.07733069f, 0.05815864f, -0.02471071f, 0.03850617f, 0.13838784f, + 0.15399861f, 0.01731321f, -0.01477586f, 0.10393341f, 0.05159833f, + -0.01945555f, -0.03427503f, -0.04867341f, 0.09237480f, 0.10732719f, + 0.06071450f, -0.01355071f, 0.01844356f, -0.03480803f, -0.03796671f, + 2.15628621e-004f, -0.05440186f, 0.01889855f, -0.01443413f, + -0.02607902f, -0.02938001f, 0.02720689f, -0.06228397f, -0.02970936f, + -0.03426210f, -0.10280876f, -0.06739304f, -0.05227850f, 0.03360292f, + -0.11278441f, -0.06966180f, -0.13937433f, 9.10932291e-003f, + 2.52020749e-004f, -4.07359656e-003f, 0.12310639f, 0.09343060f, + 0.07302511f, 0.03222093f, 0.07532879f, 0.03792387f, -0.04985180f, + 0.01804602f, 0.02694195f, 0.13481498f, 0.04601225f, 0.04106982f, + 0.08511057f, 0.12314661f, 0.01320830f, 0.05044121f, -5.52943908e-003f, + -0.08992624f, -0.02249301f, -0.08181777f, 0.06165213f, -0.03256603f, + -0.01068920f, -0.01323473f, -0.11970232f, -0.04616347f, -0.12088681f, + -0.06762606f, -0.08676834f, -0.06434575f, 0.01772529f, 0.03469615f, + -0.10926618f, 0.03013873f, 0.14030397f, 0.16130108f, 0.17985588f, + 0.11281928f, 0.10530639f, 
0.08905948f, 0.07733764f, 0.06695238f, + 0.02142088f, 0.06438877f, 0.09794453f, 0.05745072f, 0.02788557f, + 0.02632830f, 0.07985807f, 4.24902979e-003f, 8.47890321e-003f, + -0.02679466f, -5.28812688e-003f, -0.02162580f, -0.07490715f, + -0.08251337f, -0.02056576f, -0.01026194f, -1.15492963e-003f, + -5.75720915e-004f, -0.07210591f, -0.07320981f, -0.04883312f, + -0.10897151f, -0.07477258f, -0.08867134f, -0.09222437f, -0.10924666f, + -0.10430276f, 0.07953499f, 0.02767959f, 0.11393359f, 0.18779543f, + 0.03313421f, 0.02143700f, 0.05852016f, -2.12067598e-003f, + -3.76984011e-003f, 0.02774167f, -0.03124610f, 0.01465141f, 0.01616004f, + -0.01391913f, -0.04404102f, -0.05444227f, -0.14684731f, -0.15016587f, + 0.04509468f, 1.29563001e-003f, 0.01398350f, 0.05610404f, -0.04868806f, + -0.04776716f, -8.16873740e-003f, -2.30126386e-003f, -0.02286313f, + 0.11983398f, -0.04703261f, -0.08814441f, -0.07585249f, -0.10799607f, + -0.03232087f, 0.01509786f, -0.04843464f, -0.03967846f, 0.09589416f, + 0.01352560f, -0.01458119f, 0.01050829f, -0.03038946f, 0.01608388f, + 1.11975556e-003f, -0.01250656f, 2.86211423e-003f, 0.04333691f, + -0.14603497f, -0.01946543f, -0.02327525f, -0.01973944f, 0.07944400f, + -0.02224544f, -0.06701808f, 0.03476532f, 0.11505594f, -0.02712801f, + -0.01665113f, 0.06315716f, -0.08205860f, 0.07431999f, 0.04915778f, + -0.04468752f, -0.01490402f, 0.07400476f, -0.11650901f, 0.05102430f, + 0.04559118f, -0.05916039f, 0.08840760f, -0.01587902f, -0.14890194f, + 0.07857784f, 0.04710254f, -0.05381983f, -0.07331945f, -0.03604643f, + 0.15611970f, 0.07649943f, -0.05959348f, -0.02776607f, 0.11098688f, + 0.03758875f, -0.04446875f, 0.04933187f, 0.01345535f, 0.06921103f, + 0.07364785f, 0.05518956f, 0.02899585f, 0.09375840f, 0.10518434f, + -0.04420241f, 0.01915282f, -3.56386811e-003f, 0.14586878f, 0.10286101f, + -0.04360626f, -0.12723237f, 0.09076386f, 0.11119842f, -0.06035013f, + 0.09674817f, 0.08938243f, 0.07065924f, 0.02603180f, 5.84815582e-003f, + -0.05922065f, 0.12360309f, 3.59695964e-003f, 2.99844006e-003f, + 0.03697936f, 0.02043072f, 0.04168725f, 0.01025975f, -0.01359980f, + -0.01600920f, 0.02581056f, 0.02329250f, 2.98100687e-003f, 0.01629762f, + 0.06652115f, 0.05855627f, 0.01237463f, -0.01297135f, 0.01761587f, + 0.05090865f, 0.06549342f, -0.04425945f, 2.43203156e-003f, + 3.07327788e-003f, 0.06678630f, -0.04303836f, 0.01082393f, -0.06476044f, + 0.04077786f, 0.12441979f, 0.08237778f, 0.07424165f, 0.04065890f, + 0.06905543f, 0.09556347f, 0.12724875f, -0.02132082f, 0.08514154f, + -0.04175328f, -0.02666954f, 0.01897836f, 0.03317382f, 9.45465732e-003f, + -0.01238974f, -0.04242500f, -0.01419479f, -0.03545213f, -0.02440874f, + 0.08684119f, 0.04212951f, 0.02462858f, -0.01104825f, -5.01706870e-003f, + 0.02968982f, 0.02597476f, -0.01568939f, 0.04514892f, 0.06974549f, + 0.08670278f, 0.06828108f, 0.10238872f, 0.05405957f, 0.06548470f, + -0.03763957f, 0.01366090f, 0.07069602f, 0.05363748f, 0.04798120f, + 0.11706422f, 0.05466456f, -0.01869259f, 0.06344382f, 0.03106543f, + 0.08432506f, -0.02061096f, 0.03821088f, -6.92190882e-003f, + 6.40467042e-003f, -0.01271779f, 6.89014705e-005f, 0.04541415f, + -0.01899539f, -0.05020239f, 0.03000903f, 0.01090422f, 4.52452758e-003f, + 0.02573632f, -0.02388454f, -0.04200457f, 1.72783900e-003f, + -0.05978370f, -0.02720562f, 0.06573715f, 0.01154317f, 0.01265615f, + 0.07375994f, -9.19828378e-003f, -0.04914120f, 0.02124831f, 0.06455322f, + 0.04372910f, -0.03310043f, 0.03605788f, -6.78055827e-003f, + 9.36202332e-003f, 0.01747596f, -0.06406314f, -0.06812935f, 0.08080816f, + -0.02778088f, 
0.02735260f, 0.06393493f, 0.06652229f, 0.05676993f, + 0.08640018f, -7.59188086e-003f, -0.02012847f, -0.04741159f, + -0.01657069f, -0.01624399f, 0.05547778f, -2.33309763e-003f, + 0.01120033f, 0.06141156f, -0.06285004f, -0.08732341f, -0.09313398f, + -0.04267832f, 5.57443965e-003f, 0.04809862f, 0.01773641f, + 5.37361018e-003f, 0.14842421f, -0.06298012f, -0.02935147f, 0.11443478f, + -0.05034208f, 5.65494271e-003f, 0.02076526f, -0.04577984f, + -0.04735741f, 0.02961071f, -0.09307127f, -0.04417921f, -0.04990027f, + -0.03940028f, 0.01306016f, 0.06267900f, 0.03758737f, 0.08460117f, + 0.13858789f, 0.04862388f, -0.06319809f, -0.05655516f, 0.01885816f, + -0.03285607f, 0.03371567f, -0.07040928f, -0.04514049f, 0.01392166f, + 0.08184422f, -0.07230316f, 0.02386871f, 0.02184591f, 0.02605764f, + -0.01033954f, 9.29878280e-003f, 7.67351175e-003f, 0.15189242f, + 0.02069071f, -0.09738296f, -0.08894105f, -0.07768748f, 0.02332268f, + -0.01778995f, -0.03258888f, -0.08180822f, -0.08492987f, 0.02290156f, + -0.11368170f, -0.03554465f, -0.04533844f, -0.02861580f, 0.06782424f, + 0.01113123f, 0.02453644f, 0.12721945f, 0.08084814f, -0.03607795f, + 0.01109122f, 0.04803548f, -0.03489929f, 0.03399536f, -0.05682014f, + 8.59533902e-003f, -4.27904585e-003f, 0.03230887f, -0.01300198f, + -0.01038137f, -0.07930113f, 8.33097473e-003f, 0.02296994f, + -0.01306500f, -0.01881626f, 0.04413369f, 0.05729880f, -0.03761553f, + 0.01942326f, 1.64540811e-003f, -0.03811319f, 0.04190650f, -0.14978096f, + -0.04514487f, 0.01209545f, -5.46460645e-003f, -0.01647195f, + 7.63064111e-003f, -0.07494587f, 0.08415288f, 0.10020141f, -0.01228561f, + 0.06553826f, 0.04554005f, 0.07890417f, 0.03041138f, 0.01752007f, + 0.09208256f, -3.74419295e-004f, 0.10549527f, 0.04686913f, 0.01894833f, + -0.02651412f, -4.34682379e-003f, 5.44942822e-003f, 0.01444484f, + 0.05882156f, -0.03336544f, 0.04603891f, -0.10432546f, 0.01923928f, + 0.01842845f, -0.01712168f, -0.02222766f, 0.04693324f, -0.06202956f, + -0.01422159f, 0.08732220f, -0.07706107f, 0.02661049f, -0.04300238f, + -0.03092422f, -0.03552184f, -0.01886088f, -0.04979934f, 0.03906401f, + 0.04608644f, 0.04966111f, 0.04275464f, -0.04621769f, -0.02653212f, + 8.57011229e-003f, 0.03839684f, 0.05818764f, 0.03880796f, + -2.76100676e-004f, 0.03076511f, -0.03266929f, -0.05374557f, + 0.04986527f, -9.45429131e-003f, 0.03582499f, -2.64564669e-003f, + -1.07461517e-003f, 0.02962313f, -0.01483363f, 0.03060869f, 0.02448327f, + 0.01845641f, 0.03282966f, -0.03534438f, -0.01084059f, -0.01119136f, + -1.85360224e-003f, -5.94652840e-004f, -0.04451817f, 2.98327743e-003f, + 0.06272484f, -0.02152076f, -3.05971340e-003f, -0.05070828f, + 0.01531762f, 0.01282815f, 0.05167150f, 9.46266949e-003f, + -3.34558333e-003f, 0.11442288f, -0.03906701f, -2.67325155e-003f, + 0.03069184f, -0.01134165f, 0.02949462f, 0.02879886f, 0.03855566f, + -0.03450781f, 0.09142872f, -0.02156654f, 0.06075062f, -0.06220816f, + 0.01944680f, 6.68372354e-003f, -0.06656796f, 8.70784000e-003f, + 0.03456013f, 0.02434320f, -0.13236357f, -0.04177035f, -0.02069627f, + 0.01068112f, 0.01505432f, -0.07517391f, -3.83571628e-003f, + -0.06298508f, -0.02881260f, -0.13101046f, -0.07221562f, + -5.79945277e-003f, -8.57300125e-003f, 0.03782469f, 0.02762164f, + 0.04942456f, -0.02936396f, 0.09597211f, 0.01921411f, 0.06101191f, + -0.04787507f, -0.01379578f, -7.40224449e-003f, -0.02220136f, + -0.01313756f, 7.77558051e-003f, 0.12296968f, 0.02939998f, 0.03594062f, + -0.07788624f, -0.01133144f, 3.99316690e-004f, -0.06090347f, + -0.01122066f, -4.68682544e-003f, 0.07633100f, -0.06748922f, + 
-0.05640298f, -0.05265681f, -0.01139122f, -0.01624347f, -0.04715714f, + -0.01099092f, 0.01048561f, 3.28499987e-003f, -0.05810167f, + -0.07699911f, -0.03330683f, 0.04185145f, 0.03478536f, 0.02275165f, + 0.02304766f, 6.66040834e-003f, 0.10968148f, -5.93013782e-003f, + -0.04858336f, -0.04203213f, -0.09316786f, -6.13074889e-003f, + -0.02544625f, 0.01366201f, 9.18555818e-003f, -0.01846578f, + -0.05622401f, -0.03989377f, -0.07810296f, 6.91275718e-003f, + 0.05957597f, -0.03901334f, 0.01572002f, -0.01193903f, + -6.89400872e-003f, -0.03093356f, -0.04136098f, -0.01562869f, + -0.04604580f, 0.02865234f, -0.08678447f, -0.03232484f, -0.05364593f, + -0.01445016f, -0.07003860f, -0.08669746f, -0.04520775f, 0.04274122f, + 0.03117515f, 0.08175703f, 0.01081109f, 0.06379741f, 0.06199206f, + 0.02865988f, 0.02360346f, 0.06725410f, -0.03248780f, -9.37702879e-003f, + 0.08265898f, -0.02245839f, 0.05125763f, -0.01862395f, 0.01973453f, + -0.01994494f, -0.10770868f, 0.03180375f, 3.23935156e-003f, + -0.02142080f, -0.04256190f, 0.04760900f, 0.04282863f, 0.05635953f, + -0.01870849f, 0.05540622f, -0.03042666f, 0.01455277f, -0.06630179f, + -0.05843807f, -0.03739681f, -0.09739155f, -0.03220233f, -0.05620182f, + -0.10381401f, 0.07400211f, 4.20676917e-003f, 0.03258535f, + 2.14308966e-003f, 0.05121966f, -0.01274337f, 0.02384761f, 0.06335578f, + -0.07905591f, 0.08375625f, -0.07898903f, -0.06508528f, -0.02498444f, + 0.06535810f, 0.03970535f, 0.04895468f, -0.01169566f, -0.03980601f, + 0.05682293f, 0.05925463f, -0.01165808f, -0.07936699f, -0.04208954f, + 0.01333987f, 0.09051196f, 0.10098671f, -0.03974256f, 0.01238771f, + -0.07501741f, -0.03655440f, -0.04301528f, 0.09216860f, + 4.63579083e-004f, 0.02851115f, 0.02142735f, 1.28244064e-004f, + 0.02879687f, -0.08554889f, -0.04838862f, 0.08135369f, -0.05756533f, + 0.01413900f, 0.03451880f, -0.06619488f, -0.03053130f, 0.02961676f, + -0.07384635f, 0.01135692f, 0.05283910f, -0.07778034f, -0.02107482f, + -0.05511716f, -0.13473752f, 0.03030157f, 0.06722020f, -0.06218817f, + -0.05826827f, 0.06254654f, 0.02895772f, -0.01664000f, -0.03620280f, + -0.01612278f, -1.46097376e-003f, 0.14013411f, -8.96181818e-003f, + -0.03250246f, 3.38630192e-003f, 2.64779478e-003f, 0.03359732f, + -0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f }; + return vector(detector, detector + sizeof(detector)/sizeof(detector[0])); +} + +/* Returns the nearest upper power of two, works only for +the typical GPU thread count (pert block) values */ +static int power_2up(unsigned int n) +{ + if (n < 1) return 1; + else if (n < 2) return 2; + else if (n < 4) return 4; + else if (n < 8) return 8; + else if (n < 16) return 16; + else if (n < 32) return 32; + else if (n < 64) return 64; + else if (n < 128) return 128; + else if (n < 256) return 256; + else if (n < 512) return 512; + else if (n < 1024) return 1024; + return -1; // Input is too big +} + +void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int block_stride_y, + int nblocks_win_x, int nblocks_win_y) +{ + cnbins = nbins; + cblock_stride_x = block_stride_x; + cblock_stride_y = block_stride_y; + cnblocks_win_x = nblocks_win_x; + cnblocks_win_y = nblocks_win_y; + + int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; + cblock_hist_size = block_hist_size; + + int block_hist_size_2up = power_2up(block_hist_size); + cblock_hist_size_2up = block_hist_size_2up; + + int descr_width = nblocks_win_x * block_hist_size; + cdescr_width = descr_width; + + int descr_size = descr_width * nblocks_win_y; + cdescr_size = descr_size; +} + +void 
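+/* Editor's note (hedged, not part of the original patch): with the usual OpenCV
+   HOG defaults -- 9 bins, 2x2 cells per block, a 64x128 window and an 8-pixel
+   block stride -- set_up_constants() above yields cblock_hist_size = 9*2*2 = 36,
+   cnblocks_win_x = 7, cnblocks_win_y = 15, hence cdescr_width = 7*36 = 252 and
+   cdescr_size = 252*15 = 3780, which matches the getPeopleDetector64x128()
+   coefficients above (3780 weights plus a trailing bias term). These defaults
+   are assumed only for illustration; the caller supplies the real values. */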
cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y, + int height, int width, const cv::ocl::oclMat& grad, + const cv::ocl::oclMat& qangle, float sigma, cv::ocl::oclMat& block_hists) +{ + Context *clCxt = Context::getContext(); + string kernelName = "compute_hists_kernel"; + vector< pair > args; + + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; + + size_t globalThreads[3] = { img_block_width * 32, img_block_height * 2, 1 }; + size_t localThreads[3] = { 32, 2, 1 }; + + int grad_quadstep = grad.step >> 2; + int qangle_step = qangle.step; + + // Precompute gaussian spatial window parameter + float scale = 1.f / (2.f * sigma * sigma); + + int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float); + int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float); + int smem = hists_size + final_hists_size; + + args.push_back( make_pair( sizeof(cl_int), (void *)&width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_stride_x)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_stride_y)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep)); + args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data)); + args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); + args.push_back( make_pair( smem, (void *)NULL)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y, + int height, int width, cv::ocl::oclMat& block_hists, float threshold) +{ + Context *clCxt = Context::getContext(); + string kernelName = "normalize_hists_kernel"; + vector< pair > args; + + int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; + int nthreads = power_2up(block_hist_size); + + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; + size_t globalThreads[3] = { img_block_width * nthreads, img_block_height, 1 }; + size_t localThreads[3] = { nthreads, 1, 1 }; + + if ((nthreads < 32) || (nthreads > 512) ) + cv::ocl::error("normalize_hists: histogram's size is too small or too big", __FILE__, __LINE__, "normalize_hists"); + + args.push_back( make_pair( sizeof(cl_int), (void *)&nthreads)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); + args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); + args.push_back( make_pair( nthreads * sizeof(float), (void *)NULL)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void cv::ocl::device::hog::classify_hists(int win_height, int 
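+/* assumption carried over from OpenCV's CPU/CUDA HOG: win_stride_x and
+   win_stride_y are multiples of block_stride_x and block_stride_y, so the
+   win_stride / block_stride divisions below are exact */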
win_width, int block_stride_y, + int block_stride_x, int win_stride_y, int win_stride_x, int height, + int width, const cv::ocl::oclMat& block_hists, const cv::ocl::oclMat& coefs, float free_coef, + float threshold, cv::ocl::oclMat& labels) +{ + Context *clCxt = Context::getContext(); + string kernelName = "classify_hists_kernel"; + vector< pair > args; + + int win_block_stride_x = win_stride_x / block_stride_x; + int win_block_stride_y = win_stride_y / block_stride_y; + int img_win_width = (width - win_width + win_stride_x) / win_stride_x; + int img_win_height = (height - win_height + win_stride_y) / win_stride_y; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + + size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; + size_t localThreads[3] = { NTHREADS, 1, 1 }; + + args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_win_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x)); + args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_y)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&coefs.data)); + args.push_back( make_pair( sizeof(cl_float), (void *)&free_coef)); + args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&labels.data)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, + const cv::ocl::oclMat& block_hists, cv::ocl::oclMat& descriptors) +{ + Context *clCxt = Context::getContext(); + string kernelName = "extract_descrs_by_rows_kernel"; + vector< pair > args; + + int win_block_stride_x = win_stride_x / block_stride_x; + int win_block_stride_y = win_stride_y / block_stride_y; + int img_win_width = (width - win_width + win_stride_x) / win_stride_x; + int img_win_height = (height - win_height + win_stride_y) / win_stride_y; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int descriptors_quadstep = descriptors.step >> 2; + + size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; + size_t localThreads[3] = { NTHREADS, 1, 1 }; + + args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors_quadstep)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x)); + args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_y)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void 
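+/* Editor's note (hedged): extract_descrs_by_rows() above and
+   extract_descrs_by_cols() below write the same per-window block histograms in
+   two layouts (row-by-row over a window's blocks vs. column-by-column),
+   presumably mirroring the two descriptor formats of OpenCV's GPU HOG. Both
+   launch one NTHREADS-wide work-group per detection window, with
+   img_win_width = (width - win_width + win_stride_x) / win_stride_x windows
+   per image row. */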
cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, + const cv::ocl::oclMat& block_hists, cv::ocl::oclMat& descriptors) +{ + Context *clCxt = Context::getContext(); + string kernelName = "extract_descrs_by_cols_kernel"; + vector< pair<size_t, const void *> > args; + + int win_block_stride_x = win_stride_x / block_stride_x; + int win_block_stride_y = win_stride_y / block_stride_y; + int img_win_width = (width - win_width + win_stride_x) / win_stride_x; + int img_win_height = (height - win_height + win_stride_y) / win_stride_y; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int descriptors_quadstep = descriptors.step >> 2; + + size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; + size_t localThreads[3] = { NTHREADS, 1, 1 }; + + args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors_quadstep)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cnblocks_win_x)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cnblocks_win_y)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x)); + args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_y)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +static inline int divUp(int total, int grain) +{ + return (total + grain - 1) / grain; +} + +void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat& img, + float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma) +{ + Context *clCxt = Context::getContext(); + string kernelName = "compute_gradients_8UC1_kernel"; + vector< pair<size_t, const void *> > args; + + size_t localThreads[3] = { NTHREADS, 1, 1 }; + size_t globalThreads[3] = { width, height, 1 }; + char correctGamma = (correct_gamma) ?
1 : 0; + int img_step = img.step; + int grad_quadstep = grad.step >> 3; + int qangle_step = qangle.step >> 1; + + args.push_back( make_pair( sizeof(cl_int), (void *)&height)); + args.push_back( make_pair( sizeof(cl_int), (void *)&width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep)); + args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&img.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data)); + args.push_back( make_pair( sizeof(cl_float), (void *)&angle_scale)); + args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img, + float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma) +{ + Context *clCxt = Context::getContext(); + string kernelName = "compute_gradients_8UC4_kernel"; + vector< pair<size_t, const void *> > args; + + size_t localThreads[3] = { NTHREADS, 1, 1 }; + size_t globalThreads[3] = { width, height, 1 }; + + char correctGamma = (correct_gamma) ? 1 : 0; + int img_step = img.step >> 2; + int grad_quadstep = grad.step >> 3; + int qangle_step = qangle.step >> 1; + + args.push_back( make_pair( sizeof(cl_int), (void *)&height)); + args.push_back( make_pair( sizeof(cl_int), (void *)&width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep)); + args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&img.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data)); + args.push_back( make_pair( sizeof(cl_float), (void *)&angle_scale)); + args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +#endif diff --git a/modules/ocl/src/kernels/blend_linear.cl b/modules/ocl/src/kernels/blend_linear.cl new file mode 100644 index 0000000000..bf733576c0 --- /dev/null +++ b/modules/ocl/src/kernels/blend_linear.cl @@ -0,0 +1,196 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, MulticoreWare Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners.
+// +// @Authors +// Liu Liujun, liujun@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +__kernel void BlendLinear_C1_D0( + __global uchar *dst, + __global uchar *img1, + __global uchar *img2, + __global float *weight1, + __global float *weight2, + int rows, + int cols, + int istep, + int wstep + ) +{ + int idx = get_global_id(0); + int idy = get_global_id(1); + if (idx < cols && idy < rows) + { + int pos = idy * istep + idx; + int wpos = idy * (wstep /sizeof(float)) + idx; + float w1 = weight1[wpos]; + float w2 = weight2[wpos]; + dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + + } +} + +__kernel void BlendLinear_C3_D0( + __global uchar *dst, + __global uchar *img1, + __global uchar *img2, + __global float *weight1, + __global float *weight2, + int rows, + int cols, + int istep, + int wstep + ) +{ + int idx = get_global_id(0); + int idy = get_global_id(1); + int x = idx / 3; + int y = idy; + if (x < cols && y < rows) + { + int pos = idy * istep + idx; + int wpos = idy * (wstep /sizeof(float)) + x; + float w1 = weight1[wpos]; + float w2 = weight2[wpos]; + dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + } +} + +__kernel void BlendLinear_C4_D0( + __global uchar *dst, + __global uchar *img1, + __global uchar *img2, + __global float *weight1, + __global float *weight2, + int rows, + int cols, + int istep, + int wstep + ) +{ + int idx = get_global_id(0); + int idy = get_global_id(1); + int x = idx / 4; + int y = idy; + if (x < cols && y < rows) + { + int pos = idy * istep + idx; + int wpos = idy * (wstep /sizeof(float)) + x; + float w1 = weight1[wpos]; + float w2 = weight2[wpos]; + dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + } +} + +__kernel void BlendLinear_C1_D5( + __global float *dst, + __global float *img1, + __global float *img2, + __global float *weight1, + __global float *weight2, + int rows, + int cols, + int istep, + int wstep + ) +{ + int idx = get_global_id(0); + int idy = get_global_id(1); + if (idx < cols && idy < rows) + { + int pos = idy * (istep / sizeof(float)) + idx; + int 
wpos = idy * (wstep /sizeof(float)) + idx; + float w1 = weight1[wpos]; + float w2 = weight2[wpos]; + dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + } +} + +__kernel void BlendLinear_C3_D5( + __global float *dst, + __global float *img1, + __global float *img2, + __global float *weight1, + __global float *weight2, + int rows, + int cols, + int istep, + int wstep + ) +{ + int idx = get_global_id(0); + int idy = get_global_id(1); + int x = idx / 3; + int y = idy; + if (x < cols && y < rows) + { + int pos = idy * (istep / sizeof(float)) + idx; + int wpos = idy * (wstep /sizeof(float)) + x; + float w1 = weight1[wpos]; + float w2 = weight2[wpos]; + dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + } +} + +__kernel void BlendLinear_C4_D5( + __global float *dst, + __global float *img1, + __global float *img2, + __global float *weight1, + __global float *weight2, + int rows, + int cols, + int istep, + int wstep + ) +{ + int idx = get_global_id(0); + int idy = get_global_id(1); + int x = idx / 4; + int y = idy; + if (x < cols && y < rows) + { + int pos = idy * (istep / sizeof(float)) + idx; + int wpos = idy * (wstep /sizeof(float)) + x; + float w1 = weight1[wpos]; + float w2 = weight2[wpos]; + dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + } +} diff --git a/modules/ocl/src/kernels/imgproc_columnsum.cl b/modules/ocl/src/kernels/imgproc_columnsum.cl new file mode 100644 index 0000000000..913b417d15 --- /dev/null +++ b/modules/ocl/src/kernels/imgproc_columnsum.cl @@ -0,0 +1,80 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Chunpeng Zhang chunpeng@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#pragma OPENCL EXTENSION cl_amd_printf : enable +#if defined (__ATI__) +#pragma OPENCL EXTENSION cl_amd_fp64:enable + +#elif defined (__NVIDIA__) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif + +//////////////////////////////////////////////////////////////////// +///////////////////////// columnSum //////////////////////////////// +//////////////////////////////////////////////////////////////////// +/// CV_32FC1 +__kernel void columnSum_C1_D5(__global float* src,__global float* dst,int srcCols,int srcRows,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + + srcStep >>= 2; + dstStep >>= 2; + + if (x < srcCols) + { + int srcIdx = x ; + int dstIdx = x ; + + float sum = 0; + + for (int y = 0; y < srcRows; ++y) + { + sum += src[srcIdx]; + dst[dstIdx] = sum; + srcIdx += srcStep; + dstIdx += dstStep; + } + } +} diff --git a/modules/ocl/src/kernels/match_template.cl b/modules/ocl/src/kernels/match_template.cl new file mode 100644 index 0000000000..4c5a4fc9ca --- /dev/null +++ b/modules/ocl/src/kernels/match_template.cl @@ -0,0 +1,824 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#pragma OPENCL EXTENSION cl_amd_printf : enable + +#if defined (__ATI__) +#pragma OPENCL EXTENSION cl_amd_fp64:enable + +#elif defined (__NVIDIA__) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif + +#if !defined(USE_SQR_INTEGRAL) && (defined (__ATI__) || defined (__NVIDIA__)) +#define TYPE_IMAGE_SQSUM double +#else +#define TYPE_IMAGE_SQSUM ulong +#endif + +////////////////////////////////////////////////// +// utilities +#define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, gidx + img_sqsums_offset + ox) +#define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox) +// normAcc* are accurate normalization routines which make GPU matchTemplate +// consistent with CPU one +float normAcc(float num, float denum) +{ + if(fabs(num) < denum) + { + return num / denum; + } + if(fabs(num) < denum * 1.125f) + { + return num > 0 ? 1 : -1; + } + return 0; +} + +float normAcc_SQDIFF(float num, float denum) +{ + if(fabs(num) < denum) + { + return num / denum; + } + if(fabs(num) < denum * 1.125f) + { + return num > 0 ? 1 : -1; + } + return 1; +} +////////////////////////////////////////////////////////////////////// +// normalize + +__kernel +void normalizeKernel_C1_D0 +( + __global const TYPE_IMAGE_SQSUM * img_sqsums, + __global float * res, + ulong tpl_sqsum, + int res_rows, + int res_cols, + int tpl_rows, + int tpl_cols, + int img_sqsums_offset, + int img_sqsums_step, + int res_offset, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + img_sqsums_step /= sizeof(*img_sqsums); + img_sqsums_offset /= sizeof(*img_sqsums); + int res_idx = mad24(gidy, res_step, res_offset + gidx); + if(gidx < res_cols && gidy < res_rows) + { + float image_sqsum_ = (float)( + (img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)])); + res[res_idx] = normAcc(res[res_idx], sqrt(image_sqsum_ * tpl_sqsum)); + } +} + +__kernel +void matchTemplate_Prepared_SQDIFF_C1_D0 +( + __global const TYPE_IMAGE_SQSUM * img_sqsums, + __global float * res, + ulong tpl_sqsum, + int res_rows, + int res_cols, + int tpl_rows, + int tpl_cols, + int img_sqsums_offset, + int img_sqsums_step, + int res_offset, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + img_sqsums_step /= sizeof(*img_sqsums); + img_sqsums_offset /= sizeof(*img_sqsums); + int res_idx = mad24(gidy, res_step, res_offset + gidx); + if(gidx < res_cols && gidy < res_rows) + { + float image_sqsum_ = (float)( + (img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)])); + res[res_idx] = image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum; + } +} + +__kernel +void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0 +( + __global const 
TYPE_IMAGE_SQSUM * img_sqsums, + __global float * res, + ulong tpl_sqsum, + int res_rows, + int res_cols, + int tpl_rows, + int tpl_cols, + int img_sqsums_offset, + int img_sqsums_step, + int res_offset, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + img_sqsums_step /= sizeof(*img_sqsums); + img_sqsums_offset /= sizeof(*img_sqsums); + int res_idx = mad24(gidy, res_step, res_offset + gidx); + if(gidx < res_cols && gidy < res_rows) + { + float image_sqsum_ = (float)( + (img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)])); + res[res_idx] = normAcc_SQDIFF(image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum, + sqrt(image_sqsum_ * tpl_sqsum)); + } +} + +////////////////////////////////////////////////// +// SQDIFF +__kernel +void matchTemplate_Naive_SQDIFF_C1_D0 +( + __global const uchar * img, + __global const uchar * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + int delta; + int sum = 0; + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + delta = img_ptr[j] - tpl_ptr[j]; + sum = mad24(delta, delta, sum); + } + } + res[res_idx] = sum; + } +} + +__kernel +void matchTemplate_Naive_SQDIFF_C1_D5 +( + __global const float * img, + __global const float * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + float delta; + float sum = 0; + img_step /= sizeof(*img); + img_offset /= sizeof(*img); + tpl_step /= sizeof(*tpl); + tpl_offset /= sizeof(*tpl); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + delta = img_ptr[j] - tpl_ptr[j]; + sum = mad(delta, delta, sum); + } + } + res[res_idx] = sum; + } +} + +__kernel +void matchTemplate_Naive_SQDIFF_C4_D0 +( + __global const uchar4 * img, + __global const uchar4 * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + int4 delta; + int4 sum = (int4)(0, 0, 0, 0); + img_step /= sizeof(*img); + img_offset /= sizeof(*img); + tpl_step /= sizeof(*tpl); + tpl_offset 
/= sizeof(*tpl); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + //delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect + delta.x = img_ptr[j].x - tpl_ptr[j].x; + delta.y = img_ptr[j].y - tpl_ptr[j].y; + delta.z = img_ptr[j].z - tpl_ptr[j].z; + delta.w = img_ptr[j].w - tpl_ptr[j].w; + sum = mad24(delta, delta, sum); + } + } + res[res_idx] = sum.x + sum.y + sum.z + sum.w; + } +} + +__kernel +void matchTemplate_Naive_SQDIFF_C4_D5 +( + __global const float4 * img, + __global const float4 * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + float4 delta; + float4 sum = (float4)(0, 0, 0, 0); + img_step /= sizeof(*img); + img_offset /= sizeof(*img); + tpl_step /= sizeof(*tpl); + tpl_offset /= sizeof(*tpl); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + //delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect + delta.x = img_ptr[j].x - tpl_ptr[j].x; + delta.y = img_ptr[j].y - tpl_ptr[j].y; + delta.z = img_ptr[j].z - tpl_ptr[j].z; + delta.w = img_ptr[j].w - tpl_ptr[j].w; + sum = mad(delta, delta, sum); + } + } + res[res_idx] = sum.x + sum.y + sum.z + sum.w; + } +} + +////////////////////////////////////////////////// +// CCORR +__kernel +void matchTemplate_Naive_CCORR_C1_D0 +( + __global const uchar * img, + __global const uchar * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + int sum = 0; + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + sum = mad24(img_ptr[j], tpl_ptr[j], sum); + } + } + res[res_idx] = sum; + } +} + +__kernel +void matchTemplate_Naive_CCORR_C1_D5 +( + __global const float * img, + __global const float * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = 
get_global_id(1); + int i,j; + float sum = 0; + img_step /= sizeof(*img); + img_offset /= sizeof(*img); + tpl_step /= sizeof(*tpl); + tpl_offset /= sizeof(*tpl); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + sum = mad(img_ptr[j], tpl_ptr[j], sum); + } + } + res[res_idx] = sum; + } +} + +__kernel +void matchTemplate_Naive_CCORR_C4_D0 +( + __global const uchar4 * img, + __global const uchar4 * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + int4 sum = (int4)(0, 0, 0, 0); + img_step /= sizeof(*img); + img_offset /= sizeof(*img); + tpl_step /= sizeof(*tpl); + tpl_offset /= sizeof(*tpl); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + sum = mad24(convert_int4(img_ptr[j]), convert_int4(tpl_ptr[j]), sum); + } + } + res[res_idx] = sum.x + sum.y + sum.z + sum.w; + } +} + +__kernel +void matchTemplate_Naive_CCORR_C4_D5 +( + __global const float4 * img, + __global const float4 * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + float4 sum = (float4)(0, 0, 0, 0); + img_step /= sizeof(*img); + img_offset /= sizeof(*img); + tpl_step /= sizeof(*tpl); + tpl_offset /= sizeof(*tpl); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + sum = mad(convert_float4(img_ptr[j]), convert_float4(tpl_ptr[j]), sum); + } + } + res[res_idx] = sum.x + sum.y + sum.z + sum.w; + } +} + +////////////////////////////////////////////////// +// CCOFF +__kernel +void matchTemplate_Prepared_CCOFF_C1_D0 +( + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int res_offset, + int res_step, + __global const uint * img_sums, + int img_sums_offset, + int img_sums_step, + float tpl_sum +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + img_sums_offset /= sizeof(*img_sums); + img_sums_step /= sizeof(*img_sums); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, 
res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + float sum = (float)( + (img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)]) + - (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)])); + res[res_idx] -= sum * tpl_sum; + } +} +__kernel +void matchTemplate_Prepared_CCOFF_C4_D0 +( + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int res_offset, + int res_step, + __global const uint * img_sums_c0, + __global const uint * img_sums_c1, + __global const uint * img_sums_c2, + __global const uint * img_sums_c3, + int img_sums_offset, + int img_sums_step, + float tpl_sum_c0, + float tpl_sum_c1, + float tpl_sum_c2, + float tpl_sum_c3 +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + img_sums_offset /= sizeof(*img_sums_c0); + img_sums_step /= sizeof(*img_sums_c0); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + float ccorr = res[res_idx]; + ccorr -= tpl_sum_c0*(float)( + (img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)])); + ccorr -= tpl_sum_c1*(float)( + (img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)])); + ccorr -= tpl_sum_c2*(float)( + (img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)])); + ccorr -= tpl_sum_c3*(float)( + (img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)])); + res[res_idx] = ccorr; + } +} + +__kernel +void matchTemplate_Prepared_CCOFF_NORMED_C1_D0 +( + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int res_offset, + int res_step, + float weight, + __global const uint * img_sums, + int img_sums_offset, + int img_sums_step, + __global const TYPE_IMAGE_SQSUM * img_sqsums, + int img_sqsums_offset, + int img_sqsums_step, + float tpl_sum, + float tpl_sqsum +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + img_sqsums_step /= sizeof(*img_sqsums); + img_sqsums_offset /= sizeof(*img_sqsums); + img_sums_offset /= sizeof(*img_sums); + img_sums_step /= sizeof(*img_sums); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + float image_sum_ = (float)( + (img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)]) + - (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)])); + + float image_sqsum_ = (float)( + (img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)])); + res[res_idx] = normAcc(res[res_idx] - image_sum_ * tpl_sum, + sqrt(tpl_sqsum * (image_sqsum_ - weight * image_sum_ * image_sum_))); + } +} +__kernel +void matchTemplate_Prepared_CCOFF_NORMED_C4_D0 +( + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int res_offset, + int res_step, + float weight, + __global const uint * img_sums_c0, + __global const uint * img_sums_c1, + 
__global const uint * img_sums_c2, + __global const uint * img_sums_c3, + int img_sums_offset, + int img_sums_step, + __global const TYPE_IMAGE_SQSUM * img_sqsums_c0, + __global const TYPE_IMAGE_SQSUM * img_sqsums_c1, + __global const TYPE_IMAGE_SQSUM * img_sqsums_c2, + __global const TYPE_IMAGE_SQSUM * img_sqsums_c3, + int img_sqsums_offset, + int img_sqsums_step, + float tpl_sum_c0, + float tpl_sum_c1, + float tpl_sum_c2, + float tpl_sum_c3, + float tpl_sqsum +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + img_sqsums_step /= sizeof(*img_sqsums_c0); + img_sqsums_offset /= sizeof(*img_sqsums_c0); + img_sums_offset /= sizeof(*img_sums_c0); + img_sums_step /= sizeof(*img_sums_c0); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + float image_sum_c0 = (float)( + (img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)])); + float image_sum_c1 = (float)( + (img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)])); + float image_sum_c2 = (float)( + (img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)])); + float image_sum_c3 = (float)( + (img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)])); + + float image_sqsum_c0 = (float)( + (img_sqsums_c0[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums_c0[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(0, 0)])); + float image_sqsum_c1 = (float)( + (img_sqsums_c1[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums_c1[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(0, 0)])); + float image_sqsum_c2 = (float)( + (img_sqsums_c2[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums_c2[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(0, 0)])); + float image_sqsum_c3 = (float)( + (img_sqsums_c3[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums_c3[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(0, 0)])); + + float num = res[res_idx] - + image_sum_c0 * tpl_sum_c0 - + image_sum_c1 * tpl_sum_c1 - + image_sum_c2 * tpl_sum_c2 - + image_sum_c3 * tpl_sum_c3; + float denum = sqrt( tpl_sqsum * ( + image_sqsum_c0 - weight * image_sum_c0 * image_sum_c0 + + image_sqsum_c1 - weight * image_sum_c1 * image_sum_c1 + + image_sqsum_c2 - weight * image_sum_c2 * image_sum_c2 + + image_sqsum_c3 - weight * image_sum_c0 * image_sum_c3) + ); + res[res_idx] = normAcc(num, denum); + } +} + diff --git a/modules/ocl/src/kernels/nonfree_surf.cl b/modules/ocl/src/kernels/nonfree_surf.cl new file mode 100644 index 0000000000..16cd4be54e --- /dev/null +++ b/modules/ocl/src/kernels/nonfree_surf.cl @@ -0,0 +1,1259 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. 
+// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#pragma OPENCL EXTENSION cl_amd_printf : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable + +// dynamically change the precision used for floating type + +#if defined (__ATI__) || defined (__NVIDIA__) +#define F double +#else +#define F float +#endif + +// Image read mode +__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; + +#define CV_PI_F 3.14159265f + +// print greyscale image to show image layout +__kernel void printImage(image2d_t img) +{ + printf("(%d, %d) - %3d \n", + get_global_id(0), + get_global_id(1), + read_imageui(img, (int2)(get_global_id(0), get_global_id(1))).x + ); +} + +// Use integral image to calculate haar wavelets. 
+// N = 2 +// for simple haar paatern +float icvCalcHaarPatternSum_2(image2d_t sumTex, __constant float src[2][5], int oldSize, int newSize, int y, int x) +{ + + float ratio = (float)newSize / oldSize; + + F d = 0; + +#pragma unroll + for (int k = 0; k < 2; ++k) + { + int dx1 = convert_int_rte(ratio * src[k][0]); + int dy1 = convert_int_rte(ratio * src[k][1]); + int dx2 = convert_int_rte(ratio * src[k][2]); + int dy2 = convert_int_rte(ratio * src[k][3]); + + F t = 0; + t += read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy1)).x; + t -= read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy2)).x; + t -= read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy1)).x; + t += read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy2)).x; + + d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1)); + } + + return (float)d; +} + +// N = 3 +float icvCalcHaarPatternSum_3(image2d_t sumTex, __constant float src[3][5], int oldSize, int newSize, int y, int x) +{ + + float ratio = (float)newSize / oldSize; + + F d = 0; + +#pragma unroll + for (int k = 0; k < 3; ++k) + { + int dx1 = convert_int_rte(ratio * src[k][0]); + int dy1 = convert_int_rte(ratio * src[k][1]); + int dx2 = convert_int_rte(ratio * src[k][2]); + int dy2 = convert_int_rte(ratio * src[k][3]); + + F t = 0; + t += read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy1)).x; + t -= read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy2)).x; + t -= read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy1)).x; + t += read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy2)).x; + + d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1)); + } + + return (float)d; +} + +// N = 4 +float icvCalcHaarPatternSum_4(image2d_t sumTex, __constant float src[4][5], int oldSize, int newSize, int y, int x) +{ + + float ratio = (float)newSize / oldSize; + + F d = 0; + +#pragma unroll + for (int k = 0; k < 4; ++k) + { + int dx1 = convert_int_rte(ratio * src[k][0]); + int dy1 = convert_int_rte(ratio * src[k][1]); + int dx2 = convert_int_rte(ratio * src[k][2]); + int dy2 = convert_int_rte(ratio * src[k][3]); + + F t = 0; + t += read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy1)).x; + t -= read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy2)).x; + t -= read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy1)).x; + t += read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy2)).x; + + d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1)); + } + + return (float)d; +} + +//////////////////////////////////////////////////////////////////////// +// Hessian + +__constant float c_DX [3][5] = { {0, 2, 3, 7, 1}, {3, 2, 6, 7, -2}, {6, 2, 9, 7, 1} }; +__constant float c_DY [3][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} }; +__constant float c_DXY[4][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} }; + +__inline int calcSize(int octave, int layer) +{ + /* Wavelet size at first layer of first octave. */ + const int HAAR_SIZE0 = 9; + + /* Wavelet size increment between layers. This should be an even number, + such that the wavelet sizes in an octave are either all even or all odd. + This ensures that when looking for the neighbours of a sample, the layers + above and below are aligned correctly. 
*/ + const int HAAR_SIZE_INC = 6; + + return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave; +} + + +//calculate targeted layer per-pixel determinant and trace with an integral image +__kernel void icvCalcLayerDetAndTrace( + image2d_t sumTex, // input integral image + __global float * det, // output Determinant + __global float * trace, // output trace + int det_step, // the step of det in bytes + int trace_step, // the step of trace in bytes + int c_img_rows, + int c_img_cols, + int c_nOctaveLayers, + int c_octave, + int c_layer_rows + ) +{ + det_step /= sizeof(*det); + trace_step /= sizeof(*trace); + // Determine the indices + const int gridDim_y = get_num_groups(1) / (c_nOctaveLayers + 2); + const int blockIdx_y = get_group_id(1) % gridDim_y; + const int blockIdx_z = get_group_id(1) / gridDim_y; + + const int j = get_local_id(0) + get_group_id(0) * get_local_size(0); + const int i = get_local_id(1) + blockIdx_y * get_local_size(1); + const int layer = blockIdx_z; + + const int size = calcSize(c_octave, layer); + + const int samples_i = 1 + ((c_img_rows - size) >> c_octave); + const int samples_j = 1 + ((c_img_cols - size) >> c_octave); + + // Ignore pixels where some of the kernel is outside the image + const int margin = (size >> 1) >> c_octave; + + if (size <= c_img_rows && size <= c_img_cols && i < samples_i && j < samples_j) + { + const float dx = icvCalcHaarPatternSum_3(sumTex, c_DX , 9, size, i << c_octave, j << c_octave); + const float dy = icvCalcHaarPatternSum_3(sumTex, c_DY , 9, size, i << c_octave, j << c_octave); + const float dxy = icvCalcHaarPatternSum_4(sumTex, c_DXY, 9, size, i << c_octave, j << c_octave); + + det [j + margin + det_step * (layer * c_layer_rows + i + margin)] = dx * dy - 0.81f * dxy * dxy; + trace[j + margin + trace_step * (layer * c_layer_rows + i + margin)] = dx + dy; + } +} + + +//////////////////////////////////////////////////////////////////////// +// NONMAX + +__constant float c_DM[5] = {0, 0, 9, 9, 1}; + +bool within_check(image2d_t maskSumTex, int sum_i, int sum_j, int size) +{ + float ratio = (float)size / 9.0f; + + float d = 0; + + int dx1 = convert_int_rte(ratio * c_DM[0]); + int dy1 = convert_int_rte(ratio * c_DM[1]); + int dx2 = convert_int_rte(ratio * c_DM[2]); + int dy2 = convert_int_rte(ratio * c_DM[3]); + + float t = 0; + + t += read_imageui(maskSumTex, sampler, (int2)(sum_j + dx1, sum_i + dy1)).x; + t -= read_imageui(maskSumTex, sampler, (int2)(sum_j + dx1, sum_i + dy2)).x; + t -= read_imageui(maskSumTex, sampler, (int2)(sum_j + dx2, sum_i + dy1)).x; + t += read_imageui(maskSumTex, sampler, (int2)(sum_j + dx2, sum_i + dy2)).x; + + d += t * c_DM[4] / ((dx2 - dx1) * (dy2 - dy1)); + + return (d >= 0.5f); +} + +// Non-maximal suppression to further filtering the candidates from previous step +__kernel + void icvFindMaximaInLayer_withmask( + __global const float * det, + __global const float * trace, + __global int4 * maxPosBuffer, + volatile __global unsigned int* maxCounter, + int counter_offset, + int det_step, // the step of det in bytes + int trace_step, // the step of trace in bytes + int c_img_rows, + int c_img_cols, + int c_nOctaveLayers, + int c_octave, + int c_layer_rows, + int c_layer_cols, + int c_max_candidates, + float c_hessianThreshold, + image2d_t maskSumTex + ) +{ + volatile __local float N9[768]; // threads.x * threads.y * 3 + + det_step /= sizeof(*det); + trace_step /= sizeof(*trace); + maxCounter += counter_offset; + + // Determine the indices + const int gridDim_y = get_num_groups(1) / c_nOctaveLayers; + const int 
blockIdx_y = get_group_id(1) % gridDim_y; + const int blockIdx_z = get_group_id(1) / gridDim_y; + + const int layer = blockIdx_z + 1; + + const int size = calcSize(c_octave, layer); + + // Ignore pixels without a 3x3x3 neighbourhood in the layer above + const int margin = ((calcSize(c_octave, layer + 1) >> 1) >> c_octave) + 1; + + const int j = get_local_id(0) + get_group_id(0) * (get_local_size(0) - 2) + margin - 1; + const int i = get_local_id(1) + blockIdx_y * (get_local_size(1) - 2) + margin - 1; + + // Is this thread within the hessian buffer? + const int zoff = get_local_size(0) * get_local_size(1); + const int localLin = get_local_id(0) + get_local_id(1) * get_local_size(0) + zoff; + N9[localLin - zoff] = + det[det_step * + (c_layer_rows * (layer - 1) + min(max(i, 0), c_img_rows - 1)) // y + + min(max(j, 0), c_img_cols - 1)]; // x + N9[localLin ] = + det[det_step * + (c_layer_rows * (layer ) + min(max(i, 0), c_img_rows - 1)) // y + + min(max(j, 0), c_img_cols - 1)]; // x + N9[localLin + zoff] = + det[det_step * + (c_layer_rows * (layer + 1) + min(max(i, 0), c_img_rows - 1)) // y + + min(max(j, 0), c_img_cols - 1)]; // x + + barrier(CLK_LOCAL_MEM_FENCE); + + if (i < c_layer_rows - margin + && j < c_layer_cols - margin + && get_local_id(0) > 0 + && get_local_id(0) < get_local_size(0) - 1 + && get_local_id(1) > 0 + && get_local_id(1) < get_local_size(1) - 1 // these are unnecessary conditions ported from CUDA + ) + { + float val0 = N9[localLin]; + + if (val0 > c_hessianThreshold) + { + // Coordinates for the start of the wavelet in the sum image. There + // is some integer division involved, so don't try to simplify this + // (cancel out sampleStep) without checking the result is the same + const int sum_i = (i - ((size >> 1) >> c_octave)) << c_octave; + const int sum_j = (j - ((size >> 1) >> c_octave)) << c_octave; + + if (within_check(maskSumTex, sum_i, sum_j, size)) + { + // Check to see if we have a max (in its 26 neighbours) + const bool condmax = val0 > N9[localLin - 1 - get_local_size(0) - zoff] + && val0 > N9[localLin - get_local_size(0) - zoff] + && val0 > N9[localLin + 1 - get_local_size(0) - zoff] + && val0 > N9[localLin - 1 - zoff] + && val0 > N9[localLin - zoff] + && val0 > N9[localLin + 1 - zoff] + && val0 > N9[localLin - 1 + get_local_size(0) - zoff] + && val0 > N9[localLin + get_local_size(0) - zoff] + && val0 > N9[localLin + 1 + get_local_size(0) - zoff] + + && val0 > N9[localLin - 1 - get_local_size(0)] + && val0 > N9[localLin - get_local_size(0)] + && val0 > N9[localLin + 1 - get_local_size(0)] + && val0 > N9[localLin - 1 ] + && val0 > N9[localLin + 1 ] + && val0 > N9[localLin - 1 + get_local_size(0)] + && val0 > N9[localLin + get_local_size(0)] + && val0 > N9[localLin + 1 + get_local_size(0)] + + && val0 > N9[localLin - 1 - get_local_size(0) + zoff] + && val0 > N9[localLin - get_local_size(0) + zoff] + && val0 > N9[localLin + 1 - get_local_size(0) + zoff] + && val0 > N9[localLin - 1 + zoff] + && val0 > N9[localLin + zoff] + && val0 > N9[localLin + 1 + zoff] + && val0 > N9[localLin - 1 + get_local_size(0) + zoff] + && val0 > N9[localLin + get_local_size(0) + zoff] + && val0 > N9[localLin + 1 + get_local_size(0) + zoff] + ; + + if(condmax) + { + unsigned int ind = atomic_inc(maxCounter); + + if (ind < c_max_candidates) + { + const int laplacian = (int) copysign(1.0f, trace[trace_step* (layer * c_layer_rows + i) + j]); + + maxPosBuffer[ind] = (int4)(j, i, layer, laplacian); + } + } + } + } + } +} + +__kernel + void icvFindMaximaInLayer( + __global float * det, + 
__global float * trace, + __global int4 * maxPosBuffer, + volatile __global unsigned int* maxCounter, + int counter_offset, + int det_step, // the step of det in bytes + int trace_step, // the step of trace in bytes + int c_img_rows, + int c_img_cols, + int c_nOctaveLayers, + int c_octave, + int c_layer_rows, + int c_layer_cols, + int c_max_candidates, + float c_hessianThreshold + ) +{ + volatile __local float N9[768]; // threads.x * threads.y * 3 + + det_step /= sizeof(float); + trace_step /= sizeof(float); + maxCounter += counter_offset; + + // Determine the indices + const int gridDim_y = get_num_groups(1) / c_nOctaveLayers; + const int blockIdx_y = get_group_id(1) % gridDim_y; + const int blockIdx_z = get_group_id(1) / gridDim_y; + + const int layer = blockIdx_z + 1; + + const int size = calcSize(c_octave, layer); + + // Ignore pixels without a 3x3x3 neighbourhood in the layer above + const int margin = ((calcSize(c_octave, layer + 1) >> 1) >> c_octave) + 1; + + const int j = get_local_id(0) + get_group_id(0) * (get_local_size(0) - 2) + margin - 1; + const int i = get_local_id(1) + blockIdx_y * (get_local_size(1) - 2) + margin - 1; + + // Is this thread within the hessian buffer? + const int zoff = get_local_size(0) * get_local_size(1); + const int localLin = get_local_id(0) + get_local_id(1) * get_local_size(0) + zoff; + + int l_x = min(max(j, 0), c_img_cols - 1); + int l_y = c_layer_rows * layer + min(max(i, 0), c_img_rows - 1); + + N9[localLin - zoff] = + det[det_step * (l_y - c_layer_rows) + l_x]; + N9[localLin ] = + det[det_step * (l_y ) + l_x]; + N9[localLin + zoff] = + det[det_step * (l_y + c_layer_rows) + l_x]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (i < c_layer_rows - margin + && j < c_layer_cols - margin + && get_local_id(0) > 0 + && get_local_id(0) < get_local_size(0) - 1 + && get_local_id(1) > 0 + && get_local_id(1) < get_local_size(1) - 1 // these are unnecessary conditions ported from CUDA + ) + { + float val0 = N9[localLin]; + if (val0 > c_hessianThreshold) + { + //printf(\"(%3d, %3d) N9[%3d]=%7.1f val0=%7.1f\\n\", l_x, l_y, localLin - zoff, N9[localLin], val0); + // Coordinates for the start of the wavelet in the sum image. 
There + // is some integer division involved, so don't try to simplify this + // (cancel out sampleStep) without checking the result is the same + + // Check to see if we have a max (in its 26 neighbours) + const bool condmax = val0 > N9[localLin - 1 - get_local_size(0) - zoff] + && val0 > N9[localLin - get_local_size(0) - zoff] + && val0 > N9[localLin + 1 - get_local_size(0) - zoff] + && val0 > N9[localLin - 1 - zoff] + && val0 > N9[localLin - zoff] + && val0 > N9[localLin + 1 - zoff] + && val0 > N9[localLin - 1 + get_local_size(0) - zoff] + && val0 > N9[localLin + get_local_size(0) - zoff] + && val0 > N9[localLin + 1 + get_local_size(0) - zoff] + + && val0 > N9[localLin - 1 - get_local_size(0)] + && val0 > N9[localLin - get_local_size(0)] + && val0 > N9[localLin + 1 - get_local_size(0)] + && val0 > N9[localLin - 1 ] + && val0 > N9[localLin + 1 ] + && val0 > N9[localLin - 1 + get_local_size(0)] + && val0 > N9[localLin + get_local_size(0)] + && val0 > N9[localLin + 1 + get_local_size(0)] + + && val0 > N9[localLin - 1 - get_local_size(0) + zoff] + && val0 > N9[localLin - get_local_size(0) + zoff] + && val0 > N9[localLin + 1 - get_local_size(0) + zoff] + && val0 > N9[localLin - 1 + zoff] + && val0 > N9[localLin + zoff] + && val0 > N9[localLin + 1 + zoff] + && val0 > N9[localLin - 1 + get_local_size(0) + zoff] + && val0 > N9[localLin + get_local_size(0) + zoff] + && val0 > N9[localLin + 1 + get_local_size(0) + zoff] + ; + + if(condmax) + { + unsigned int ind = atomic_inc(maxCounter); + + if (ind < c_max_candidates) + { + const int laplacian = (int) copysign(1.0f, trace[trace_step* (layer * c_layer_rows + i) + j]); + + maxPosBuffer[ind] = (int4)(j, i, layer, laplacian); + } + } + } + } +} + +// solve 3x3 linear system Ax=b for floating point input +inline bool solve3x3_float(volatile __local const float A[3][3], volatile __local const float b[3], volatile __local float x[3]) +{ + float det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) + - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) + + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]); + + if (det != 0) + { + F invdet = 1.0 / det; + + x[0] = invdet * + (b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) - + A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) + + A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] )); + + x[1] = invdet * + (A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) - + b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) + + A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0])); + + x[2] = invdet * + (A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) - + A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) + + b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0])); + + return true; + } + return false; +} + +#define X_ROW 0 +#define Y_ROW 1 +#define LAPLACIAN_ROW 2 +#define OCTAVE_ROW 3 +#define SIZE_ROW 4 +#define ANGLE_ROW 5 +#define HESSIAN_ROW 6 +#define ROWS_COUNT 7 + +//////////////////////////////////////////////////////////////////////// +// INTERPOLATION +__kernel + void icvInterpolateKeypoint( + __global const float * det, + __global const int4 * maxPosBuffer, + __global float * keypoints, + volatile __global unsigned int * featureCounter, + int det_step, + int keypoints_step, + int c_img_rows, + int c_img_cols, + int c_octave, + int c_layer_rows, + int c_max_features + ) +{ + det_step /= sizeof(*det); + keypoints_step /= sizeof(*keypoints); + __global float * featureX = keypoints + X_ROW * keypoints_step; + __global float * featureY = keypoints + Y_ROW * keypoints_step; + __global int * featureLaplacian = (__global int *)keypoints + LAPLACIAN_ROW * 
keypoints_step; + __global int * featureOctave = (__global int *)keypoints + OCTAVE_ROW * keypoints_step; + __global float * featureSize = keypoints + SIZE_ROW * keypoints_step; + __global float * featureHessian = keypoints + HESSIAN_ROW * keypoints_step; + + const int4 maxPos = maxPosBuffer[get_group_id(0)]; + + const int j = maxPos.x - 1 + get_local_id(0); + const int i = maxPos.y - 1 + get_local_id(1); + const int layer = maxPos.z - 1 + get_local_id(2); + + volatile __local float N9[3][3][3]; + + N9[get_local_id(2)][get_local_id(1)][get_local_id(0)] = + det[det_step * (c_layer_rows * layer + i) + j]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) == 0 && get_local_id(1) == 0 && get_local_id(2) == 0) + { + volatile __local float dD[3]; + + //dx + dD[0] = -0.5f * (N9[1][1][2] - N9[1][1][0]); + //dy + dD[1] = -0.5f * (N9[1][2][1] - N9[1][0][1]); + //ds + dD[2] = -0.5f * (N9[2][1][1] - N9[0][1][1]); + + volatile __local float H[3][3]; + + //dxx + H[0][0] = N9[1][1][0] - 2.0f * N9[1][1][1] + N9[1][1][2]; + //dxy + H[0][1]= 0.25f * (N9[1][2][2] - N9[1][2][0] - N9[1][0][2] + N9[1][0][0]); + //dxs + H[0][2]= 0.25f * (N9[2][1][2] - N9[2][1][0] - N9[0][1][2] + N9[0][1][0]); + //dyx = dxy + H[1][0] = H[0][1]; + //dyy + H[1][1] = N9[1][0][1] - 2.0f * N9[1][1][1] + N9[1][2][1]; + //dys + H[1][2]= 0.25f * (N9[2][2][1] - N9[2][0][1] - N9[0][2][1] + N9[0][0][1]); + //dsx = dxs + H[2][0] = H[0][2]; + //dsy = dys + H[2][1] = H[1][2]; + //dss + H[2][2] = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1]; + + volatile __local float x[3]; + + if (solve3x3_float(H, dD, x)) + { + if (fabs(x[0]) <= 1.f && fabs(x[1]) <= 1.f && fabs(x[2]) <= 1.f) + { + // if the step is within the interpolation region, perform it + + const int size = calcSize(c_octave, maxPos.z); + + const int sum_i = (maxPos.y - ((size >> 1) >> c_octave)) << c_octave; + const int sum_j = (maxPos.x - ((size >> 1) >> c_octave)) << c_octave; + + const float center_i = sum_i + (float)(size - 1) / 2; + const float center_j = sum_j + (float)(size - 1) / 2; + + const float px = center_j + x[0] * (1 << c_octave); + const float py = center_i + x[1] * (1 << c_octave); + + const int ds = size - calcSize(c_octave, maxPos.z - 1); + const float psize = round(size + x[2] * ds); + + /* The sampling intervals and wavelet sized for selecting an orientation + and building the keypoint descriptor are defined relative to 's' */ + const float s = psize * 1.2f / 9.0f; + + /* To find the dominant orientation, the gradients in x and y are + sampled in a circle of radius 6s using wavelets of size 4s. + We ensure the gradient wavelet size is even to ensure the + wavelet pattern is balanced and symmetric around its center */ + const int grad_wav_size = 2 * convert_int_rte(2.0f * s); + + // check when grad_wav_size is too big + if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size) + { + // Get a new feature index. + unsigned int ind = atomic_inc(featureCounter); + + if (ind < c_max_features) + { + featureX[ind] = px; + featureY[ind] = py; + featureLaplacian[ind] = maxPos.w; + featureOctave[ind] = c_octave; + featureSize[ind] = psize; + featureHessian[ind] = N9[1][1][1]; + } + } // grad_wav_size check + } // If the subpixel interpolation worked + } + } // If this is thread 0. 
+} + +//////////////////////////////////////////////////////////////////////// +// Orientation + +#define ORI_SEARCH_INC 5 +#define ORI_WIN 60 +#define ORI_SAMPLES 113 + +__constant float c_aptX[ORI_SAMPLES] = {-6, -5, -5, -5, -5, -5, -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6}; +__constant float c_aptY[ORI_SAMPLES] = {0, -3, -2, -1, 0, 1, 2, 3, -4, -3, -2, -1, 0, 1, 2, 3, 4, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, -3, -2, -1, 0, 1, 2, 3, 0}; +__constant float c_aptW[ORI_SAMPLES] = {0.001455130288377404f, 0.001707611023448408f, 0.002547456417232752f, 0.003238451667129993f, 0.0035081731621176f, + 0.003238451667129993f, 0.002547456417232752f, 0.001707611023448408f, 0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f, + 0.00665318313986063f, 0.00720730796456337f, 0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f, + 0.002003900473937392f, 0.001707611023448408f, 0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f, + 0.01164754293859005f, 0.01261763460934162f, 0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f, + 0.0035081731621176f, 0.001707611023448408f, 0.002547456417232752f, 0.005233579315245152f, 0.009162282571196556f, + 0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f, 0.01366852037608624f, + 0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.003238451667129993f, 0.00665318313986063f, + 0.01164754293859005f, 0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f, + 0.01737609319388866f, 0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.001455130288377404f, + 0.0035081731621176f, 0.00720730796456337f, 0.01261763460934162f, 0.0188232995569706f, 0.02392910048365593f, + 0.02592208795249462f, 0.02392910048365593f, 0.0188232995569706f, 0.01261763460934162f, 0.00720730796456337f, + 0.0035081731621176f, 0.001455130288377404f, 0.003238451667129993f, 0.00665318313986063f, 0.01164754293859005f, + 0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f, 0.01737609319388866f, + 0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.002547456417232752f, 0.005233579315245152f, + 0.009162282571196556f, 0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f, + 0.01366852037608624f, 0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.001707611023448408f, + 0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f, 0.01164754293859005f, 0.01261763460934162f, + 0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f, 0.0035081731621176f, 0.001707611023448408f, + 0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f, 0.00665318313986063f, 0.00720730796456337f, + 0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f, 0.002003900473937392f, 0.001707611023448408f, + 0.002547456417232752f, 0.003238451667129993f, 0.0035081731621176f, 
0.003238451667129993f, 0.002547456417232752f, + 0.001707611023448408f, 0.001455130288377404f}; + +__constant float c_NX[2][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}}; +__constant float c_NY[2][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}}; + +void reduce_32_sum(volatile __local float * data, float partial_reduction, int tid) +{ +#define op(A, B) (A)+(B) + data[tid] = partial_reduction; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 16) + { + data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]); + data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]); + data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]); + data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]); + data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]); + } +#undef op +} + +__kernel + void icvCalcOrientation( + image2d_t sumTex, + __global float * keypoints, + int keypoints_step, + int c_img_rows, + int c_img_cols + ) +{ + keypoints_step /= sizeof(*keypoints); + __global float* featureX = keypoints + X_ROW * keypoints_step; + __global float* featureY = keypoints + Y_ROW * keypoints_step; + __global float* featureSize = keypoints + SIZE_ROW * keypoints_step; + __global float* featureDir = keypoints + ANGLE_ROW * keypoints_step; + + volatile __local float s_X[128]; + volatile __local float s_Y[128]; + volatile __local float s_angle[128]; + + volatile __local float s_sumx[32 * 4]; + volatile __local float s_sumy[32 * 4]; + + /* The sampling intervals and wavelet sized for selecting an orientation + and building the keypoint descriptor are defined relative to 's' */ + const float s = featureSize[get_group_id(0)] * 1.2f / 9.0f; + + /* To find the dominant orientation, the gradients in x and y are + sampled in a circle of radius 6s using wavelets of size 4s. 
+ We ensure the gradient wavelet size is even to ensure the + wavelet pattern is balanced and symmetric around its center */ + const int grad_wav_size = 2 * convert_int_rte(2.0f * s); + + // check when grad_wav_size is too big + if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size) + return; + + // Calc X, Y, angle and store it to shared memory + const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0); + + float X = 0.0f, Y = 0.0f, angle = 0.0f; + + if (tid < ORI_SAMPLES) + { + const float margin = (float)(grad_wav_size - 1) / 2.0f; + const int x = convert_int_rte(featureX[get_group_id(0)] + c_aptX[tid] * s - margin); + const int y = convert_int_rte(featureY[get_group_id(0)] + c_aptY[tid] * s - margin); + + if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size && + x >= 0 && x < (c_img_cols + 1) - grad_wav_size) + { + X = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NX, 4, grad_wav_size, y, x); + Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x); + + angle = atan2(Y, X); + if (angle < 0) + angle += 2.0f * CV_PI_F; + angle *= 180.0f / CV_PI_F; + } + } + s_X[tid] = X; + s_Y[tid] = Y; + s_angle[tid] = angle; + barrier(CLK_LOCAL_MEM_FENCE); + + float bestx = 0, besty = 0, best_mod = 0; + +#pragma unroll + for (int i = 0; i < 18; ++i) + { + const int dir = (i * 4 + get_local_id(1)) * ORI_SEARCH_INC; + + float sumx = 0.0f, sumy = 0.0f; + int d = abs(convert_int_rte(s_angle[get_local_id(0)]) - dir); + if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) + { + sumx = s_X[get_local_id(0)]; + sumy = s_Y[get_local_id(0)]; + } + d = abs(convert_int_rte(s_angle[get_local_id(0) + 32]) - dir); + if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) + { + sumx += s_X[get_local_id(0) + 32]; + sumy += s_Y[get_local_id(0) + 32]; + } + d = abs(convert_int_rte(s_angle[get_local_id(0) + 64]) - dir); + if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) + { + sumx += s_X[get_local_id(0) + 64]; + sumy += s_Y[get_local_id(0) + 64]; + } + d = abs(convert_int_rte(s_angle[get_local_id(0) + 96]) - dir); + if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) + { + sumx += s_X[get_local_id(0) + 96]; + sumy += s_Y[get_local_id(0) + 96]; + } + + reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0)); + reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0)); + + const float temp_mod = sumx * sumx + sumy * sumy; + if (temp_mod > best_mod) + { + best_mod = temp_mod; + bestx = sumx; + besty = sumy; + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + + if (get_local_id(0) == 0) + { + s_X[get_local_id(1)] = bestx; + s_Y[get_local_id(1)] = besty; + s_angle[get_local_id(1)] = best_mod; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(1) == 0 && get_local_id(0) == 0) + { + int bestIdx = 0; + + if (s_angle[1] > s_angle[bestIdx]) + bestIdx = 1; + if (s_angle[2] > s_angle[bestIdx]) + bestIdx = 2; + if (s_angle[3] > s_angle[bestIdx]) + bestIdx = 3; + + float kp_dir = atan2(s_Y[bestIdx], s_X[bestIdx]); + if (kp_dir < 0) + kp_dir += 2.0f * CV_PI_F; + kp_dir *= 180.0f / CV_PI_F; + + featureDir[get_group_id(0)] = kp_dir; + } +} + +#undef ORI_SEARCH_INC +#undef ORI_WIN +#undef ORI_SAMPLES + +//////////////////////////////////////////////////////////////////////// +// Descriptors + +#define PATCH_SZ 20 + +__constant float c_DW[PATCH_SZ * PATCH_SZ] = +{ + 3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 
0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f, + 8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f, + 1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f, + 3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f, + 5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f, + 9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f, + 0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f, + 0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 
0.0003995231236331165f, 0.0001748319627949968f, + 0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f, + 0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f, + 0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f, + 0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f, + 0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f, + 0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f, + 9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f, + 5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 
0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f, + 3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f, + 1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f, + 8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f, + 3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f +}; + +// utility for linear filter +inline uchar readerGet( + image2d_t src, + const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir, + int i, int j + ) +{ + float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir; + float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir; + return (uchar)read_imageui(src, sampler, (float2)(pixel_x, pixel_y)).x; +} + +inline float linearFilter( + image2d_t src, + const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir, + float y, float x + ) +{ + x -= 0.5f; + y -= 0.5f; + + float out = 0.0f; + + const int x1 = convert_int_rtn(x); + const int y1 = convert_int_rtn(y); + const int x2 = x1 + 1; + const int y2 = y1 + 1; + + uchar src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x1); + out = out + src_reg * ((x2 - x) * (y2 - y)); + + src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x2); + out = out + src_reg * ((x - x1) * (y2 - y)); + + src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x1); + out = out + src_reg * ((x2 - x) * (y - y1)); + + src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x2); + out = out + src_reg * ((x - x1) * (y - y1)); + + return out; +} + +void calc_dx_dy( + 
image2d_t imgTex, + volatile __local float s_dx_bin[25], + volatile __local float s_dy_bin[25], + volatile __local float s_PATCH[6][6], + __global const float* featureX, + __global const float* featureY, + __global const float* featureSize, + __global const float* featureDir + ) +{ + const float centerX = featureX[get_group_id(0)]; + const float centerY = featureY[get_group_id(0)]; + const float size = featureSize[get_group_id(0)]; + const float descriptor_dir = featureDir[get_group_id(0)] * (float)(CV_PI_F / 180.0f); + + /* The sampling intervals and wavelet sized for selecting an orientation + and building the keypoint descriptor are defined relative to 's' */ + const float s = size * 1.2f / 9.0f; + + /* Extract a window of pixels around the keypoint of size 20s */ + const int win_size = (int)((PATCH_SZ + 1) * s); + + float sin_dir; + float cos_dir; + sin_dir = sincos(descriptor_dir, &cos_dir); + + /* Nearest neighbour version (faster) */ + const float win_offset = -(float)(win_size - 1) / 2; + + // Compute sampling points + // since grids are 2D, need to compute xBlock and yBlock indices + const int xBlock = (get_group_id(1) & 3); // get_group_id(1) % 4 + const int yBlock = (get_group_id(1) >> 2); // floor(get_group_id(1)/4) + const int xIndex = xBlock * 5 + get_local_id(0); + const int yIndex = yBlock * 5 + get_local_id(1); + + const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size; + const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size; + + s_PATCH[get_local_id(1)][get_local_id(0)] = linearFilter(imgTex, centerX, centerY, win_offset, cos_dir, sin_dir, icoo, jcoo); + + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < 5 && get_local_id(1) < 5) + { + const int tid = get_local_id(1) * 5 + get_local_id(0); + + const float dw = c_DW[yIndex * PATCH_SZ + xIndex]; + + const float vx = ( + s_PATCH[get_local_id(1) ][get_local_id(0) + 1] - + s_PATCH[get_local_id(1) ][get_local_id(0) ] + + s_PATCH[get_local_id(1) + 1][get_local_id(0) + 1] - + s_PATCH[get_local_id(1) + 1][get_local_id(0) ]) + * dw; + const float vy = ( + s_PATCH[get_local_id(1) + 1][get_local_id(0) ] - + s_PATCH[get_local_id(1) ][get_local_id(0) ] + + s_PATCH[get_local_id(1) + 1][get_local_id(0) + 1] - + s_PATCH[get_local_id(1) ][get_local_id(0) + 1]) + * dw; + s_dx_bin[tid] = vx; + s_dy_bin[tid] = vy; + } +} +void reduce_sum25( + volatile __local float* sdata1, + volatile __local float* sdata2, + volatile __local float* sdata3, + volatile __local float* sdata4, + int tid + ) +{ + // first step is to reduce from 25 to 16 + if (tid < 9) // use 9 threads + { + sdata1[tid] += sdata1[tid + 16]; + sdata2[tid] += sdata2[tid + 16]; + sdata3[tid] += sdata3[tid + 16]; + sdata4[tid] += sdata4[tid + 16]; + } + + // sum (reduce) from 16 to 1 (unrolled - aligned to a half-warp) + if (tid < 8) + { + sdata1[tid] += sdata1[tid + 8]; + sdata1[tid] += sdata1[tid + 4]; + sdata1[tid] += sdata1[tid + 2]; + sdata1[tid] += sdata1[tid + 1]; + + sdata2[tid] += sdata2[tid + 8]; + sdata2[tid] += sdata2[tid + 4]; + sdata2[tid] += sdata2[tid + 2]; + sdata2[tid] += sdata2[tid + 1]; + + sdata3[tid] += sdata3[tid + 8]; + sdata3[tid] += sdata3[tid + 4]; + sdata3[tid] += sdata3[tid + 2]; + sdata3[tid] += sdata3[tid + 1]; + + sdata4[tid] += sdata4[tid + 8]; + sdata4[tid] += sdata4[tid + 4]; + sdata4[tid] += sdata4[tid + 2]; + sdata4[tid] += sdata4[tid + 1]; + } +} + +__kernel + void compute_descriptors64( + image2d_t imgTex, + volatile __global float * descriptors, + __global const float * keypoints, + int descriptors_step, + int 
keypoints_step + ) +{ + descriptors_step /= sizeof(float); + keypoints_step /= sizeof(float); + + __global const float * featureX = keypoints + X_ROW * keypoints_step; + __global const float * featureY = keypoints + Y_ROW * keypoints_step; + __global const float * featureSize = keypoints + SIZE_ROW * keypoints_step; + __global const float * featureDir = keypoints + ANGLE_ROW * keypoints_step; + + // 2 floats (dx,dy) for each thread (5x5 sample points in each sub-region) + volatile __local float sdx[25]; + volatile __local float sdy[25]; + volatile __local float sdxabs[25]; + volatile __local float sdyabs[25]; + volatile __local float s_PATCH[6][6]; + + calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir); + barrier(CLK_LOCAL_MEM_FENCE); + + const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0); + + if (tid < 25) + { + sdxabs[tid] = fabs(sdx[tid]); // |dx| array + sdyabs[tid] = fabs(sdy[tid]); // |dy| array + barrier(CLK_LOCAL_MEM_FENCE); + + reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid); + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2); + + // write dx, dy, |dx|, |dy| + if (tid == 0) + { + descriptors_block[0] = sdx[0]; + descriptors_block[1] = sdy[0]; + descriptors_block[2] = sdxabs[0]; + descriptors_block[3] = sdyabs[0]; + } + } +} +__kernel + void compute_descriptors128( + image2d_t imgTex, + __global volatile float * descriptors, + __global float * keypoints, + int descriptors_step, + int keypoints_step + ) +{ + descriptors_step /= sizeof(*descriptors); + keypoints_step /= sizeof(*keypoints); + + __global float * featureX = keypoints + X_ROW * keypoints_step; + __global float * featureY = keypoints + Y_ROW * keypoints_step; + __global float* featureSize = keypoints + SIZE_ROW * keypoints_step; + __global float* featureDir = keypoints + ANGLE_ROW * keypoints_step; + + // 2 floats (dx,dy) for each thread (5x5 sample points in each sub-region) + volatile __local float sdx[25]; + volatile __local float sdy[25]; + + // sum (reduce) 5x5 area response + volatile __local float sd1[25]; + volatile __local float sd2[25]; + volatile __local float sdabs1[25]; + volatile __local float sdabs2[25]; + volatile __local float s_PATCH[6][6]; + + calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir); + barrier(CLK_LOCAL_MEM_FENCE); + + const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0); + + if (tid < 25) + { + if (sdy[tid] >= 0) + { + sd1[tid] = sdx[tid]; + sdabs1[tid] = fabs(sdx[tid]); + sd2[tid] = 0; + sdabs2[tid] = 0; + } + else + { + sd1[tid] = 0; + sdabs1[tid] = 0; + sd2[tid] = sdx[tid]; + sdabs2[tid] = fabs(sdx[tid]); + } + barrier(CLK_LOCAL_MEM_FENCE); + + reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid); + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 3); + + // write dx (dy >= 0), |dx| (dy >= 0), dx (dy < 0), |dx| (dy < 0) + if (tid == 0) + { + descriptors_block[0] = sd1[0]; + descriptors_block[1] = sdabs1[0]; + descriptors_block[2] = sd2[0]; + descriptors_block[3] = sdabs2[0]; + } + + if (sdx[tid] >= 0) + { + sd1[tid] = sdy[tid]; + sdabs1[tid] = fabs(sdy[tid]); + sd2[tid] = 0; + sdabs2[tid] = 0; + } + else + { + sd1[tid] = 0; + sdabs1[tid] = 0; + sd2[tid] = sdy[tid]; + sdabs2[tid] = fabs(sdy[tid]); + } + barrier(CLK_LOCAL_MEM_FENCE); + + reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid); + 
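    // ------------------------------------------------------------------
    // [Editor's note, not part of the patch] For the extended (128-element)
    // descriptor each 4x4 sub-region stores 8 sums instead of 4: dx and |dx|
    // accumulated separately for dy >= 0 and dy < 0 (written above), then dy
    // and |dy| accumulated separately for dx >= 0 and dx < 0 (written just
    // below). reduce_sum25 collapses the 25 per-sample values of each local
    // array into element 0; a serial sketch of that reduction, assuming a
    // hypothetical 25-element float array v:
    //
    //   float total = 0.0f;
    //   for (int k = 0; k < 25; ++k)
    //       total += v[k];    // same total reduce_sum25 leaves in v[0]
    // ------------------------------------------------------------------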
barrier(CLK_LOCAL_MEM_FENCE); + + // write dy (dx >= 0), |dy| (dx >= 0), dy (dx < 0), |dy| (dx < 0) + if (tid == 0) + { + descriptors_block[4] = sd1[0]; + descriptors_block[5] = sdabs1[0]; + descriptors_block[6] = sd2[0]; + descriptors_block[7] = sdabs2[0]; + } + } +} + +__kernel + void normalize_descriptors128(__global float * descriptors, int descriptors_step) +{ + descriptors_step /= sizeof(*descriptors); + // no need for thread ID + __global float* descriptor_base = descriptors + descriptors_step * get_group_id(0); + + // read in the unnormalized descriptor values (squared) + volatile __local float sqDesc[128]; + const float lookup = descriptor_base[get_local_id(0)]; + sqDesc[get_local_id(0)] = lookup * lookup; + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < 64) + sqDesc[get_local_id(0)] += sqDesc[get_local_id(0) + 64]; + barrier(CLK_LOCAL_MEM_FENCE); + + // reduction to get total + if (get_local_id(0) < 32) + { + volatile __local float* smem = sqDesc; + + smem[get_local_id(0)] += smem[get_local_id(0) + 32]; + smem[get_local_id(0)] += smem[get_local_id(0) + 16]; + smem[get_local_id(0)] += smem[get_local_id(0) + 8]; + smem[get_local_id(0)] += smem[get_local_id(0) + 4]; + smem[get_local_id(0)] += smem[get_local_id(0) + 2]; + smem[get_local_id(0)] += smem[get_local_id(0) + 1]; + } + + // compute length (square root) + volatile __local float len; + if (get_local_id(0) == 0) + { + len = sqrt(sqDesc[0]); + } + barrier(CLK_LOCAL_MEM_FENCE); + + // normalize and store in output + descriptor_base[get_local_id(0)] = lookup / len; +} +__kernel + void normalize_descriptors64(__global float * descriptors, int descriptors_step) +{ + descriptors_step /= sizeof(*descriptors); + // no need for thread ID + __global float* descriptor_base = descriptors + descriptors_step * get_group_id(0); + + // read in the unnormalized descriptor values (squared) + volatile __local float sqDesc[64]; + const float lookup = descriptor_base[get_local_id(0)]; + sqDesc[get_local_id(0)] = lookup * lookup; + barrier(CLK_LOCAL_MEM_FENCE); + + // reduction to get total + if (get_local_id(0) < 32) + { + volatile __local float* smem = sqDesc; + + smem[get_local_id(0)] += smem[get_local_id(0) + 32]; + smem[get_local_id(0)] += smem[get_local_id(0) + 16]; + smem[get_local_id(0)] += smem[get_local_id(0) + 8]; + smem[get_local_id(0)] += smem[get_local_id(0) + 4]; + smem[get_local_id(0)] += smem[get_local_id(0) + 2]; + smem[get_local_id(0)] += smem[get_local_id(0) + 1]; + } + + // compute length (square root) + volatile __local float len; + if (get_local_id(0) == 0) + { + len = sqrt(sqDesc[0]); + } + barrier(CLK_LOCAL_MEM_FENCE); + + // normalize and store in output + descriptor_base[get_local_id(0)] = lookup / len; +} diff --git a/modules/ocl/src/kernels/objdetect_hog.cl b/modules/ocl/src/kernels/objdetect_hog.cl new file mode 100644 index 0000000000..4a950fa1cb --- /dev/null +++ b/modules/ocl/src/kernels/objdetect_hog.cl @@ -0,0 +1,450 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. 
+// Third party copyrights are property of their respective owners. +// +// @Authors +// Wenju He, wenju@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#define CELL_WIDTH 8 +#define CELL_HEIGHT 8 +#define CELLS_PER_BLOCK_X 2 +#define CELLS_PER_BLOCK_Y 2 +#define NTHREADS 256 +#define CV_PI_F 3.1415926535897932384626433832795f + +//---------------------------------------------------------------------------- +// Histogram computation + +__kernel void compute_hists_kernel(const int width, const int cblock_stride_x, const int cblock_stride_y, + const int cnbins, const int cblock_hist_size, const int img_block_width, + const int grad_quadstep, const int qangle_step, + __global const float* grad, __global const uchar* qangle, + const float scale, __global float* block_hists, __local float* smem) +{ + const int lidX = get_local_id(0); + const int lidY = get_local_id(1); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + const int cell_x = lidX / 16; + const int cell_y = lidY; + const int cell_thread_x = lidX & 0xF; + + __local float* hists = smem; + __local float* final_hist = smem + cnbins * 48; + + const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x; + const int offset_y = gidY * cblock_stride_y + (cell_y << 2); + + __global const float* grad_ptr = grad + offset_y * grad_quadstep + (offset_x << 1); + __global const uchar* qangle_ptr = qangle + offset_y * qangle_step + (offset_x << 1); + + // 12 means that 12 pixels affect on block's cell (in one row) + if (cell_thread_x < 12) + { + __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) + cell_thread_x; + for (int bin_id = 0; bin_id < cnbins; ++bin_id) + hist[bin_id * 48] = 0.f; + + const int dist_x = -4 + cell_thread_x - 4 * cell_x; + + const int dist_y_begin = -4 - 4 * lidY; + for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y) + { + float2 vote = (float2) (grad_ptr[0], grad_ptr[1]); + uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]); + + grad_ptr += grad_quadstep; + qangle_ptr += qangle_step; + + int 
dist_center_y = dist_y - 4 * (1 - 2 * cell_y); + int dist_center_x = dist_x - 4 * (1 - 2 * cell_x); + + float gaussian = exp(-(dist_center_y * dist_center_y + dist_center_x * dist_center_x) * scale); + float interp_weight = (8.f - fabs(dist_y + 0.5f)) * (8.f - fabs(dist_x + 0.5f)) / 64.f; + + hist[bin.x * 48] += gaussian * interp_weight * vote.x; + hist[bin.y * 48] += gaussian * interp_weight * vote.y; + } + + volatile __local float* hist_ = hist; + for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48) + { + if (cell_thread_x < 6) hist_[0] += hist_[6]; + if (cell_thread_x < 3) hist_[0] += hist_[3]; + if (cell_thread_x == 0) + final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = hist_[0] + hist_[1] + hist_[2]; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + __global float* block_hist = block_hists + (gidY * img_block_width + gidX) * cblock_hist_size; + + int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 16 + cell_thread_x; + if (tid < cblock_hist_size) + block_hist[tid] = final_hist[tid]; +} + +//------------------------------------------------------------- +// Normalization of histograms via L2Hys_norm +// +float reduce_smem(volatile __local float* smem, int size) +{ + unsigned int tid = get_local_id(0); + float sum = smem[tid]; + + if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256]; barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128]; barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64]; barrier(CLK_LOCAL_MEM_FENCE); } + + if (tid < 32) + { + if (size >= 64) smem[tid] = sum = sum + smem[tid + 32]; + if (size >= 32) smem[tid] = sum = sum + smem[tid + 16]; + if (size >= 16) smem[tid] = sum = sum + smem[tid + 8]; + if (size >= 8) smem[tid] = sum = sum + smem[tid + 4]; + if (size >= 4) smem[tid] = sum = sum + smem[tid + 2]; + if (size >= 2) smem[tid] = sum = sum + smem[tid + 1]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + sum = smem[0]; + + return sum; +} + +__kernel void normalize_hists_kernel(const int nthreads, const int block_hist_size, const int img_block_width, + __global float* block_hists, const float threshold, __local float *squares) +{ + const int tid = get_local_id(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global float* hist = block_hists + (gidY * img_block_width + gidX) * block_hist_size + tid; + + float elem = 0.f; + if (tid < block_hist_size) + elem = hist[0]; + + squares[tid] = elem * elem; + + barrier(CLK_LOCAL_MEM_FENCE); + float sum = reduce_smem(squares, nthreads); + + float scale = 1.0f / (sqrt(sum) + 0.1f * block_hist_size); + elem = min(elem * scale, threshold); + + barrier(CLK_LOCAL_MEM_FENCE); + squares[tid] = elem * elem; + + barrier(CLK_LOCAL_MEM_FENCE); + sum = reduce_smem(squares, nthreads); + scale = 1.0f / (sqrt(sum) + 1e-3f); + + if (tid < block_hist_size) + hist[0] = elem * scale; +} + +//--------------------------------------------------------------------- +// Linear SVM based classification +// +__kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr_size, const int cdescr_width, + const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float * block_hists, __global const float* coefs, + float free_coef, float threshold, __global uchar* labels) +{ + const int tid = get_local_id(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global const float* hist = 
block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + + float product = 0.f; + for (int i = tid; i < cdescr_size; i += NTHREADS) + { + int offset_y = i / cdescr_width; + int offset_x = i - offset_y * cdescr_width; + product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x]; + } + + __local float products[NTHREADS]; + + products[tid] = product; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 128) products[tid] = product = product + products[tid + 128]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 64) products[tid] = product = product + products[tid + 64]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 32) + { + volatile __local float* smem = products; + smem[tid] = product = product + smem[tid + 32]; + smem[tid] = product = product + smem[tid + 16]; + smem[tid] = product = product + smem[tid + 8]; + smem[tid] = product = product + smem[tid + 4]; + smem[tid] = product = product + smem[tid + 2]; + smem[tid] = product = product + smem[tid + 1]; + } + + if (tid == 0) + labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold); +} + +//---------------------------------------------------------------------------- +// Extract descriptors + +__kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, const int cdescr_width, + const int img_block_width, const int win_block_stride_x, const int win_block_stride_y, + __global const float* block_hists, __global float* descriptors) +{ + int tid = get_local_id(0); + int gidX = get_group_id(0); + int gidY = get_group_id(1); + + // Get left top corner of the window in src + __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + + // Get left top corner of the window in dst + __global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; + + // Copy elements from src to dst + for (int i = tid; i < cdescr_size; i += NTHREADS) + { + int offset_y = i / cdescr_width; + int offset_x = i - offset_y * cdescr_width; + descriptor[i] = hist[offset_y * img_block_width * cblock_hist_size + offset_x]; + } +} + +__kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, + const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, const int win_block_stride_x, + const int win_block_stride_y, __global const float* block_hists, __global float* descriptors) +{ + int tid = get_local_id(0); + int gidX = get_group_id(0); + int gidY = get_group_id(1); + + // Get left top corner of the window in src + __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + + // Get left top corner of the window in dst + __global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; + + // Copy elements from src to dst + for (int i = tid; i < cdescr_size; i += NTHREADS) + { + int block_idx = i / cblock_hist_size; + int idx_in_block = i - block_idx * cblock_hist_size; + + int y = block_idx / cnblocks_win_x; + int x = block_idx - y * cnblocks_win_x; + + descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] = hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block]; + } +} + +//---------------------------------------------------------------------------- +// Gradients computation + +__kernel 
void compute_gradients_8UC4_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step, + const __global uchar4 * img, __global float * grad, __global uchar * qangle, + const float angle_scale, const char correct_gamma, const int cnbins) +{ + const int x = get_global_id(0); + const int tid = get_local_id(0); + const int gSizeX = get_local_size(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global const uchar4* row = img + gidY * img_step; + + __local float sh_row[(NTHREADS + 2) * 3]; + + uchar4 val; + if (x < width) + val = row[x]; + else + val = row[width - 2]; + + sh_row[tid + 1] = val.x; + sh_row[tid + 1 + (NTHREADS + 2)] = val.y; + sh_row[tid + 1 + 2 * (NTHREADS + 2)] = val.z; + + if (tid == 0) + { + val = row[max(x - 1, 1)]; + sh_row[0] = val.x; + sh_row[(NTHREADS + 2)] = val.y; + sh_row[2 * (NTHREADS + 2)] = val.z; + } + + if (tid == gSizeX - 1) + { + val = row[min(x + 1, width - 2)]; + sh_row[gSizeX + 1] = val.x; + sh_row[gSizeX + 1 + (NTHREADS + 2)] = val.y; + sh_row[gSizeX + 1 + 2 * (NTHREADS + 2)] = val.z; + } + + barrier(CLK_LOCAL_MEM_FENCE); + if (x < width) + { + float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)], sh_row[tid + 2 * (NTHREADS + 2)]); + float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)], sh_row[tid + 2 + 2 * (NTHREADS + 2)]); + + float3 dx; + if (correct_gamma == 1) + dx = sqrt(b) - sqrt(a); + else + dx = b - a; + + float3 dy = (float3) 0.f; + + if (gidY > 0 && gidY < height - 1) + { + a = convert_float3(img[(gidY - 1) * img_step + x].xyz); + b = convert_float3(img[(gidY + 1) * img_step + x].xyz); + + if (correct_gamma == 1) + dy = sqrt(b) - sqrt(a); + else + dy = b - a; + } + + float best_dx = dx.x; + float best_dy = dy.x; + + float mag0 = dx.x * dx.x + dy.x * dy.x; + float mag1 = dx.y * dx.y + dy.y * dy.y; + if (mag0 < mag1) + { + best_dx = dx.y; + best_dy = dy.y; + mag0 = mag1; + } + + mag1 = dx.z * dx.z + dy.z * dy.z; + if (mag0 < mag1) + { + best_dx = dx.z; + best_dy = dy.z; + mag0 = mag1; + } + + mag0 = sqrt(mag0); + + float ang = (atan2(best_dy, best_dx) + CV_PI_F) * angle_scale - 0.5f; + int hidx = (int)floor(ang); + ang -= hidx; + hidx = (hidx + cnbins) % cnbins; + + qangle[(gidY * qangle_step + x) << 1] = hidx; + qangle[((gidY * qangle_step + x) << 1) + 1] = (hidx + 1) % cnbins; + grad[(gidY * grad_quadstep + x) << 1] = mag0 * (1.f - ang); + grad[((gidY * grad_quadstep + x) << 1) + 1] = mag0 * ang; + } +} + +__kernel void compute_gradients_8UC1_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step, + __global const uchar * img, __global float * grad, __global uchar * qangle, + const float angle_scale, const char correct_gamma, const int cnbins) +{ + const int x = get_global_id(0); + const int tid = get_local_id(0); + const int gSizeX = get_local_size(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global const uchar* row = img + gidY * img_step; + + __local float sh_row[NTHREADS + 2]; + + if (x < width) + sh_row[tid + 1] = row[x]; + else + sh_row[tid + 1] = row[width - 2]; + + if (tid == 0) + sh_row[0] = row[max(x - 1, 1)]; + + if (tid == gSizeX - 1) + sh_row[gSizeX + 1] = row[min(x + 1, width - 2)]; + + barrier(CLK_LOCAL_MEM_FENCE); + if (x < width) + { + float dx; + + if (correct_gamma == 1) + dx = sqrt(sh_row[tid + 2]) - sqrt(sh_row[tid]); + else + dx = sh_row[tid + 2] - sh_row[tid]; + + float dy = 0.f; + if (gidY > 0 && gidY < height - 1) + { + 
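        // --------------------------------------------------------------
        // [Editor's note, not part of the patch] dy is computed just below as
        // a central difference across rows (dx was computed above from the
        // row cached in local memory), with an optional sqrt() gamma
        // compression when correct_gamma is set. The resulting magnitude is
        // then split linearly between the two nearest orientation bins, as in
        // this scalar sketch assuming hypothetical inputs mag, dx, dy and the
        // kernel's angle_scale / cnbins parameters:
        //
        //   float ang  = (atan2(dy, dx) + CV_PI_F) * angle_scale - 0.5f;
        //   int   bin0 = (int)floor(ang);
        //   float frac = ang - bin0;                 // weight of the upper bin
        //   bin0 = (bin0 + cnbins) % cnbins;
        //   int   bin1 = (bin0 + 1) % cnbins;
        //   // vote mag * (1.f - frac) into bin0 and mag * frac into bin1
        // --------------------------------------------------------------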
float a = (float) img[ (gidY + 1) * img_step + x ]; + float b = (float) img[ (gidY - 1) * img_step + x ]; + if (correct_gamma == 1) + dy = sqrt(a) - sqrt(b); + else + dy = a - b; + } + float mag = sqrt(dx * dx + dy * dy); + + float ang = (atan2(dy, dx) + CV_PI_F) * angle_scale - 0.5f; + int hidx = (int)floor(ang); + ang -= hidx; + hidx = (hidx + cnbins) % cnbins; + + qangle[ (gidY * qangle_step + x) << 1 ] = hidx; + qangle[ ((gidY * qangle_step + x) << 1) + 1 ] = (hidx + 1) % cnbins; + grad[ (gidY * grad_quadstep + x) << 1 ] = mag * (1.f - ang); + grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang; + } +} diff --git a/modules/ocl/src/kernels/pyr_down.cl b/modules/ocl/src/kernels/pyr_down.cl new file mode 100644 index 0000000000..38b4ec7c7f --- /dev/null +++ b/modules/ocl/src/kernels/pyr_down.cl @@ -0,0 +1,500 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Dachuan Zhao, dachuan@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#pragma OPENCL EXTENSION cl_amd_printf : enable + + +uchar round_uchar_uchar(uchar v) +{ + return v; +} + +uchar round_uchar_int(int v) +{ + return (uchar)((uint)v <= 255 ? v : v > 0 ? 
255 : 0); +} + +uchar round_uchar_float(float v) +{ + if(v - convert_int_sat_rte(v) > 1e-6 || v - convert_int_sat_rte(v) < -1e-6) + { + if(((int)v + 1) - (v + 0.5f) < 1e-6 && ((int)v + 1) - (v + 0.5f) > -1e-6) + { + v = (int)v + 0.51f; + } + } + int iv = convert_int_sat_rte(v); + return round_uchar_int(iv); +} + +uchar4 round_uchar4_uchar4(uchar4 v) +{ + return v; +} + +uchar4 round_uchar4_int4(int4 v) +{ + uchar4 result; + result.x = (uchar)(v.x <= 255 ? v.x : v.x > 0 ? 255 : 0); + result.y = (uchar)(v.y <= 255 ? v.y : v.y > 0 ? 255 : 0); + result.z = (uchar)(v.z <= 255 ? v.z : v.z > 0 ? 255 : 0); + result.w = (uchar)(v.w <= 255 ? v.w : v.w > 0 ? 255 : 0); + return result; +} + +uchar4 round_uchar4_float4(float4 v) +{ + if(v.x - convert_int_sat_rte(v.x) > 1e-6 || v.x - convert_int_sat_rte(v.x) < -1e-6) + { + if(((int)(v.x) + 1) - (v.x + 0.5f) < 1e-6 && ((int)(v.x) + 1) - (v.x + 0.5f) > -1e-6) + { + v.x = (int)(v.x) + 0.51f; + } + } + if(v.y - convert_int_sat_rte(v.y) > 1e-6 || v.y - convert_int_sat_rte(v.y) < -1e-6) + { + if(((int)(v.y) + 1) - (v.y + 0.5f) < 1e-6 && ((int)(v.y) + 1) - (v.y + 0.5f) > -1e-6) + { + v.y = (int)(v.y) + 0.51f; + } + } + if(v.z - convert_int_sat_rte(v.z) > 1e-6 || v.z - convert_int_sat_rte(v.z) < -1e-6) + { + if(((int)(v.z) + 1) - (v.z + 0.5f) < 1e-6 && ((int)(v.z) + 1) - (v.z + 0.5f) > -1e-6) + { + v.z = (int)(v.z) + 0.51f; + } + } + if(v.w - convert_int_sat_rte(v.w) > 1e-6 || v.w - convert_int_sat_rte(v.w) < -1e-6) + { + if(((int)(v.w) + 1) - (v.w + 0.5f) < 1e-6 && ((int)(v.w) + 1) - (v.w + 0.5f) > -1e-6) + { + v.w = (int)(v.w) + 0.51f; + } + } + int4 iv = convert_int4_sat_rte(v); + return round_uchar4_int4(iv); +} + + + + +int idx_row_low(int y, int last_row) +{ + if(y < 0) + { + y = -y; + } + return y % (last_row + 1); +} + +int idx_row_high(int y, int last_row) +{ + int i; + int j; + if(last_row - y < 0) + { + i = (y - last_row); + } + else + { + i = (last_row - y); + } + if(last_row - i < 0) + { + j = i - last_row; + } + else + { + j = last_row - i; + } + return j % (last_row + 1); +} + +int idx_row(int y, int last_row) +{ + return idx_row_low(idx_row_high(y, last_row), last_row); +} + +int idx_col_low(int x, int last_col) +{ + if(x < 0) + { + x = -x; + } + return x % (last_col + 1); +} + +int idx_col_high(int x, int last_col) +{ + int i; + int j; + if(last_col - x < 0) + { + i = (x - last_col); + } + else + { + i = (last_col - x); + } + if(last_col - i < 0) + { + j = i - last_col; + } + else + { + j = last_col - i; + } + return j % (last_col + 1); +} + +int idx_col(int x, int last_col) +{ + return idx_col_low(idx_col_high(x, last_col), last_col); +} + +__kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstOffset, int dstCols) +{ + const int x = get_group_id(0) * get_local_size(0) + get_local_id(0); + const int y = get_group_id(1); + + __local float smem[256 + 4]; + + float sum; + + const int src_y = 2*y; + const int last_row = srcRows - 1; + const int last_col = srcCols - 1; + + sum = 0; + + sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)]); + sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)]); + sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)]); + sum = sum + 0.25f * 
round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)]); + sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)]); + + smem[2 + get_local_id(0)] = sum; + + if (get_local_id(0) < 2) + { + const int left_x = x - 2; + + sum = 0; + + sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)]); + sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)]); + sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)]); + sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)]); + sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)]); + + smem[get_local_id(0)] = sum; + } + + if (get_local_id(0) > 253) + { + const int right_x = x + 2; + + sum = 0; + + sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)]); + sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)]); + sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)]); + sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)]); + sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)]); + + smem[4 + get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < 128) + { + const int tid2 = get_local_id(0) * 2; + + sum = 0; + + sum = sum + 0.0625f * smem[2 + tid2 - 2]; + sum = sum + 0.25f * smem[2 + tid2 - 1]; + sum = sum + 0.375f * smem[2 + tid2 ]; + sum = sum + 0.25f * smem[2 + tid2 + 1]; + sum = sum + 0.0625f * smem[2 + tid2 + 2]; + + const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; + + if (dst_x < dstCols) + dst[y * dstStep + dst_x] = round_uchar_float(sum); + } +} + +__kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar4 *dst, int dstStep, int dstOffset, int dstCols) +{ + const int x = get_group_id(0) * get_local_size(0) + get_local_id(0); + const int y = get_group_id(1); + + __local float4 smem[256 + 4]; + + float4 sum; + + const int src_y = 2*y; + const int last_row = srcRows - 1; + const int last_col = srcCols - 1; + + float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); + float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); + float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); + + sum = 0; + + sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)])); + sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, 
last_col)])); + sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)])); + sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)])); + sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)])); + + smem[2 + get_local_id(0)] = sum; + + if (get_local_id(0) < 2) + { + const int left_x = x - 2; + + sum = 0; + + sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)])); + sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)])); + sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)])); + sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)])); + sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)])); + + smem[get_local_id(0)] = sum; + } + + if (get_local_id(0) > 253) + { + const int right_x = x + 2; + + sum = 0; + + sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)])); + sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)])); + sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)])); + sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)])); + sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)])); + + smem[4 + get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < 128) + { + const int tid2 = get_local_id(0) * 2; + + sum = 0; + + sum = sum + co3 * smem[2 + tid2 - 2]; + sum = sum + co2 * smem[2 + tid2 - 1]; + sum = sum + co1 * smem[2 + tid2 ]; + sum = sum + co2 * smem[2 + tid2 + 1]; + sum = sum + co3 * smem[2 + tid2 + 2]; + + const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; + + if (dst_x < dstCols) + dst[y * dstStep / 4 + dst_x] = round_uchar4_float4(sum); + } +} + +__kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global float *dst, int dstStep, int dstOffset, int dstCols) +{ + const int x = get_group_id(0) * get_local_size(0) + get_local_id(0); + const int y = get_group_id(1); + + __local float smem[256 + 4]; + + float sum; + + const int src_y = 2*y; + const int last_row = srcRows - 1; + const int last_col = srcCols - 1; + + sum = 0; + + sum = sum + 0.0625f * ((__global float*)((__global 
char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)]; + sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)]; + sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)]; + sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)]; + sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)]; + + smem[2 + get_local_id(0)] = sum; + + if (get_local_id(0) < 2) + { + const int left_x = x - 2; + + sum = 0; + + sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)]; + sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)]; + sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)]; + sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)]; + sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)]; + + smem[get_local_id(0)] = sum; + } + + if (get_local_id(0) > 253) + { + const int right_x = x + 2; + + sum = 0; + + sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)]; + sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)]; + sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)]; + sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)]; + sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)]; + + smem[4 + get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < 128) + { + const int tid2 = get_local_id(0) * 2; + + sum = 0; + + sum = sum + 0.0625f * smem[2 + tid2 - 2]; + sum = sum + 0.25f * smem[2 + tid2 - 1]; + sum = sum + 0.375f * smem[2 + tid2 ]; + sum = sum + 0.25f * smem[2 + tid2 + 1]; + sum = sum + 0.0625f * smem[2 + tid2 + 2]; + + const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; + + if (dst_x < dstCols) + dst[y * dstStep / 4 + dst_x] = sum; + } +} + +__kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global float4 *dst, int dstStep, int dstOffset, int dstCols) +{ + const int x = get_group_id(0) * get_local_size(0) + get_local_id(0); + const int y = get_group_id(1); + + __local float4 smem[256 + 4]; + + float4 sum; + + const int src_y = 2*y; + const int last_row = srcRows - 1; + const int last_col = srcCols - 1; + + float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); + float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); + float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); + + sum = 0; + + sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)]; + sum = sum + co2 * ((__global float4*)((__global char4*)srcData + 
idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)]; + sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)]; + sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)]; + sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)]; + + smem[2 + get_local_id(0)] = sum; + + if (get_local_id(0) < 2) + { + const int left_x = x - 2; + + sum = 0; + + sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]; + sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]; + sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)]; + sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]; + sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]; + + smem[get_local_id(0)] = sum; + } + + if (get_local_id(0) > 253) + { + const int right_x = x + 2; + + sum = 0; + + sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]; + sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]; + sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)]; + sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]; + sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]; + + smem[4 + get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < 128) + { + const int tid2 = get_local_id(0) * 2; + + sum = 0; + + sum = sum + co3 * smem[2 + tid2 - 2]; + sum = sum + co2 * smem[2 + tid2 - 1]; + sum = sum + co1 * smem[2 + tid2 ]; + sum = sum + co2 * smem[2 + tid2 + 1]; + sum = sum + co3 * smem[2 + tid2 + 2]; + + const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; + + if (dst_x < dstCols) + dst[y * dstStep / 16 + dst_x] = sum; + } +} diff --git a/modules/ocl/src/kernels/pyr_up.cl b/modules/ocl/src/kernels/pyr_up.cl new file mode 100644 index 0000000000..dd3ba43d1b --- /dev/null +++ b/modules/ocl/src/kernels/pyr_up.cl @@ -0,0 +1,750 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// @Authors +// Zhang Chunpeng chunpeng@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +//#pragma OPENCL EXTENSION cl_amd_printf : enable + +uchar get_valid_uchar(uchar data) +{ + return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0); +} +/////////////////////////////////////////////////////////////////////// +////////////////////////// CV_8UC1 ////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +__kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst, + int srcRows,int dstRows,int srcCols,int dstCols, + int srcOffset,int dstOffset,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + __local float s_srcPatch[10][10]; + __local float s_dstPatch[20][16]; + + + if( get_local_id(0) < 10 && get_local_id(1) < 10 ) + { + int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; + int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; + + srcx = abs(srcx); + srcx = min(srcCols - 1,srcx); + + srcy = abs(srcy); + srcy = min(srcRows -1 ,srcy); + + s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]); + + } + + barrier(CLK_LOCAL_MEM_FENCE); + + float sum = 0; + const int evenFlag = (int)((get_local_id(0) & 1) == 0); + const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const bool eveny = ((get_local_id(1) & 1) == 0); + const int tidx = get_local_id(0); + + if(eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; + + if (get_local_id(1) < 2) + { + sum = 
0; + + if (eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; + } + + if (get_local_id(1) > 13) + { + sum = 0; + + if (eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)]; + } + s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = 0; + + const int tidy = get_local_id(1); + + sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)]; + sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)]; + sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)]; + sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)]; + sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)]; + + if ((x < dstCols) && (y < dstRows)) + dst[x + y * dstStep] = (float)(4.0f * sum); + +} + +/////////////////////////////////////////////////////////////////////// +////////////////////////// CV_16UC1 ///////////////////////////////// +/////////////////////////////////////////////////////////////////////// +__kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst, + int srcRows,int dstRows,int srcCols,int dstCols, + int srcOffset,int dstOffset,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + __local float s_srcPatch[10][10]; + __local float s_dstPatch[20][16]; + + srcStep = srcStep >> 1; + dstStep = dstStep >> 1; + srcOffset = srcOffset >> 1; + dstOffset = dstOffset >> 1; + + + if( get_local_id(0) < 10 && get_local_id(1) < 10 ) + { + int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; + int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; + + srcx = abs(srcx); + srcx = min(srcCols - 1,srcx); + + srcy = abs(srcy); + srcy = min(srcRows -1 ,srcy); + + s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]); + + } + + barrier(CLK_LOCAL_MEM_FENCE); + + float sum = 0; + + const int evenFlag = (int)((get_local_id(0) & 1) == 0); + const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const bool eveny = ((get_local_id(1) & 1) == 0); + const int tidx = get_local_id(0); + + if(eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; + + if (get_local_id(1) < 2) + { + sum = 0; + + if (eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * 
s_srcPatch[0][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; + } + + if (get_local_id(1) > 13) + { + sum = 0; + + if (eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)]; + } + s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = 0; + + const int tidy = get_local_id(1); + + sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)]; + sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)]; + sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)]; + sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)]; + sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)]; + + if ((x < dstCols) && (y < dstRows)) + dst[x + y * dstStep] = (float)(4.0f * sum); + +} + +/////////////////////////////////////////////////////////////////////// +////////////////////////// CV_32FC1 ///////////////////////////////// +/////////////////////////////////////////////////////////////////////// +__kernel void pyrUp_C1_D5(__global float* src,__global float* dst, + int srcRows,int dstRows,int srcCols,int dstCols, + int srcOffset,int dstOffset,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + __local float s_srcPatch[10][10]; + __local float s_dstPatch[20][16]; + + srcOffset = srcOffset >> 2; + dstOffset = dstOffset >> 2; + srcStep = srcStep >> 2; + dstStep = dstStep >> 2; + + + if( get_local_id(0) < 10 && get_local_id(1) < 10 ) + { + int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; + int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; + + srcx = abs(srcx); + srcx = min(srcCols - 1,srcx); + + srcy = abs(srcy); + srcy = min(srcRows -1 ,srcy); + + s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]); + + } + + barrier(CLK_LOCAL_MEM_FENCE); + + float sum = 0; + const int evenFlag = (int)((get_local_id(0) & 1) == 0); + const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const bool eveny = ((get_local_id(1) & 1) == 0); + const int tidx = get_local_id(0); + + if(eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; + + if (get_local_id(1) < 2) + { + sum = 0; + + if (eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 
0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; + } + + if (get_local_id(1) > 13) + { + sum = 0; + + if (eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)]; + } + s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = 0; + + const int tidy = get_local_id(1); + + sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)]; + sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)]; + sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)]; + sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)]; + sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)]; + + if ((x < dstCols) && (y < dstRows)) + dst[x + y * dstStep] = (float)(4.0f * sum); + +} + +/////////////////////////////////////////////////////////////////////// +////////////////////////// CV_8UC4 ////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +float4 covert_uchar4_to_float4(uchar4 data) +{ + float4 f4Data = {0,0,0,0}; + + f4Data.x = (float)data.x; + f4Data.y = (float)data.y; + f4Data.z = (float)data.z; + f4Data.w = (float)data.w; + + return f4Data; +} + + +uchar4 convert_float4_to_uchar4(float4 data) +{ + uchar4 u4Data; + + u4Data.x = get_valid_uchar(data.x); + u4Data.y = get_valid_uchar(data.y); + u4Data.z = get_valid_uchar(data.z); + u4Data.w = get_valid_uchar(data.w); + + return u4Data; +} + +float4 int_x_float4(int leftOpr,float4 rightOpr) +{ + float4 result = {0,0,0,0}; + + result.x = rightOpr.x * leftOpr; + result.y = rightOpr.y * leftOpr; + result.z = rightOpr.z * leftOpr; + result.w = rightOpr.w * leftOpr; + + return result; +} + +float4 float4_x_float4(float4 leftOpr,float4 rightOpr) +{ + float4 result; + + result.x = leftOpr.x * rightOpr.x; + result.y = leftOpr.y * rightOpr.y; + result.z = leftOpr.z * rightOpr.z; + result.w = leftOpr.w * rightOpr.w; + + return result; +} + +__kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst, + int srcRows,int dstRows,int srcCols,int dstCols, + int srcOffset,int dstOffset,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + __local float4 s_srcPatch[10][10]; + __local float4 s_dstPatch[20][16]; + + srcOffset >>= 2; + dstOffset >>= 2; + srcStep >>= 2; + dstStep >>= 2; + + + if( get_local_id(0) < 10 && get_local_id(1) < 10 ) + { + int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; + int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; + + srcx = abs(srcx); + srcx = min(srcCols - 1,srcx); + + srcy = abs(srcy); + srcy = min(srcRows -1 ,srcy); + + s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_uchar4_to_float4(src[srcx + srcy * srcStep]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + float4 sum = (float4)(0,0,0,0); + + const int evenFlag = (int)((get_local_id(0) & 1) == 0); + const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const bool eveny = ((get_local_id(1) & 1) == 0); + const int tidx = get_local_id(0); + + float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); + float4 
co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); + float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); + + + if(eveny) + { + sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]); + + } + + s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; + + if (get_local_id(1) < 2) + { + sum = 0; + + if (eveny) + { + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]); + } + + s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; + } + + if (get_local_id(1) > 13) + { + sum = 0; + + if (eveny) + { + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]); + + } + s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = 0; + + const int tidy = get_local_id(1); + + sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]); + sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]); + sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]); + sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]); + sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]); + + if ((x < dstCols) && (y < dstRows)) + { + dst[x + y * dstStep] = convert_float4_to_uchar4(int_x_float4(4.0f,sum)); + } +} +/////////////////////////////////////////////////////////////////////// +////////////////////////// CV_16UC4 ////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +float4 covert_ushort4_to_float4(ushort4 data) +{ + float4 f4Data = {0,0,0,0}; + + f4Data.x = (float)data.x; + f4Data.y = (float)data.y; + f4Data.z = (float)data.z; + f4Data.w = (float)data.w; + + return f4Data; +} + + +ushort4 convert_float4_to_ushort4(float4 data) +{ + ushort4 u4Data; + + u4Data.x = (float)data.x; + u4Data.y = (float)data.y; + u4Data.z = (float)data.z; + u4Data.w = (float)data.w; + + return u4Data; +} + + +__kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst, + int srcRows,int dstRows,int srcCols,int dstCols, + int srcOffset,int dstOffset,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + const 
int y = get_global_id(1); + + __local float4 s_srcPatch[10][10]; + __local float4 s_dstPatch[20][16]; + + srcOffset >>= 3; + dstOffset >>= 3; + srcStep >>= 3; + dstStep >>= 3; + + + if( get_local_id(0) < 10 && get_local_id(1) < 10 ) + { + int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; + int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; + + srcx = abs(srcx); + srcx = min(srcCols - 1,srcx); + + srcy = abs(srcy); + srcy = min(srcRows -1 ,srcy); + + s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_ushort4_to_float4(src[srcx + srcy * srcStep]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + float4 sum = (float4)(0,0,0,0); + + const int evenFlag = (int)((get_local_id(0) & 1) == 0); + const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const bool eveny = ((get_local_id(1) & 1) == 0); + const int tidx = get_local_id(0); + + float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); + float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); + float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); + + + if(eveny) + { + sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]); + + } + + s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; + + if (get_local_id(1) < 2) + { + sum = 0; + + if (eveny) + { + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]); + } + + s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; + } + + if (get_local_id(1) > 13) + { + sum = 0; + + if (eveny) + { + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]); + + } + s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = 0; + + const int tidy = get_local_id(1); + + sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]); + sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]); + sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]); + sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]); + sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]); + + if ((x < dstCols) && (y < 
dstRows)) + { + dst[x + y * dstStep] = convert_float4_to_ushort4(int_x_float4(4.0f,sum)); + } +} + +/////////////////////////////////////////////////////////////////////// +////////////////////////// CV_32FC4 ////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +__kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst, + int srcRows,int dstRows,int srcCols,int dstCols, + int srcOffset,int dstOffset,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + __local float4 s_srcPatch[10][10]; + __local float4 s_dstPatch[20][16]; + + srcOffset >>= 4; + dstOffset >>= 4; + srcStep >>= 4; + dstStep >>= 4; + + + if( get_local_id(0) < 10 && get_local_id(1) < 10 ) + { + int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; + int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; + + srcx = abs(srcx); + srcx = min(srcCols - 1,srcx); + + srcy = abs(srcy); + srcy = min(srcRows -1 ,srcy); + + s_srcPatch[get_local_id(1)][get_local_id(0)] = (float4)(src[srcx + srcy * srcStep]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + float4 sum = (float4)(0,0,0,0); + + const int evenFlag = (int)((get_local_id(0) & 1) == 0); + const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const bool eveny = ((get_local_id(1) & 1) == 0); + const int tidx = get_local_id(0); + + float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); + float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); + float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); + + + if(eveny) + { + sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]); + + } + + s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; + + if (get_local_id(1) < 2) + { + sum = 0; + + if (eveny) + { + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]); + } + + s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; + } + + if (get_local_id(1) > 13) + { + sum = 0; + + if (eveny) + { + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]); + + } + s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; 
+ } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = 0; + + const int tidy = get_local_id(1); + + sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]); + sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]); + sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]); + sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]); + sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]); + + if ((x < dstCols) && (y < dstRows)) + { + dst[x + y * dstStep] = 4.0f * sum; + } +} \ No newline at end of file diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp new file mode 100644 index 0000000000..ad31b00c68 --- /dev/null +++ b/modules/ocl/src/match_template.cpp @@ -0,0 +1,560 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + + +#include +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +#define EXT_FP64 0 + +#if !defined (HAVE_OPENCL) +void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); } +#else +//helper routines +namespace cv +{ + namespace ocl + { + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *match_template; + } +} + +namespace cv { namespace ocl +{ + void matchTemplate_SQDIFF( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + + void matchTemplate_SQDIFF_NORMED( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + + void matchTemplate_CCORR( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + + void matchTemplate_CCORR_NORMED( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + + void matchTemplate_CCOFF( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + + void matchTemplate_CCOFF_NORMED( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + + + void matchTemplateNaive_SQDIFF( + const oclMat& image, const oclMat& templ, oclMat& result, int cn); + + void matchTemplateNaive_CCORR( + const oclMat& image, const oclMat& templ, oclMat& result, int cn); + + // Evaluates optimal template's area threshold. If + // template's area is less than the threshold, we use naive match + // template version, otherwise FFT-based (if available) + int getTemplateThreshold(int method, int depth) + { + switch (method) + { + case CV_TM_CCORR: + if (depth == CV_32F) return 250; + if (depth == CV_8U) return 300; + break; + case CV_TM_SQDIFF: + if (depth == CV_32F) return MAXSHORT; // do naive SQDIFF for CV_32F + if (depth == CV_8U) return 300; + break; + } + CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode"); + return 0; + } + + + ////////////////////////////////////////////////////////////////////// + // SQDIFF + void matchTemplate_SQDIFF( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) + { + result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); + if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth())) + { + matchTemplateNaive_SQDIFF(image, templ, result, image.channels()); + return; + } + else + { + // TODO + CV_Error(CV_StsBadArg, "Not supported yet for this size template"); + } + } + + void matchTemplate_SQDIFF_NORMED( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) + { + matchTemplate_CCORR(image,templ,result,buf); + buf.image_sums.resize(1); + buf.image_sqsums.resize(1); + + integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]); + +#if EXT_FP64 && SQRSUM_FIXED + unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; +#else + Mat sqr_mat = templ.reshape(1); + unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0]; +#endif + + Context *clCxt = image.clCxt; + string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED"; + vector< pair<size_t, const void *> > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( make_pair( sizeof(cl_int), (void
*)&result.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U); + } + + void matchTemplateNaive_SQDIFF( + const oclMat& image, const oclMat& templ, oclMat& result, int cn) + { + CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U ) + || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F); + CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1); + CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); + + Context *clCxt = image.clCxt; + string kernelName = "matchTemplate_Naive_SQDIFF"; + + vector< pair<size_t, const void *> > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); + } + + ////////////////////////////////////////////////////////////////////// + // CCORR + void matchTemplate_CCORR( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) + { + result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); + if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth())) + { + matchTemplateNaive_CCORR(image, templ, result, image.channels()); + return; + } + else + { + CV_Error(CV_StsBadArg, "Not supported yet for this size template"); + if(image.depth() == CV_8U && templ.depth() == CV_8U) + { + image.convertTo(buf.imagef, CV_32F); + templ.convertTo(buf.templf, CV_32F); + } + CV_Assert(image.channels() == 1); + oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels())); + filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0)); + result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1)); + } + } + + void matchTemplate_CCORR_NORMED( + const oclMat& image, const oclMat& templ,
oclMat& result, MatchTemplateBuf &buf) + { + matchTemplate_CCORR(image,templ,result,buf); + buf.image_sums.resize(1); + buf.image_sqsums.resize(1); + + integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]); +#if EXT_FP64 && SQRSUM_FIXED + unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; +#elif EXT_FP64 + oclMat templ_c1 = templ.reshape(1); + multiply(templ_c1, templ_c1, templ_c1); + unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0]; +#else + Mat m_templ_c1 = templ.reshape(1); + multiply(m_templ_c1, m_templ_c1, m_templ_c1); + unsigned long long templ_sqsum = (unsigned long long)sum(m_templ_c1)[0]; +#endif + Context *clCxt = image.clCxt; + string kernelName = "normalizeKernel"; + vector< pair<size_t, const void *> > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U); + } + + void matchTemplateNaive_CCORR( + const oclMat& image, const oclMat& templ, oclMat& result, int cn) + { + CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U ) + || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F); + CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1); + CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); + + Context *clCxt = image.clCxt; + string kernelName = "matchTemplate_Naive_CCORR"; + + vector< pair<size_t, const void *> > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt,
&match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); + } + ////////////////////////////////////////////////////////////////////// + // CCOFF + void matchTemplate_CCOFF( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) + { + CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U); + + matchTemplate_CCORR(image,templ,result,buf); + + Context *clCxt = image.clCxt; + string kernelName; + + kernelName = "matchTemplate_Prepared_CCOFF"; + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + + vector< pair<size_t, const void *> > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + // to be continued in the following section + if(image.channels() == 1) + { + buf.image_sums.resize(1); + // FIXME: temp fix for incorrect integral kernel + oclMat tmp_oclmat; + integral(image, buf.image_sums[0], tmp_oclmat); + + float templ_sum = 0; +#if EXT_FP64 + templ_sum = (float)sum(templ)[0] / templ.size().area(); +#else + Mat o_templ = templ; + templ_sum = (float)sum(o_templ)[0] / o_templ.size().area(); // temp fix for non-double supported machine +#endif + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) ); + } + else + { + Vec4f templ_sum = Vec4f::all(0); +#if EXT_FP64 + split(image,buf.images); + templ_sum = sum(templ) / templ.size().area(); +#else + // temp fix for non-double supported machine + Mat o_templ = templ, o_image = image; + vector<Mat> o_mat_vector; + o_mat_vector.resize(image.channels()); + buf.images.resize(image.channels()); + split(o_image, o_mat_vector); + for(int i = 0; i < o_mat_vector.size(); i ++) + { + buf.images[i] = oclMat(o_mat_vector[i]); + } + templ_sum = sum(o_templ) / templ.size().area(); +#endif + buf.image_sums.resize(buf.images.size()); + + for(int i = 0; i < image.channels(); i ++) + { + // FIXME: temp fix for incorrect integral kernel + oclMat omat_temp; + integral(buf.images[i], buf.image_sums[i], omat_temp); + } + switch(image.channels()) + { + case 4: + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) ); +
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) ); + break; + default: + CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels"); + break; + } + } + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); + } + + void matchTemplate_CCOFF_NORMED( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) + { + image.convertTo(buf.imagef, CV_32F); + templ.convertTo(buf.templf, CV_32F); + + matchTemplate_CCORR(buf.imagef, buf.templf, result, buf); + float scale = 1.f/templ.size().area(); + + Context *clCxt = image.clCxt; + string kernelName; + + kernelName = "matchTemplate_Prepared_CCOFF_NORMED"; + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + + vector< pair<size_t, const void *> > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + args.push_back( make_pair( sizeof(cl_float),(void *)&scale) ); + // to be continued in the following section + if(image.channels() == 1) + { + buf.image_sums.resize(1); + buf.image_sqsums.resize(1); + integral(image, buf.image_sums[0], buf.image_sqsums[0]); + float templ_sum = 0; + float templ_sqsum = 0; +#if EXT_FP64 + templ_sum = (float)sum(templ)[0]; +#if SQRSUM_FIXED + templ_sqsum = sqrSum(templ); +#else + oclMat templ_sqr = templ; + multiply(templ,templ, templ_sqr); + templ_sqsum = sum(templ_sqr)[0]; +#endif //SQRSUM_FIXED + templ_sqsum -= scale * templ_sum * templ_sum; + templ_sum *= scale; +#else + // temp fix for non-double supported machine + Mat o_templ = templ; + templ_sum = (float)sum(o_templ)[0]; + templ_sqsum = sum(o_templ.mul(o_templ))[0] - scale * templ_sum * templ_sum; + templ_sum *= scale; +#endif + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) ); + } + else + { + Vec4f templ_sum = Vec4f::all(0); + Vec4f templ_sqsum = Vec4f::all(0); +#if EXT_FP64 + split(image,buf.images); + templ_sum = sum(templ); +#if SQRSUM_FIXED + templ_sqsum = sqrSum(templ); +#else + oclMat templ_sqr = templ; + multiply(templ,templ, templ_sqr); + templ_sqsum = sum(templ_sqr); +#endif //SQRSUM_FIXED + templ_sqsum -= scale * templ_sum * templ_sum; + +#else + // temp fix for non-double supported machine + Mat o_templ = templ, o_image = image; + + vector<Mat> o_mat_vector; + o_mat_vector.resize(image.channels()); + buf.images.resize(image.channels()); + split(o_image, o_mat_vector); +
for(int i = 0; i < o_mat_vector.size(); i ++) + { + buf.images[i] = oclMat(o_mat_vector[i]); + } + templ_sum = sum(o_templ); + templ_sqsum = sum(o_templ.mul(o_templ)); +#endif + float templ_sqsum_sum = 0; + for(int i = 0; i < image.channels(); i ++) + { + templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i]; + } + templ_sum *= scale; + buf.image_sums.resize(buf.images.size()); + buf.image_sqsums.resize(buf.images.size()); + + for(int i = 0; i < image.channels(); i ++) + { + integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]); + } + + switch(image.channels()) + { + case 4: + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) ); + break; + default: + CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels"); + break; + } + } + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); + } + +}/*ocl*/} /*cv*/ + +void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method) +{ + MatchTemplateBuf buf; + matchTemplate(image,templ, result, method,buf); +} +void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf) +{ + CV_Assert(image.type() == templ.type()); + CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows); + + typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&); + + const Caller callers[] = { + ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED, + ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED, + ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED + }; + + Caller caller = callers[method]; + CV_Assert(caller); + caller(image, templ, result, buf); +} +#endif // diff --git a/modules/ocl/src/pyrdown.cpp b/modules/ocl/src/pyrdown.cpp new file mode 100644 index 0000000000..3f0a241cf7 --- /dev/null +++ b/modules/ocl/src/pyrdown.cpp @@ -0,0 +1,115 @@ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +using std::cout; +using std::endl; + +namespace cv +{ + namespace ocl + { + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *pyr_down; + + } +} + 
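For context, a minimal host-side sketch of how the new cv::ocl entry points introduced by this patch (matchTemplate, pyrDown, pyrUp) might be driven. It assumes the 2.4-era module headers named below, placeholder input files, and that oclMat construction/download uploads and fetches data as in the rest of the ocl module; it is an illustration only, not part of the patch.

#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"   // CV_TM_* method constants
#include "opencv2/highgui/highgui.hpp"   // cv::imread
#include "opencv2/ocl/ocl.hpp"           // assumed header for the ocl module

int main()
{
    // Placeholder file names; loaded as single-channel 8-bit, which the
    // naive match-template kernels in this patch accept (1 or 4 channels).
    cv::Mat image = cv::imread("scene.png", 0);
    cv::Mat templ = cv::imread("patch.png", 0);

    // Constructing an oclMat from a Mat uploads the data to the device.
    cv::ocl::oclMat d_image(image), d_templ(templ);
    cv::ocl::oclMat d_response, d_small, d_big;

    // Template matching on the device; method constants mirror cv::matchTemplate.
    cv::ocl::matchTemplate(d_image, d_templ, d_response, CV_TM_CCORR_NORMED);

    // One Gaussian pyramid step down (half size) and one step up (double size).
    cv::ocl::pyrDown(d_image, d_small);
    cv::ocl::pyrUp(d_image, d_big);

    cv::Mat response;
    d_response.download(response);   // copy the match response back to the host
    return 0;
}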
+////////////////////////////////////////////////////////////////////////////// +/////////////////////// add subtract multiply divide ///////////////////////// +////////////////////////////////////////////////////////////////////////////// +template <typename T> +void pyrdown_run(const oclMat &src, const oclMat &dst) +{ + CV_Assert(src.cols / 2 == dst.cols && src.rows / 2 == dst.rows); + + CV_Assert(src.type() == dst.type()); + CV_Assert(src.depth() != CV_8S); + + Context *clCxt = src.clCxt; + //int channels = dst.channels(); + //int depth = dst.depth(); + + string kernelName = "pyrDown"; + + //int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1}, + // {4, 0, 4, 4, 1, 1, 1}, + // {4, 0, 4, 4, 1, 1, 1}, + // {4, 0, 4, 4, 1, 1, 1} + //}; + + //size_t vector_length = vector_lengths[channels-1][depth]; + //int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1); + + size_t localThreads[3] = { 256, 1, 1 }; + size_t globalThreads[3] = { src.cols, dst.rows, 1}; + + //int dst_step1 = dst.cols * dst.elemSize(); + vector<pair<size_t, const void *> > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols)); + + openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); +} +void pyrdown_run(const oclMat &src, const oclMat &dst) +{ + switch(src.depth()) + { + case 0: + pyrdown_run<uchar>(src, dst); + break; + + case 1: + pyrdown_run<char>(src, dst); + break; + + case 2: + pyrdown_run<ushort>(src, dst); + break; + + case 3: + pyrdown_run<short>(src, dst); + break; + + case 4: + pyrdown_run<int>(src, dst); + break; + + case 5: + pyrdown_run<float>(src, dst); + break; + + case 6: + pyrdown_run<double>(src, dst); + break; + + default: + break; + } +} +////////////////////////////////////////////////////////////////////////////// +// pyrDown + +void cv::ocl::pyrDown(const oclMat& src, oclMat& dst) +{ + CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); + + //src.step = src.rows; + + dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type()); + + //dst.step = dst.rows; + + pyrdown_run(src, dst); +} + diff --git a/modules/ocl/src/pyrup.cpp b/modules/ocl/src/pyrup.cpp new file mode 100644 index 0000000000..ee0dfe382d --- /dev/null +++ b/modules/ocl/src/pyrup.cpp @@ -0,0 +1,88 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners.
+// +// @Authors +// Zhang Chunpeng chunpeng@multicorewareinc.com +// +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/* Haar features calculation */ +//#define EMU + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +#ifndef HAVE_OPENCL +void cv::ocl::pyrUp(const oclMat&, GpuMat&, oclMat&) { throw_nogpu(); } +#else + +namespace cv { namespace ocl +{ + extern const char *pyr_up; + void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst) + { + dst.create(src.rows * 2, src.cols * 2, src.type()); + Context *clCxt = src.clCxt; + + const std::string kernelName = "pyrUp"; + + std::vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); + + size_t globalThreads[3] = {dst.cols, dst.rows, 1}; + size_t localThreads[3] = {16, 16, 1}; + + openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + } +}}; +#endif // HAVE_OPENCL \ No newline at end of file diff --git a/modules/ocl/src/surf.cpp b/modules/ocl/src/surf.cpp new file mode 100644 index 0000000000..7d9798de73 --- /dev/null +++ b/modules/ocl/src/surf.cpp @@ -0,0 +1,760 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
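And the matching sketch for the cv::ocl::pyrUp call above: the destination is allocated at exactly 2*rows x 2*cols of the source type. Header path assumed, device selection assumed done.

    #include <opencv2/opencv.hpp>
    #include <opencv2/ocl/ocl.hpp>   // assumed header for the ocl module

    // Assumes an OpenCL device has already been selected.
    void doubleSize(const cv::Mat& src, cv::Mat& dst)
    {
        cv::ocl::oclMat d_src(src), d_dst;
        cv::ocl::pyrUp(d_src, d_dst);          // d_dst: (2*rows, 2*cols), same type
        d_dst.download(dst);
    }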
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#include +#include "precomp.hpp" + + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +#if !defined (HAVE_OPENCL) + +cv::ocl::SURF_OCL::SURF_OCL() { throw_nogpu(); } +cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool) { throw_nogpu(); } +int cv::ocl::SURF_OCL::descriptorSize() const { throw_nogpu(); return 0;} +void cv::ocl::SURF_OCL::uploadKeypoints(const vector&, oclMat&) { throw_nogpu(); } +void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat&, vector&) { throw_nogpu(); } +void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat&, vector&) { throw_nogpu(); } +void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); } +void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&, oclMat&, bool) { throw_nogpu(); } +void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector&) { throw_nogpu(); } +void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector&, oclMat&, bool) { throw_nogpu(); } +void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector&, vector&, bool) { throw_nogpu(); } +void cv::ocl::SURF_OCL::releaseMemory() { throw_nogpu(); } + +#else /* !defined (HAVE_OPENCL) */ +namespace cv { namespace ocl +{ + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char * nonfree_surf; +}} + +namespace +{ + static inline int divUp(int total, int grain) + { + return (total + grain - 1) / grain; + } + static inline int calcSize(int octave, int layer) + { + /* Wavelet size at first layer of first octave. 
*/ + const int HAAR_SIZE0 = 9; + + /* Wavelet size increment between layers. This should be an even number, + such that the wavelet sizes in an octave are either all even or all odd. + This ensures that when looking for the neighbours of a sample, the layers + + above and below are aligned correctly. */ + const int HAAR_SIZE_INC = 6; + + return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave; + } + + class SURF_OCL_Invoker + { + public: + // facilities + void bindImgTex(const oclMat& img); + void bindSumTex(const oclMat& sum); + void bindMaskSumTex(const oclMat& maskSum); + + //void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold); + //void loadOctaveConstants(int octave, int layer_rows, int layer_cols); + + // kernel callers declearations + void icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int layer_rows); + + void icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset, + int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols); + + void icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter, + oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures); + + void icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures); + + void compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures); + // end of kernel callers declearations + + + SURF_OCL_Invoker(SURF_OCL& surf, const oclMat& img, const oclMat& mask) : + surf_(surf), + img_cols(img.cols), img_rows(img.rows), + use_mask(!mask.empty()) + { + CV_Assert(!img.empty() && img.type() == CV_8UC1); + CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1)); + CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0); + + const int min_size = calcSize(surf_.nOctaves - 1, 0); + CV_Assert(img_rows - min_size >= 0); + CV_Assert(img_cols - min_size >= 0); + + const int layer_rows = img_rows >> (surf_.nOctaves - 1); + const int layer_cols = img_cols >> (surf_.nOctaves - 1); + const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1; + CV_Assert(layer_rows - 2 * min_margin > 0); + CV_Assert(layer_cols - 2 * min_margin > 0); + + maxFeatures = std::min(static_cast(img.size().area() * surf.keypointsRatio), 65535); + maxCandidates = std::min(static_cast(1.5 * maxFeatures), 65535); + + CV_Assert(maxFeatures > 0); + + counters.create(1, surf_.nOctaves + 1, CV_32SC1); + counters.setTo(Scalar::all(0)); + + //loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast(surf_.hessianThreshold)); + + bindImgTex(img); + oclMat integral_sqsum; + integral(img, surf_.sum, integral_sqsum); // the two argumented integral version is incorrect + + bindSumTex(surf_.sum); + maskSumTex = 0; + + if (use_mask) + { + throw std::exception(); + //!FIXME + // temp fix for missing min overload + oclMat temp(mask.size(), mask.type()); + temp.setTo(Scalar::all(1.0)); + //cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this + integral(surf_.mask1, surf_.maskSum); + bindMaskSumTex(surf_.maskSum); + } + } + + void detectKeypoints(oclMat& keypoints) + { + // create image pyramid buffers + // different layers have same sized buffers, but they are sampled from gaussin kernel. 
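The invoker above sizes its keypoint buffers from the image area: maxFeatures = min(area * keypointsRatio, 65535) and maxCandidates = min(1.5 * maxFeatures, 65535). With the default keypointsRatio of 0.01, a 640x480 image (area 307200) reserves room for 3072 keypoints and 4608 candidates. A minimal end-to-end sketch of the public SURF_OCL interface, with the header path and device call assumed from the tests; note that a non-empty mask currently throws (see the FIXME above), so the mask is left empty here.

    #include <opencv2/opencv.hpp>
    #include <opencv2/ocl/ocl.hpp>   // assumed header for the ocl module

    int main()
    {
        std::vector<cv::ocl::Info> info;
        if (cv::ocl::getDevice(info) == 0)        // device selection as in the tests
            return -1;

        cv::Mat gray = cv::imread("scene.jpg", 0); // detector expects CV_8UC1
        if (gray.empty())
            return -1;

        cv::ocl::SURF_OCL surf;                    // defaults: hessian 100, 4 octaves, 2 layers
        cv::ocl::oclMat d_img(gray), d_mask;       // empty mask: mask support is not functional yet

        std::vector<cv::KeyPoint> keypoints;
        std::vector<float> descriptors;
        surf(d_img, d_mask, keypoints, descriptors, false);

        // descriptors holds keypoints.size() rows of surf.descriptorSize() floats
        // (64, or 128 when extended == true, which is the default).
        return 0;
    }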
+ surf_.det.create(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1); + surf_.trace.create(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1); + + surf_.maxPosBuffer.create(1, maxCandidates, CV_32SC4); + keypoints.create(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32FC1); + keypoints.setTo(Scalar::all(0)); + + for (int octave = 0; octave < surf_.nOctaves; ++octave) + { + const int layer_rows = img_rows >> octave; + const int layer_cols = img_cols >> octave; + + //loadOctaveConstants(octave, layer_rows, layer_cols); + + icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows); + + icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave, + octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols); + + unsigned int maxCounter = Mat(counters).at(1 + octave); + maxCounter = std::min(maxCounter, static_cast(maxCandidates)); + + if (maxCounter > 0) + { + icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter, + keypoints, counters, octave, layer_rows, maxFeatures); + } + } + unsigned int featureCounter = Mat(counters).at(0); + featureCounter = std::min(featureCounter, static_cast(maxFeatures)); + + keypoints.cols = featureCounter; + + if (surf_.upright) + keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0)); + else + findOrientation(keypoints); + } + + void findOrientation(oclMat& keypoints) + { + const int nFeatures = keypoints.cols; + if (nFeatures > 0) + { + icvCalcOrientation_gpu(keypoints, nFeatures); + } + } + + void computeDescriptors(const oclMat& keypoints, oclMat& descriptors, int descriptorSize) + { + const int nFeatures = keypoints.cols; + if (nFeatures > 0) + { + descriptors.create(nFeatures, descriptorSize, CV_32F); + compute_descriptors_gpu(descriptors, keypoints, nFeatures); + } + } + + ~SURF_OCL_Invoker() + { + if(imgTex) + openCLFree(imgTex); + if(sumTex) + openCLFree(sumTex); + if(maskSumTex) + openCLFree(maskSumTex); + additioalParamBuffer.release(); + } + + private: + SURF_OCL& surf_; + + int img_cols, img_rows; + + bool use_mask; + + int maxCandidates; + int maxFeatures; + + oclMat counters; + + // texture buffers + cl_mem imgTex; + cl_mem sumTex; + cl_mem maskSumTex; + + oclMat additioalParamBuffer; + }; +} + +cv::ocl::SURF_OCL::SURF_OCL() +{ + hessianThreshold = 100.0f; + extended = true; + nOctaves = 4; + nOctaveLayers = 2; + keypointsRatio = 0.01f; + upright = false; +} + +cv::ocl::SURF_OCL::SURF_OCL(double _threshold, int _nOctaves, int _nOctaveLayers, bool _extended, float _keypointsRatio, bool _upright) +{ + hessianThreshold = _threshold; + extended = _extended; + nOctaves = _nOctaves; + nOctaveLayers = _nOctaveLayers; + keypointsRatio = _keypointsRatio; + upright = _upright; +} + +int cv::ocl::SURF_OCL::descriptorSize() const +{ + return extended ? 
128 : 64; +} + +void cv::ocl::SURF_OCL::uploadKeypoints(const vector& keypoints, oclMat& keypointsGPU) +{ + if (keypoints.empty()) + keypointsGPU.release(); + else + { + Mat keypointsCPU(SURF_OCL::ROWS_COUNT, static_cast(keypoints.size()), CV_32FC1); + + float* kp_x = keypointsCPU.ptr(SURF_OCL::X_ROW); + float* kp_y = keypointsCPU.ptr(SURF_OCL::Y_ROW); + int* kp_laplacian = keypointsCPU.ptr(SURF_OCL::LAPLACIAN_ROW); + int* kp_octave = keypointsCPU.ptr(SURF_OCL::OCTAVE_ROW); + float* kp_size = keypointsCPU.ptr(SURF_OCL::SIZE_ROW); + float* kp_dir = keypointsCPU.ptr(SURF_OCL::ANGLE_ROW); + float* kp_hessian = keypointsCPU.ptr(SURF_OCL::HESSIAN_ROW); + + for (size_t i = 0, size = keypoints.size(); i < size; ++i) + { + const KeyPoint& kp = keypoints[i]; + kp_x[i] = kp.pt.x; + kp_y[i] = kp.pt.y; + kp_octave[i] = kp.octave; + kp_size[i] = kp.size; + kp_dir[i] = kp.angle; + kp_hessian[i] = kp.response; + kp_laplacian[i] = 1; + } + + keypointsGPU.upload(keypointsCPU); + } +} + +void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat& keypointsGPU, vector& keypoints) +{ + const int nFeatures = keypointsGPU.cols; + + if (nFeatures == 0) + keypoints.clear(); + else + { + CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == ROWS_COUNT); + + Mat keypointsCPU(keypointsGPU); + + keypoints.resize(nFeatures); + + float* kp_x = keypointsCPU.ptr(SURF_OCL::X_ROW); + float* kp_y = keypointsCPU.ptr(SURF_OCL::Y_ROW); + int* kp_laplacian = keypointsCPU.ptr(SURF_OCL::LAPLACIAN_ROW); + int* kp_octave = keypointsCPU.ptr(SURF_OCL::OCTAVE_ROW); + float* kp_size = keypointsCPU.ptr(SURF_OCL::SIZE_ROW); + float* kp_dir = keypointsCPU.ptr(SURF_OCL::ANGLE_ROW); + float* kp_hessian = keypointsCPU.ptr(SURF_OCL::HESSIAN_ROW); + + for (int i = 0; i < nFeatures; ++i) + { + KeyPoint& kp = keypoints[i]; + kp.pt.x = kp_x[i]; + kp.pt.y = kp_y[i]; + kp.class_id = kp_laplacian[i]; + kp.octave = kp_octave[i]; + kp.size = kp_size[i]; + kp.angle = kp_dir[i]; + kp.response = kp_hessian[i]; + } + } +} + +void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat& descriptorsGPU, vector& descriptors) +{ + if (descriptorsGPU.empty()) + descriptors.clear(); + else + { + CV_Assert(descriptorsGPU.type() == CV_32F); + + descriptors.resize(descriptorsGPU.rows * descriptorsGPU.cols); + Mat descriptorsCPU(descriptorsGPU.size(), CV_32F, &descriptors[0]); + descriptorsGPU.download(descriptorsCPU); + } +} + +void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints) +{ + if (!img.empty()) + { + SURF_OCL_Invoker surf(*this, img, mask); + + surf.detectKeypoints(keypoints); + } +} + +void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors, + bool useProvidedKeypoints) +{ + if (!img.empty()) + { + SURF_OCL_Invoker surf(*this, img, mask); + + if (!useProvidedKeypoints) + surf.detectKeypoints(keypoints); + else if (!upright) + { + surf.findOrientation(keypoints); + } + + surf.computeDescriptors(keypoints, descriptors, descriptorSize()); + } +} + +void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector& keypoints) +{ + oclMat keypointsGPU; + + (*this)(img, mask, keypointsGPU); + + downloadKeypoints(keypointsGPU, keypoints); +} + +void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector& keypoints, + oclMat& descriptors, bool useProvidedKeypoints) +{ + oclMat keypointsGPU; + + if (useProvidedKeypoints) + uploadKeypoints(keypoints, keypointsGPU); + + (*this)(img, mask, keypointsGPU, descriptors, 
useProvidedKeypoints); + + downloadKeypoints(keypointsGPU, keypoints); +} + +void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector& keypoints, + vector& descriptors, bool useProvidedKeypoints) +{ + oclMat descriptorsGPU; + + (*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints); + + downloadDescriptors(descriptorsGPU, descriptors); +} + +void cv::ocl::SURF_OCL::releaseMemory() +{ + sum.release(); + mask1.release(); + maskSum.release(); + intBuffer.release(); + det.release(); + trace.release(); + maxPosBuffer.release(); +} + +// Facilities + +//// load SURF constants into device memory +//void SURF_OCL_Invoker::loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold) +//{ +// Mat tmp(1, 9, CV_32FC1); +// float * tmp_data = tmp.ptr(); +// *tmp_data = maxCandidates; +// *(++tmp_data) = maxFeatures; +// *(++tmp_data) = img_rows; +// *(++tmp_data) = img_cols; +// *(++tmp_data) = nOctaveLayers; +// *(++tmp_data) = hessianThreshold; +// additioalParamBuffer = tmp; +//} +//void SURF_OCL_Invoker::loadOctaveConstants(int octave, int layer_rows, int layer_cols) +//{ +// Mat tmp = additioalParamBuffer; +// float * tmp_data = tmp.ptr(); +// tmp_data += 6; +// *tmp_data = octave; +// *(++tmp_data) = layer_rows; +// *(++tmp_data) = layer_cols; +// additioalParamBuffer = tmp; +//} + +// create and bind source buffer to image oject. +void SURF_OCL_Invoker::bindImgTex(const oclMat& img) +{ + Mat cpu_img(img); // time consuming + cl_image_format format; + int err; + + format.image_channel_data_type = CL_UNSIGNED_INT8; + format.image_channel_order = CL_R; + +#if CL_VERSION_1_2 + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = cpu_img.cols; + desc.image_height = cpu_img.rows; + desc.image_depth = NULL; + desc.image_array_size = 1; + desc.image_row_pitch = cpu_img.step; + desc.image_slice_pitch= 0; + desc.buffer = NULL; + desc.num_mip_levels = 0; + desc.num_samples = 0; + imgTex = clCreateImage(img.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err); +#else + imgTex = clCreateImage2D( + img.clCxt->impl->clContext, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + &format, + cpu_img.cols, + cpu_img.rows, + cpu_img.step, + cpu_img.data, + &err); +#endif + openCLSafeCall(err); +} + +void SURF_OCL_Invoker::bindSumTex(const oclMat& sum) +{ + Mat cpu_img(sum); // time consuming + cl_image_format format; + int err; + format.image_channel_data_type = CL_UNSIGNED_INT32; + format.image_channel_order = CL_R; +#if CL_VERSION_1_2 + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = cpu_img.cols; + desc.image_height = cpu_img.rows; + desc.image_depth = NULL; + desc.image_array_size = 1; + desc.image_row_pitch = cpu_img.step; + desc.image_slice_pitch= 0; + desc.buffer = NULL; + desc.num_mip_levels = 0; + desc.num_samples = 0; + sumTex = clCreateImage(sum.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err); +#else + sumTex = clCreateImage2D( + sum.clCxt->impl->clContext, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + &format, + cpu_img.cols, + cpu_img.rows, + cpu_img.step, + cpu_img.data, + &err); +#endif + openCLSafeCall(err); +} +void SURF_OCL_Invoker::bindMaskSumTex(const oclMat& maskSum) +{ + Mat cpu_img(maskSum); // time consuming + cl_image_format format; + int err; + format.image_channel_data_type = CL_UNSIGNED_INT32; + format.image_channel_order = 
CL_R; +#if CL_VERSION_1_2 + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = cpu_img.cols; + desc.image_height = cpu_img.rows; + desc.image_depth = NULL; + desc.image_array_size = 1; + desc.image_row_pitch = cpu_img.step; + desc.image_slice_pitch= 0; + desc.buffer = NULL; + desc.num_mip_levels = 0; + desc.num_samples = 0; + maskSumTex = clCreateImage(maskSum.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err); +#else + maskSumTex = clCreateImage2D( + maskSum.clCxt->impl->clContext, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + &format, + cpu_img.cols, + cpu_img.rows, + cpu_img.step, + cpu_img.data, + &err); +#endif + openCLSafeCall(err); +} + +//////////////////////////// +// kernel caller definitions +void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int c_layer_rows) +{ + const int min_size = calcSize(octave, 0); + const int max_samples_i = 1 + ((img_rows - min_size) >> octave); + const int max_samples_j = 1 + ((img_cols - min_size) >> octave); + + Context *clCxt = det.clCxt; + string kernelName = "icvCalcLayerDetAndTrace"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trace.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&det.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&trace.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&nOctaveLayers)); + args.push_back( make_pair( sizeof(cl_int), (void *)&octave)); + args.push_back( make_pair( sizeof(cl_int), (void *)&c_layer_rows)); + + size_t localThreads[3] = {16, 16, 1}; + size_t globalThreads[3] = { + divUp(max_samples_j, localThreads[0]) * localThreads[0], + divUp(max_samples_i, localThreads[1]) * localThreads[1] * (nOctaveLayers + 2), + 1}; + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset, + int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols) +{ + const int min_margin = ((calcSize(octave, 2) >> 1) >> octave) + 1; + + Context *clCxt = det.clCxt; + string kernelName = use_mask ? 
"icvFindMaximaInLayer_withmask" : "icvFindMaximaInLayer"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trace.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&maxCounter.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&counterOffset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&det.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&trace.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&nLayers)); + args.push_back( make_pair( sizeof(cl_int), (void *)&octave)); + args.push_back( make_pair( sizeof(cl_int), (void *)&layer_rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&layer_cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&maxCandidates)); + args.push_back( make_pair( sizeof(cl_float), (void *)&surf_.hessianThreshold)); + + if(use_mask) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&maskSumTex)); + } + + size_t localThreads[3] = {16, 16, 1}; + size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) * localThreads[0], + divUp(layer_rows - 2 * min_margin, localThreads[1] - 2) * nLayers * localThreads[1], + 1}; + + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter, + oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures) +{ + Context *clCxt = det.clCxt; + string kernelName = "icvInterpolateKeypoint"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&counters.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&det.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&octave)); + args.push_back( make_pair( sizeof(cl_int), (void *)&layer_rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&maxFeatures)); + + size_t localThreads[3] = {3, 3, 3}; + size_t globalThreads[3] = {maxCounter * localThreads[0], 1, 1}; + + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures) +{ + Context * clCxt = counters.clCxt; + string kernelName = "icvCalcOrientation"; + + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); + + size_t localThreads[3] = {32, 4, 1}; + size_t globalThreads[3] = {nFeatures * localThreads[0], localThreads[1], 1}; + + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, 
globalThreads, localThreads, args, -1, -1); +} + +void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures) +{ + // compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D + Context *clCxt = descriptors.clCxt; + string kernelName = ""; + vector< pair > args; + size_t localThreads[3] = {1, 1, 1}; + size_t globalThreads[3] = {1, 1, 1}; + + if(descriptors.cols == 64) + { + kernelName = "compute_descriptors64"; + + localThreads[0] = 6; + localThreads[1] = 6; + + globalThreads[0] = nFeatures * localThreads[0]; + globalThreads[1] = 16 * localThreads[1]; + + args.clear(); + args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); + + kernelName = "normalize_descriptors64"; + + localThreads[0] = 64; + localThreads[1] = 1; + + globalThreads[0] = nFeatures * localThreads[0]; + globalThreads[1] = localThreads[1]; + + args.clear(); + args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); + } + else + { + kernelName = "compute_descriptors128"; + + localThreads[0] = 6; + localThreads[1] = 6; + + globalThreads[0] = nFeatures * localThreads[0]; + globalThreads[1] = 16 * localThreads[1]; + + args.clear(); + args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); + + kernelName = "normalize_descriptors128"; + + localThreads[0] = 128; + localThreads[1] = 1; + + globalThreads[0] = nFeatures * localThreads[0]; + globalThreads[1] = localThreads[1]; + + args.clear(); + args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); + } +} + +#endif // /* !defined (HAVE_OPENCL) */ + diff --git a/modules/ocl/test/test_blend.cpp b/modules/ocl/test/test_blend.cpp new file mode 100644 index 0000000000..a0391b1bb0 --- /dev/null +++ b/modules/ocl/test/test_blend.cpp @@ -0,0 +1,83 @@ +#include "precomp.hpp" +#include + +using namespace cv; +using namespace cv::ocl; +using namespace cvtest; +using namespace testing; +using namespace std; + +template +void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& weights1, const cv::Mat& weights2, cv::Mat& result_gold) +{ + result_gold.create(img1.size(), img1.type()); + + int cn = img1.channels(); + + for (int y = 0; y < img1.rows; ++y) + { + const float* weights1_row = weights1.ptr(y); + const float* weights2_row = weights2.ptr(y); + const T* img1_row = img1.ptr(y); + const T* img2_row = img2.ptr(y); + T* result_gold_row 
= result_gold.ptr(y); + + for (int x = 0; x < img1.cols * cn; ++x) + { + float w1 = weights1_row[x / cn]; + float w2 = weights2_row[x / cn]; + result_gold_row[x] = static_cast((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f)); + } + } +} + +PARAM_TEST_CASE(Blend, cv::Size, MatType/*, UseRoi*/) +{ + std::vector oclinfo; + cv::Size size; + int type; + bool useRoi; + + virtual void SetUp() + { + //devInfo = GET_PARAM(0); + size = GET_PARAM(0); + type = GET_PARAM(1); + /*useRoi = GET_PARAM(3);*/ + + int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); + CV_Assert(devnums > 0); + } +}; + +TEST_P(Blend, Accuracy) +{ + int depth = CV_MAT_DEPTH(type); + + cv::Mat img1 = randomMat(size, type, 0.0, depth == CV_8U ? 255.0 : 1.0); + cv::Mat img2 = randomMat(size, type, 0.0, depth == CV_8U ? 255.0 : 1.0); + cv::Mat weights1 = randomMat(size, CV_32F, 0, 1); + cv::Mat weights2 = randomMat(size, CV_32F, 0, 1); + + cv::ocl::oclMat gimg1(size, type), gimg2(size, type), gweights1(size, CV_32F), gweights2(size, CV_32F); + cv::ocl::oclMat dst(size, type); + gimg1.upload(img1); + gimg2.upload(img2); + gweights1.upload(weights1); + gweights2.upload(weights2); + cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst); + cv::Mat result; + cv::Mat result_gold; + dst.download(result); + if (depth == CV_8U) + blendLinearGold(img1, img2, weights1, weights2, result_gold); + else + blendLinearGold(img1, img2, weights1, weights2, result_gold); + + EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1 : 1e-5f, NULL) +} + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine( + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)) +)); \ No newline at end of file diff --git a/modules/ocl/test/test_columnsum.cpp b/modules/ocl/test/test_columnsum.cpp new file mode 100644 index 0000000000..94e109d200 --- /dev/null +++ b/modules/ocl/test/test_columnsum.cpp @@ -0,0 +1,108 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Chunpeng Zhang chunpeng@multicorewareinc.com +// +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
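The CPU reference above blends each pixel as (img1*w1 + img2*w2) / (w1 + w2 + 1e-5f); the small epsilon keeps the division defined where both weights are zero. A worked instance of that formula for one 8-bit pixel:

    // Worked instance of the reference used by blendLinearGold above.
    static float blendOnePixel()
    {
        const float w1 = 0.25f, w2 = 0.75f;
        const unsigned char p1 = 200, p2 = 100;
        return (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);  // ~124.999, stored as 124 for CV_8U
    }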
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include + +/////////////////////////////////////////////////////////////////////////////// +/// ColumnSum + +#ifdef HAVE_OPENCL + +//////////////////////////////////////////////////////////////////////// +// ColumnSum + +PARAM_TEST_CASE(ColumnSum, cv::Size, bool ) +{ + cv::Size size; + cv::Mat src; + bool useRoi; + std::vector oclinfo; + + virtual void SetUp() + { + size = GET_PARAM(0); + useRoi = GET_PARAM(1); + int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); + CV_Assert(devnums > 0); + } +}; + +TEST_P(ColumnSum, Accuracy) +{ + cv::Mat src = randomMat(size, CV_32FC1); + //cv::Mat src(size,CV_32FC1); + + //cv::ocl::oclMat d_dst = ::createMat(size,src.type(),useRoi); + cv::ocl::oclMat d_dst = loadMat(src,useRoi); + + cv::ocl::columnSum(loadMat(src,useRoi),d_dst); + + cv::Mat dst(d_dst); + + for (int j = 0; j < src.cols; ++j) + { + float gold = src.at(0, j); + float res = dst.at(0, j); + ASSERT_NEAR(res, gold, 1e-5); + } + + for (int i = 1; i < src.rows; ++i) + { + for (int j = 0; j < src.cols; ++j) + { + float gold = src.at(i, j) += src.at(i - 1, j); + float res = dst.at(i, j); + ASSERT_NEAR(res, gold, 1e-5); + } + } +} + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ColumnSum, testing::Combine( + DIFFERENT_SIZES,testing::Values(Inverse(false),Inverse(true)))); + + +#endif diff --git a/modules/ocl/test/test_fft.cpp b/modules/ocl/test/test_fft.cpp new file mode 100644 index 0000000000..4b51d4feca --- /dev/null +++ b/modules/ocl/test/test_fft.cpp @@ -0,0 +1,97 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
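The gold check above defines cv::ocl::columnSum as an inclusive prefix sum down each column: row 0 is copied unchanged, and every following row accumulates the value directly above it. A CPU reference matching that expectation (CV_32FC1 input, as in the test) would look like this sketch:

    #include <opencv2/core/core.hpp>

    // CPU reference matching the expectation checked above.
    static void columnSumGold(const cv::Mat& src, cv::Mat& dst)
    {
        CV_Assert(src.type() == CV_32FC1);
        dst = src.clone();                     // row 0 copied as-is
        for (int i = 1; i < dst.rows; ++i)
            for (int j = 0; j < dst.cols; ++j)
                dst.at<float>(i, j) += dst.at<float>(i - 1, j);  // inclusive column prefix sum
    }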
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +using namespace std; +#ifdef HAVE_CLAMDFFT +//////////////////////////////////////////////////////////////////////////// +// Dft +PARAM_TEST_CASE(Dft, cv::Size, bool) +{ + cv::Size dft_size; + bool dft_rows; + std::vector oclinfo; + virtual void SetUp() + { + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + dft_size = GET_PARAM(0); + dft_rows = GET_PARAM(1); + } +}; + +TEST_P(Dft, C2C) +{ + cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0); + cv::Mat b_gold; + int flags = 0; + flags |= dft_rows ? cv::DFT_ROWS : 0; + + cv::ocl::oclMat d_b; + + cv::dft(a, b_gold, flags); + cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); + EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, ""); +} + + +TEST_P(Dft, R2CthenC2R) +{ + cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); + + int flags = 0; + //flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet + + cv::ocl::oclMat d_b, d_c; + cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); + cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT); + EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, ""); +} + +INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine( + testing::Values(cv::Size(5, 4), cv::Size(20, 20)), + testing::Values(false, true))); + +#endif // HAVE_CLAMDFFT diff --git a/modules/ocl/test/test_gemm.cpp b/modules/ocl/test/test_gemm.cpp new file mode 100644 index 0000000000..a836149cb0 --- /dev/null +++ b/modules/ocl/test/test_gemm.cpp @@ -0,0 +1,85 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
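The DFT tests above scale the allowed error with the transform area because every output bin is a sum over all input samples, so absolute error grows with the number of elements. A minimal round-trip sketch using the same entry point, only meaningful in builds where HAVE_CLAMDFFT is defined; the header path is an assumption and the call signature follows the test.

    #include <opencv2/opencv.hpp>
    #include <opencv2/ocl/ocl.hpp>   // assumed header for the ocl module

    // Forward R2C transform followed by the inverse, as exercised by Dft::R2CthenC2R.
    void dftRoundTrip(const cv::Mat& a /* CV_32FC1 */, cv::Mat& recovered)
    {
        cv::ocl::oclMat d_spectrum, d_back;
        cv::ocl::dft(cv::ocl::oclMat(a), d_spectrum, a.size(), 0);
        cv::ocl::dft(d_spectrum, d_back, a.size(), cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
        d_back.download(recovered);            // should match a up to ~area * 1e-4
    }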
+// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#include "precomp.hpp" +using namespace std; +#ifdef HAVE_CLAMDBLAS +//////////////////////////////////////////////////////////////////////////// +// GEMM +PARAM_TEST_CASE(Gemm, int, cv::Size, int) +{ + int type; + cv::Size mat_size; + int flags; + vector info; + virtual void SetUp() + { + type = GET_PARAM(0); + mat_size = GET_PARAM(1); + flags = GET_PARAM(2); + cv::ocl::getDevice(info); + } +}; + +TEST_P(Gemm, Accuracy) +{ + cv::Mat a = randomMat(mat_size, type, 0.0, 10.0); + cv::Mat b = randomMat(mat_size, type, 0.0, 10.0); + cv::Mat c = randomMat(mat_size, type, 0.0, 10.0); + + cv::Mat dst; + cv::ocl::oclMat ocl_dst; + + cv::gemm(a, b, 1.0, c, 1.0, dst, flags); + cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags); + + EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, ""); +} + +INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine( + testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/), + testing::Values(cv::Size(20, 20), cv::Size(300, 300)), + testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T))); +#endif diff --git a/modules/ocl/test/test_hog.cpp b/modules/ocl/test/test_hog.cpp new file mode 100644 index 0000000000..f49751576f --- /dev/null +++ b/modules/ocl/test/test_hog.cpp @@ -0,0 +1,192 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
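cv::ocl::gemm mirrors cv::gemm: it computes dst = alpha*op(A)*op(B) + beta*C, where GEMM_1_T and GEMM_2_T transpose the first and second operand respectively (the test exercises all four combinations on CV_32FC1/CV_32FC2). A short sketch under those assumptions, requiring a build with HAVE_CLAMDBLAS; the header path is assumed.

    #include <opencv2/opencv.hpp>
    #include <opencv2/ocl/ocl.hpp>   // assumed header for the ocl module

    void gemmExample(const cv::Mat& A, const cv::Mat& B, const cv::Mat& C, cv::Mat& out)
    {
        cv::ocl::oclMat d_out;
        // out = 1.0 * A^T * B + 1.0 * C
        cv::ocl::gemm(cv::ocl::oclMat(A), cv::ocl::oclMat(B), 1.0,
                      cv::ocl::oclMat(C), 1.0, d_out, cv::GEMM_1_T);
        d_out.download(out);
    }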
+// +// @Authors +// Wenju He, wenju@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "opencv2/core/core.hpp" +using namespace std; +#ifdef HAVE_OPENCL + + +PARAM_TEST_CASE(HOG,cv::Size,int) +{ + cv::Size winSize; + int type; + vector info; + virtual void SetUp() + { + winSize = GET_PARAM(0); + type = GET_PARAM(1); + cv::ocl::getDevice(info); + } +}; + +TEST_P(HOG, GetDescriptors) +{ + // Load image + cv::Mat img_rgb = readImage("../../../samples/gpu/road.png"); + ASSERT_FALSE(img_rgb.empty()); + + // Convert image + cv::Mat img; + switch (type) + { + case CV_8UC1: + cv::cvtColor(img_rgb, img, CV_BGR2GRAY); + break; + case CV_8UC4: + default: + cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + break; + } + cv::ocl::oclMat d_img(img); + + // HOGs + cv::ocl::HOGDescriptor ocl_hog; + ocl_hog.gamma_correction = true; + cv::HOGDescriptor hog; + hog.gammaCorrection = true; + + // Compute descriptor + cv::ocl::oclMat d_descriptors; + ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL); + cv::Mat down_descriptors; + d_descriptors.download(down_descriptors); + down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows); + + hog.setSVMDetector(hog.getDefaultPeopleDetector()); + std::vector descriptors; + switch (type) + { + case CV_8UC1: + hog.compute(img, descriptors, ocl_hog.win_size); + break; + case CV_8UC4: + default: + hog.compute(img_rgb, descriptors, ocl_hog.win_size); + break; + } + cv::Mat cpu_descriptors(descriptors); + + EXPECT_MAT_SIMILAR(down_descriptors, cpu_descriptors, 1e-2); +} + + +TEST_P(HOG, Detect) +{ + // Load image + cv::Mat img_rgb = readImage("../../../samples/gpu/road.png"); + ASSERT_FALSE(img_rgb.empty()); + + // Convert image + cv::Mat img; + switch (type) + { + case CV_8UC1: + cv::cvtColor(img_rgb, img, CV_BGR2GRAY); + break; + case CV_8UC4: + default: + cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + break; + } + cv::ocl::oclMat d_img(img); + + // HOGs + if ((winSize != cv::Size(48, 96)) && (winSize != cv::Size(64, 128))) + winSize = 
cv::Size(64, 128); + cv::ocl::HOGDescriptor ocl_hog(winSize); + ocl_hog.gamma_correction = true; + + cv::HOGDescriptor hog; + hog.winSize = winSize; + hog.gammaCorrection = true; + + if (winSize.width == 48 && winSize.height == 96) + { + // daimler's base + ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector48x96()); + hog.setSVMDetector(hog.getDaimlerPeopleDetector()); + } + else if (winSize.width == 64 && winSize.height == 128) + { + ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector64x128()); + hog.setSVMDetector(hog.getDefaultPeopleDetector()); + } + else + { + ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector()); + hog.setSVMDetector(hog.getDefaultPeopleDetector()); + } + + // OpenCL detection + std::vector d_v_locations; + ocl_hog.detect(d_img, d_v_locations, 0); + cv::Mat d_locations(d_v_locations); + + // CPU detection + std::vector v_locations; + switch (type) + { + case CV_8UC1: + hog.detect(img, v_locations, 0); + break; + case CV_8UC4: + default: + hog.detect(img_rgb, v_locations, 0); + break; + } + cv::Mat locations(v_locations); + + char s[100]={0}; + EXPECT_MAT_NEAR(d_locations, locations, 0, s); +} + + +INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine( + testing::Values(cv::Size(64, 128), cv::Size(48, 96)), + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)))); + + +#endif //HAVE_OPENCL diff --git a/modules/ocl/test/test_match_template.cpp b/modules/ocl/test/test_match_template.cpp new file mode 100644 index 0000000000..7d599a6152 --- /dev/null +++ b/modules/ocl/test/test_match_template.cpp @@ -0,0 +1,172 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
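For completeness, a sketch of the detection path exercised by the HOG test above: a CV_8UC1 (or CV_8UC4) frame is uploaded, the descriptor is given one of the bundled people detectors, and hit locations come back as points. The window size, detector choice, and header path follow the test; treat this as an assumption-laden sketch rather than a reference.

    #include <opencv2/opencv.hpp>
    #include <opencv2/ocl/ocl.hpp>   // assumed header for the ocl module

    // Assumes an OpenCL device has already been selected.
    void detectPeople(const cv::Mat& bgr, std::vector<cv::Point>& hits)
    {
        cv::Mat gray;
        cv::cvtColor(bgr, gray, CV_BGR2GRAY);       // the OCL path accepts CV_8UC1 or CV_8UC4

        cv::ocl::HOGDescriptor hog(cv::Size(64, 128));
        hog.setSVMDetector(hog.getDefaultPeopleDetector());

        cv::ocl::oclMat d_img(gray);
        hog.detect(d_img, hits, 0);                 // 0 = default hit threshold, as in the test
    }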
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#include "precomp.hpp" +#define PERF_TEST 0 + +//////////////////////////////////////////////////////////////////////////////// +// MatchTemplate +#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED)) + +IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size); + +const char* TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"}; + +PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMethod) +{ + cv::Size size; + cv::Size templ_size; + int cn; + int method; + std::vector oclinfo; + + virtual void SetUp() + { + size = GET_PARAM(0); + templ_size = GET_PARAM(1); + cn = GET_PARAM(2); + method = GET_PARAM(3); + int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); + CV_Assert(devnums > 0); + } +}; + +TEST_P(MatchTemplate8U, Accuracy) +{ + + std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; + std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl; + std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl; + std::cout << "Channels: " << cn << std::endl; + + cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn)); + cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn)); + + cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ); + cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); + + cv::Mat dst_gold; + cv::matchTemplate(image, templ, dst_gold, method); + + char sss [100] = ""; + + cv::Mat mat_dst; + dst.download(mat_dst); + + + EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); + +#if PERF_TEST + { + P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {}); + P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); + } +#endif // PERF_TEST +} + +PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMethod) +{ + cv::Size size; + cv::Size templ_size; + int cn; + int method; + std::vector oclinfo; + + virtual void SetUp() + { + size = GET_PARAM(0); + templ_size = GET_PARAM(1); + cn = GET_PARAM(2); + method = GET_PARAM(3); + int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); + CV_Assert(devnums > 0); + } +}; + +TEST_P(MatchTemplate32F, Accuracy) +{ + cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn)); + cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn)); + + cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ); + cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); + + cv::Mat dst_gold; + cv::matchTemplate(image, templ, dst_gold, method); + + char sss [100] = ""; + + cv::Mat mat_dst; + dst.download(mat_dst); + + EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); + +#if PERF_TEST + { + std::cout << "Method: " << 
TEMPLATE_METHOD_NAMES[method] << std::endl; + std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl; + std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl; + std::cout << "Channels: " << cn << std::endl; + P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {}); + P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); + } +#endif // PERF_TEST +} + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, + testing::Combine( + DIFFERENT_SIZES, + testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), + testing::Values(Channels(1), Channels(4)), + ALL_TEMPLATE_METHODS + ) +); + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine( + DIFFERENT_SIZES, + testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), + testing::Values(Channels(1), Channels(4)), + testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); + diff --git a/modules/ocl/test/test_pyrdown.cpp b/modules/ocl/test/test_pyrdown.cpp new file mode 100644 index 0000000000..f2270b4a8c --- /dev/null +++ b/modules/ocl/test/test_pyrdown.cpp @@ -0,0 +1,295 @@ +/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Dachuan Zhao, dachuan@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +//#define PRINT_CPU_TIME 1000 +//#define PRINT_TIME + + +#include "precomp.hpp" +#include + +#ifdef HAVE_OPENCL + +using namespace cv; +using namespace cv::ocl; +using namespace cvtest; +using namespace testing; +using namespace std; + +PARAM_TEST_CASE(PyrDown, MatType, bool) +{ + int type; + cv::Scalar val; + + //src mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mask; + cv::Mat dst; + cv::Mat dst1; //bak, for two outputs + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int dstx; + int dsty; + int maskx; + int masky; + + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + cv::Mat dst1_roi; //bak + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + cv::ocl::oclMat gdst1_whole; //bak + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdst1; //bak + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG &rng = TS::ptr()->get_rng(); + + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + mat2 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dst1 = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + } + + void Cleanup() + { + mat1.release(); + mat2.release(); + mask.release(); + dst.release(); + dst1.release(); + mat1_roi.release(); + mat2_roi.release(); + mask_roi.release(); + dst_roi.release(); + dst1_roi.release(); + + gdst_whole.release(); + gdst1_whole.release(); + gmat1.release(); + gmat2.release(); + gdst.release(); + gdst1.release(); + gmask.release(); + } + + void random_roi() + { + cv::RNG &rng = TS::ptr()->get_rng(); + +#ifdef RANDOMROI + //randomize ROI + roicols = rng.uniform(1, mat1.cols); + roirows = rng.uniform(1, mat1.rows); + src1x = rng.uniform(0, mat1.cols - roicols); + src1y = rng.uniform(0, mat1.rows - roirows); + dstx = rng.uniform(0, dst.cols - roicols); + dsty = rng.uniform(0, dst.rows - roirows); +#else + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; +#endif + maskx = rng.uniform(0, mask.cols - roicols); + masky = rng.uniform(0, mask.rows - roirows); + src2x = rng.uniform(0, mat2.cols - roicols); + src2y = rng.uniform(0, mat2.rows - roirows); + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); + mask_roi = mask(Rect(maskx, masky, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows)); + + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; //end + } + +}; + +#define VARNAME(A) string(#A); + + +void PrePrint() +{ + //for(int i = 0; i < MHEIGHT; i++) + //{ + // printf("(%d) ", i); + // for(int k = 0; k < MWIDTH; k++) + // { + // printf("%d ", mat1_roi.data[i * MHEIGHT + k]); + // } + // printf("\n"); + //} +} + +void PostPrint() +{ + 
//dst_roi.convertTo(dst_roi,CV_32S); + //cpu_dst.convertTo(cpu_dst,CV_32S); + //dst_roi -= cpu_dst; + //cpu_dst -= dst_roi; + //for(int i = 0; i < MHEIGHT / 2; i++) + //{ + // printf("(%d) ", i); + // for(int k = 0; k < MWIDTH / 2; k++) + // { + // if(gmat1.depth() == 0) + // { + // if(gmat1.channels() == 1) + // { + // printf("%d ", dst_roi.data[i * MHEIGHT / 2 + k]); + // } + // else + // { + // printf("%d ", ((unsigned*)dst_roi.data)[i * MHEIGHT / 2 + k]); + // } + // } + // else if(gmat1.depth() == 5) + // { + // printf("%.6f ", ((float*)dst_roi.data)[i * MHEIGHT / 2 + k]); + // } + // } + // printf("\n"); + //} + //for(int i = 0; i < MHEIGHT / 2; i++) + //{ + // printf("(%d) ", i); + // for(int k = 0; k < MWIDTH / 2; k++) + // { + // if(gmat1.depth() == 0) + // { + // if(gmat1.channels() == 1) + // { + // printf("%d ", cpu_dst.data[i * MHEIGHT / 2 + k]); + // } + // else + // { + // printf("%d ", ((unsigned*)cpu_dst.data)[i * MHEIGHT / 2 + k]); + // } + // } + // else if(gmat1.depth() == 5) + // { + // printf("%.6f ", ((float*)cpu_dst.data)[i * MHEIGHT / 2 + k]); + // } + // } + // printf("\n"); + //} +} + +////////////////////////////////PyrDown///////////////////////////////////////////////// +//struct PyrDown : ArithmTestBase {}; + +TEST_P(PyrDown, Mat) +{ + for(int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + cv::pyrDown(mat1_roi, dst_roi); + cv::ocl::pyrDown(gmat1, gdst); + + cv::Mat cpu_dst; + gdst.download(cpu_dst); + char s[1024]; + sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y); + + EXPECT_MAT_NEAR(dst_roi, cpu_dst, dst_roi.depth() == CV_32F ? 1e-5f : 1.0f, s); + + Cleanup(); + } +} + + + + +//********test**************** +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + + +#endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_pyrup.cpp b/modules/ocl/test/test_pyrup.cpp new file mode 100644 index 0000000000..c6c5b9c10c --- /dev/null +++ b/modules/ocl/test/test_pyrup.cpp @@ -0,0 +1,91 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Zhang Chunpeng chunpeng@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. 
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "opencv2/core/core.hpp" + +#ifdef HAVE_OPENCL + + +PARAM_TEST_CASE(PyrUp,cv::Size,int) +{ + cv::Size size; + int type; + std::vector oclinfo; + + virtual void SetUp() + { + int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); + CV_Assert(devnums > 0); + size = GET_PARAM(0); + type = GET_PARAM(1); + } +}; + +TEST_P(PyrUp,Accuracy) +{ + cv::Mat src = randomMat(size,type); + + + cv::Mat dst_gold; + cv::pyrUp(src,dst_gold); + + cv::ocl::oclMat dst; + cv::ocl::oclMat srcMat(src); + cv::ocl::pyrUp(srcMat,dst); + char s[100]={0}; + + EXPECT_MAT_NEAR(dst_gold, dst, (src.depth() == CV_32F ? 1e-4f : 1.0),s); + +} + +#if 1 +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine( + testing::Values(cv::Size(32, 32)), + testing::Values(MatType(CV_8UC1),MatType(CV_16UC1),MatType(CV_32FC1),MatType(CV_8UC4), + MatType(CV_16UC4),MatType(CV_32FC4)))); +#endif + +#endif // HAVE_OPENCL \ No newline at end of file diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index ad6555b08c..be759ec412 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -45,6 +45,20 @@ static int failmsg(const char *fmt, ...) 
return 0; } +struct ArgInfo +{ + const char * name; + bool outputarg; + // more fields may be added if necessary + + ArgInfo(const char * name_, bool outputarg_) + : name(name_) + , outputarg(outputarg_) {} + + // to match with older pyopencv_to function signature + operator const char *() const { return name; } +}; + class PyAllowThreads { public: @@ -199,7 +213,8 @@ NumpyAllocator g_numpyAllocator; enum { ARG_NONE = 0, ARG_MAT = 1, ARG_SCALAR = 2 }; -static int pyopencv_to(const PyObject* o, Mat& m, const char* name = "", bool allowND=true) +// special case, when the convertor needs full ArgInfo structure +static int pyopencv_to(const PyObject* o, Mat& m, const ArgInfo info, bool allowND=true) { if(!o || o == Py_None) { @@ -210,7 +225,7 @@ static int pyopencv_to(const PyObject* o, Mat& m, const char* name = "" if( !PyArray_Check(o) ) { - failmsg("%s is not a numpy array", name); + failmsg("%s is not a numpy array", info.name); return false; } @@ -223,14 +238,14 @@ static int pyopencv_to(const PyObject* o, Mat& m, const char* name = "" if( type < 0 ) { - failmsg("%s data type = %d is not supported", name, typenum); + failmsg("%s data type = %d is not supported", info.name, typenum); return false; } int ndims = PyArray_NDIM(o); if(ndims >= CV_MAX_DIM) { - failmsg("%s dimensionality (=%d) is too high", name, ndims); + failmsg("%s dimensionality (=%d) is too high", info.name, ndims); return false; } @@ -238,7 +253,21 @@ static int pyopencv_to(const PyObject* o, Mat& m, const char* name = "" size_t step[CV_MAX_DIM+1], elemsize = CV_ELEM_SIZE1(type); const npy_intp* _sizes = PyArray_DIMS(o); const npy_intp* _strides = PyArray_STRIDES(o); - bool transposed = false; + bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX; + + bool needcopy = (_strides[ndims-1] != elemsize) + || (ismultichannel && _strides[ndims-2] != elemsize*_sizes[ndims-1]); + + if (needcopy) + { + if (info.outputarg) + { + failmsg("output array %s is not row-contiguous (step[ndims-1] != elemsize)", info.name); + return false; + } + o = (PyObject*)PyArray_GETCONTIGUOUS((PyArrayObject*)o); + _strides = PyArray_STRIDES(o); + } for(int i = 0; i < ndims; i++) { @@ -246,20 +275,14 @@ static int pyopencv_to(const PyObject* o, Mat& m, const char* name = "" step[i] = (size_t)_strides[i]; } - if( ndims == 0 || step[ndims-1] > elemsize ) { + // handle degenerate case + if( ndims == 0) { size[ndims] = 1; step[ndims] = elemsize; ndims++; } - if( ndims >= 2 && step[0] < step[1] ) - { - std::swap(size[0], size[1]); - std::swap(step[0], step[1]); - transposed = true; - } - - if( ndims == 3 && size[2] <= CV_CN_MAX && step[1] == elemsize*size[2] ) + if( ismultichannel ) { ndims--; type |= CV_MAKETYPE(0, size[2]); @@ -267,7 +290,7 @@ static int pyopencv_to(const PyObject* o, Mat& m, const char* name = "" if( ndims > 2 && !allowND ) { - failmsg("%s has more than 2 dimensions", name); + failmsg("%s has more than 2 dimensions", info.name); return false; } @@ -276,18 +299,14 @@ static int pyopencv_to(const PyObject* o, Mat& m, const char* name = "" if( m.data ) { m.refcount = refcountFromPyObject(o); - m.addref(); // protect the original numpy array from deallocation - // (since Mat destructor will decrement the reference counter) + if (!needcopy) + { + m.addref(); // protect the original numpy array from deallocation + // (since Mat destructor will decrement the reference counter) + } }; m.allocator = &g_numpyAllocator; - if( transposed ) - { - Mat tmp; - tmp.allocator = &g_numpyAllocator; - transpose(m, tmp); - m = tmp; - } return true; } @@ 
-593,7 +612,7 @@ static inline PyObject* pyopencv_from(const Point2d& p) template struct pyopencvVecConverter { - static bool to(PyObject* obj, vector<_Tp>& value, const char* name="") + static bool to(PyObject* obj, vector<_Tp>& value, const ArgInfo info) { typedef typename DataType<_Tp>::channel_type _Cp; if(!obj || obj == Py_None) @@ -601,12 +620,12 @@ template struct pyopencvVecConverter if (PyArray_Check(obj)) { Mat m; - pyopencv_to(obj, m, name); + pyopencv_to(obj, m, info); m.copyTo(value); } if (!PySequence_Check(obj)) return false; - PyObject *seq = PySequence_Fast(obj, name); + PyObject *seq = PySequence_Fast(obj, info.name); if (seq == NULL) return false; int i, j, n = (int)PySequence_Fast_GET_SIZE(seq); @@ -635,7 +654,7 @@ template struct pyopencvVecConverter if( PyArray_Check(item)) { Mat src; - pyopencv_to(item, src, name); + pyopencv_to(item, src, info); if( src.dims != 2 || src.channels() != 1 || ((src.cols != 1 || src.rows != channels) && (src.cols != channels || src.rows != 1))) @@ -647,7 +666,7 @@ template struct pyopencvVecConverter continue; } - seq_i = PySequence_Fast(item, name); + seq_i = PySequence_Fast(item, info.name); if( !seq_i || (int)PySequence_Fast_GET_SIZE(seq_i) != channels ) { Py_XDECREF(seq_i); @@ -694,9 +713,9 @@ template struct pyopencvVecConverter }; -template static inline bool pyopencv_to(PyObject* obj, vector<_Tp>& value, const char* name="") +template static inline bool pyopencv_to(PyObject* obj, vector<_Tp>& value, const ArgInfo info) { - return pyopencvVecConverter<_Tp>::to(obj, value, name); + return pyopencvVecConverter<_Tp>::to(obj, value, info); } template static inline PyObject* pyopencv_from(const vector<_Tp>& value) @@ -707,13 +726,13 @@ template static inline PyObject* pyopencv_from(const vector<_Tp>& static PyObject* pyopencv_from(const KeyPoint&); static PyObject* pyopencv_from(const DMatch&); -template static inline bool pyopencv_to_generic_vec(PyObject* obj, vector<_Tp>& value, const char* name="") +template static inline bool pyopencv_to_generic_vec(PyObject* obj, vector<_Tp>& value, const ArgInfo info) { if(!obj || obj == Py_None) return true; if (!PySequence_Check(obj)) return false; - PyObject *seq = PySequence_Fast(obj, name); + PyObject *seq = PySequence_Fast(obj, info.name); if (seq == NULL) return false; int i, n = (int)PySequence_Fast_GET_SIZE(seq); @@ -724,7 +743,7 @@ template static inline bool pyopencv_to_generic_vec(PyObject* obj, for( i = 0; i < n; i++ ) { PyObject* item = items[i]; - if(!pyopencv_to(item, value[i], name)) + if(!pyopencv_to(item, value[i], info)) break; } Py_DECREF(seq); @@ -766,9 +785,9 @@ template struct pyopencvVecConverter > template<> struct pyopencvVecConverter { - static bool to(PyObject* obj, vector& value, const char* name="") + static bool to(PyObject* obj, vector& value, const ArgInfo info) { - return pyopencv_to_generic_vec(obj, value, name); + return pyopencv_to_generic_vec(obj, value, info); } static PyObject* from(const vector& value) @@ -779,9 +798,9 @@ template<> struct pyopencvVecConverter template<> struct pyopencvVecConverter { - static bool to(PyObject* obj, vector& value, const char* name="") + static bool to(PyObject* obj, vector& value, const ArgInfo info) { - return pyopencv_to_generic_vec(obj, value, name); + return pyopencv_to_generic_vec(obj, value, info); } static PyObject* from(const vector& value) @@ -792,9 +811,9 @@ template<> struct pyopencvVecConverter template<> struct pyopencvVecConverter { - static bool to(PyObject* obj, vector& value, const char* name="") + static 
bool to(PyObject* obj, vector& value, const ArgInfo info) { - return pyopencv_to_generic_vec(obj, value, name); + return pyopencv_to_generic_vec(obj, value, info); } static PyObject* from(const vector& value) @@ -805,9 +824,9 @@ template<> struct pyopencvVecConverter template<> struct pyopencvVecConverter { - static bool to(PyObject* obj, vector& value, const char* name="") + static bool to(PyObject* obj, vector& value, const ArgInfo info) { - return pyopencv_to_generic_vec(obj, value, name); + return pyopencv_to_generic_vec(obj, value, info); } static PyObject* from(const vector& value) diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index c7ae0663ac..b239f085bc 100644 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -329,6 +329,9 @@ class ArgInfo(object): def isbig(self): return self.tp == "Mat" or self.tp == "vector_Mat"# or self.tp.startswith("vector") + def crepr(self): + return "ArgInfo(\"%s\", %d)" % (self.name, self.outputarg) + class FuncVariant(object): def __init__(self, classname, name, decl, isconstructor): @@ -561,7 +564,7 @@ class FuncInfo(object): if amapping[1] == "O": code_decl += " PyObject* pyobj_%s = NULL;\n" % (a.name,) parse_name = "pyobj_" + a.name - code_cvt_list.append("pyopencv_to(pyobj_%s, %s)" % (a.name, a.name)) + code_cvt_list.append("pyopencv_to(pyobj_%s, %s, %s)" % (a.name, a.name, a.crepr())) all_cargs.append([amapping, parse_name]) diff --git a/modules/stitching/include/opencv2/stitching/detail/seam_finders.hpp b/modules/stitching/include/opencv2/stitching/detail/seam_finders.hpp index bab29a09e9..c700a169c0 100644 --- a/modules/stitching/include/opencv2/stitching/detail/seam_finders.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/seam_finders.hpp @@ -43,6 +43,7 @@ #ifndef __OPENCV_STITCHING_SEAM_FINDERS_HPP__ #define __OPENCV_STITCHING_SEAM_FINDERS_HPP__ +#include #include "opencv2/core/core.hpp" #include "opencv2/opencv_modules.hpp" @@ -92,6 +93,115 @@ private: }; +class CV_EXPORTS DpSeamFinder : public SeamFinder +{ +public: + enum CostFunction { COLOR, COLOR_GRAD }; + + DpSeamFinder(CostFunction costFunc = COLOR); + + CostFunction costFunction() const { return costFunc_; } + void setCostFunction(CostFunction val) { costFunc_ = val; } + + virtual void find(const std::vector &src, const std::vector &corners, + std::vector &masks); + +private: + enum ComponentState + { + FIRST = 1, SECOND = 2, INTERS = 4, + INTERS_FIRST = INTERS | FIRST, + INTERS_SECOND = INTERS | SECOND + }; + + class ImagePairLess + { + public: + ImagePairLess(const std::vector &images, const std::vector &corners) + : src_(&images[0]), corners_(&corners[0]) {} + + bool operator() (const std::pair &l, const std::pair &r) const + { + Point c1 = corners_[l.first] + Point(src_[l.first].cols / 2, src_[l.first].rows / 2); + Point c2 = corners_[l.second] + Point(src_[l.second].cols / 2, src_[l.second].rows / 2); + int d1 = (c1 - c2).dot(c1 - c2); + + c1 = corners_[r.first] + Point(src_[r.first].cols / 2, src_[r.first].rows / 2); + c2 = corners_[r.second] + Point(src_[r.second].cols / 2, src_[r.second].rows / 2); + int d2 = (c1 - c2).dot(c1 - c2); + + return d1 < d2; + } + + private: + const Mat *src_; + const Point *corners_; + }; + + class ClosePoints + { + public: + ClosePoints(int minDist) : minDist_(minDist) {} + + bool operator() (const Point &p1, const Point &p2) const + { + int dist2 = (p1.x-p2.x) * (p1.x-p2.x) + (p1.y-p2.y) * (p1.y-p2.y); + return dist2 < minDist_ * minDist_; + } + + private: + int minDist_; + }; + + void 
process( + const Mat &image1, const Mat &image2, Point tl1, Point tl2, Mat &mask1, Mat &mask2); + + void findComponents(); + + void findEdges(); + + void resolveConflicts( + const Mat &image1, const Mat &image2, Point tl1, Point tl2, Mat &mask1, Mat &mask2); + + void computeGradients(const Mat &image1, const Mat &image2); + + bool hasOnlyOneNeighbor(int comp); + + bool closeToContour(int y, int x, const Mat_ &contourMask); + + bool getSeamTips(int comp1, int comp2, Point &p1, Point &p2); + + void computeCosts( + const Mat &image1, const Mat &image2, Point tl1, Point tl2, + int comp, Mat_ &costV, Mat_ &costH); + + bool estimateSeam( + const Mat &image1, const Mat &image2, Point tl1, Point tl2, int comp, + Point p1, Point p2, std::vector &seam, bool &isHorizontal); + + void updateLabelsUsingSeam( + int comp1, int comp2, const std::vector &seam, bool isHorizontalSeam); + + CostFunction costFunc_; + + // processing images pair data + Point unionTl_, unionBr_; + Size unionSize_; + Mat_ mask1_, mask2_; + Mat_ contour1mask_, contour2mask_; + Mat_ gradx1_, grady1_; + Mat_ gradx2_, grady2_; + + // components data + int ncomps_; + Mat_ labels_; + std::vector states_; + std::vector tls_, brs_; + std::vector > contours_; + std::set > edges_; +}; + + class CV_EXPORTS GraphCutSeamFinderBase { public: diff --git a/modules/stitching/include/opencv2/stitching/detail/warpers_inl.hpp b/modules/stitching/include/opencv2/stitching/detail/warpers_inl.hpp index edfc9c3e4b..d1804f4e3b 100644 --- a/modules/stitching/include/opencv2/stitching/detail/warpers_inl.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/warpers_inl.hpp @@ -1,764 +1,765 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_STITCHING_WARPERS_INL_HPP__ -#define __OPENCV_STITCHING_WARPERS_INL_HPP__ - -#include "opencv2/core/core.hpp" -#include "warpers.hpp" // Make your IDE see declarations - -namespace cv { -namespace detail { - -template -Point2f RotationWarperBase
<P>
::warpPoint(const Point2f &pt, const Mat &K, const Mat &R) -{ - projector_.setCameraParams(K, R); - Point2f uv; - projector_.mapForward(pt.x, pt.y, uv.x, uv.y); - return uv; -} - - -template -Rect RotationWarperBase
<P>
::buildMaps(Size src_size, const Mat &K, const Mat &R, Mat &xmap, Mat &ymap) -{ - projector_.setCameraParams(K, R); - - Point dst_tl, dst_br; - detectResultRoi(src_size, dst_tl, dst_br); - - xmap.create(dst_br.y - dst_tl.y + 1, dst_br.x - dst_tl.x + 1, CV_32F); - ymap.create(dst_br.y - dst_tl.y + 1, dst_br.x - dst_tl.x + 1, CV_32F); - - float x, y; - for (int v = dst_tl.y; v <= dst_br.y; ++v) - { - for (int u = dst_tl.x; u <= dst_br.x; ++u) - { - projector_.mapBackward(static_cast(u), static_cast(v), x, y); - xmap.at(v - dst_tl.y, u - dst_tl.x) = x; - ymap.at(v - dst_tl.y, u - dst_tl.x) = y; - } - } - - return Rect(dst_tl, dst_br); -} - - -template -Point RotationWarperBase
<P>
::warp(const Mat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode, - Mat &dst) -{ - Mat xmap, ymap; - Rect dst_roi = buildMaps(src.size(), K, R, xmap, ymap); - - dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type()); - remap(src, dst, xmap, ymap, interp_mode, border_mode); - - return dst_roi.tl(); -} - - -template -void RotationWarperBase
<P>
::warpBackward(const Mat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode, - Size dst_size, Mat &dst) -{ - projector_.setCameraParams(K, R); - - Point src_tl, src_br; - detectResultRoi(dst_size, src_tl, src_br); - CV_Assert(src_br.x - src_tl.x + 1 == src.cols && src_br.y - src_tl.y + 1 == src.rows); - - Mat xmap(dst_size, CV_32F); - Mat ymap(dst_size, CV_32F); - - float u, v; - for (int y = 0; y < dst_size.height; ++y) - { - for (int x = 0; x < dst_size.width; ++x) - { - projector_.mapForward(static_cast(x), static_cast(y), u, v); - xmap.at(y, x) = u - src_tl.x; - ymap.at(y, x) = v - src_tl.y; - } - } - - dst.create(dst_size, src.type()); - remap(src, dst, xmap, ymap, interp_mode, border_mode); -} - - -template -Rect RotationWarperBase
<P>
::warpRoi(Size src_size, const Mat &K, const Mat &R) -{ - projector_.setCameraParams(K, R); - - Point dst_tl, dst_br; - detectResultRoi(src_size, dst_tl, dst_br); - - return Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)); -} - - -template -void RotationWarperBase
<P>
::detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br) -{ - float tl_uf = std::numeric_limits::max(); - float tl_vf = std::numeric_limits::max(); - float br_uf = -std::numeric_limits::max(); - float br_vf = -std::numeric_limits::max(); - - float u, v; - for (int y = 0; y < src_size.height; ++y) - { - for (int x = 0; x < src_size.width; ++x) - { - projector_.mapForward(static_cast(x), static_cast(y), u, v); - tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v); - br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v); - } - } - - dst_tl.x = static_cast(tl_uf); - dst_tl.y = static_cast(tl_vf); - dst_br.x = static_cast(br_uf); - dst_br.y = static_cast(br_vf); -} - - -template -void RotationWarperBase
<P>
::detectResultRoiByBorder(Size src_size, Point &dst_tl, Point &dst_br) -{ - float tl_uf = std::numeric_limits::max(); - float tl_vf = std::numeric_limits::max(); - float br_uf = -std::numeric_limits::max(); - float br_vf = -std::numeric_limits::max(); - - float u, v; - for (float x = 0; x < src_size.width; ++x) - { - projector_.mapForward(static_cast(x), 0, u, v); - tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v); - br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v); - - projector_.mapForward(static_cast(x), static_cast(src_size.height - 1), u, v); - tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v); - br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v); - } - for (int y = 0; y < src_size.height; ++y) - { - projector_.mapForward(0, static_cast(y), u, v); - tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v); - br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v); - - projector_.mapForward(static_cast(src_size.width - 1), static_cast(y), u, v); - tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v); - br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v); - } - - dst_tl.x = static_cast(tl_uf); - dst_tl.y = static_cast(tl_vf); - dst_br.x = static_cast(br_uf); - dst_br.y = static_cast(br_vf); -} - - -inline -void PlaneProjector::mapForward(float x, float y, float &u, float &v) -{ - float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - x_ = t[0] + x_ / z_ * (1 - t[2]); - y_ = t[1] + y_ / z_ * (1 - t[2]); - - u = scale * x_; - v = scale * y_; -} - - -inline -void PlaneProjector::mapBackward(float u, float v, float &x, float &y) -{ - u = u / scale - t[0]; - v = v / scale - t[1]; - - float z; - x = k_rinv[0] * u + k_rinv[1] * v + k_rinv[2] * (1 - t[2]); - y = k_rinv[3] * u + k_rinv[4] * v + k_rinv[5] * (1 - t[2]); - z = k_rinv[6] * u + k_rinv[7] * v + k_rinv[8] * (1 - t[2]); - - x /= z; - y /= z; -} - - -inline -void SphericalProjector::mapForward(float x, float y, float &u, float &v) -{ - float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - u = scale * atan2f(x_, z_); - v = scale * (static_cast(CV_PI) - acosf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_))); -} - - -inline -void SphericalProjector::mapBackward(float u, float v, float &x, float &y) -{ - u /= scale; - v /= scale; - - float sinv = sinf(static_cast(CV_PI) - v); - float x_ = sinv * sinf(u); - float y_ = cosf(static_cast(CV_PI) - v); - float z_ = sinv * cosf(u); - - float z; - x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; - y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; - z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; -} - - -inline -void CylindricalProjector::mapForward(float x, float y, float &u, float &v) -{ - float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - u = scale * atan2f(x_, z_); - v = scale * y_ / sqrtf(x_ * x_ + z_ * z_); -} - - -inline -void CylindricalProjector::mapBackward(float u, float v, float &x, float &y) -{ - u /= scale; - v /= scale; - - float x_ = sinf(u); - float y_ = v; - float z_ = cosf(u); - - float z; - x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; - y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; - z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; 
- - if (z > 0) { x /= z; y /= z; } - else x = y = -1; -} - -inline -void FisheyeProjector::mapForward(float x, float y, float &u, float &v) -{ - float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - float u_ = atan2f(x_, z_); - float v_ = (float)CV_PI - acosf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); - - u = scale * v_ * cosf(u_); - v = scale * v_ * sinf(u_); -} - -inline -void FisheyeProjector::mapBackward(float u, float v, float &x, float &y) -{ - u /= scale; - v /= scale; - - float u_ = atan2f(v, u); - float v_ = sqrtf(u*u + v*v); - - float sinv = sinf((float)CV_PI - v_); - float x_ = sinv * sinf(u_); - float y_ = cosf((float)CV_PI - v_); - float z_ = sinv * cosf(u_); - - float z; - x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; - y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; - z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; -} - -inline -void StereographicProjector::mapForward(float x, float y, float &u, float &v) -{ - float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - float u_ = atan2f(x_, z_); - float v_ = (float)CV_PI - acosf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); - - float r = sinf(v_) / (1 - cosf(v_)); - - u = scale * r * cos(u_); - v = scale * r * sin(u_); -} - -inline -void StereographicProjector::mapBackward(float u, float v, float &x, float &y) -{ - u /= scale; - v /= scale; - - float u_ = atan2f(v, u); - float r = sqrtf(u*u + v*v); - float v_ = 2 * atanf(1.f / r); - - float sinv = sinf((float)CV_PI - v_); - float x_ = sinv * sinf(u_); - float y_ = cosf((float)CV_PI - v_); - float z_ = sinv * cosf(u_); - - float z; - x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; - y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; - z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; -} - -inline -void CompressedRectilinearProjector::mapForward(float x, float y, float &u, float &v) -{ - float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - float u_ = atan2f(x_, z_); - float v_ = asinf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); - - u = scale * a * tanf(u_ / a); - v = scale * b * tanf(v_) / cosf(u_); -} - -inline -void CompressedRectilinearProjector::mapBackward(float u, float v, float &x, float &y) -{ - u /= scale; - v /= scale; - - float aatg = a * atanf(u / a); - float u_ = aatg; - float v_ = atanf(v * cosf(aatg) / b); - - float cosv = cosf(v_); - float x_ = cosv * sinf(u_); - float y_ = sinf(v_); - float z_ = cosv * cosf(u_); - - float z; - x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; - y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; - z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; -} - -inline -void CompressedRectilinearPortraitProjector::mapForward(float x, float y, float &u, float &v) -{ - float y_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float x_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - float u_ = atan2f(x_, z_); - float v_ = asinf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); - - u = - scale * a * tanf(u_ / a); - v = scale * b * tanf(v_) / cosf(u_); -} - -inline 
-void CompressedRectilinearPortraitProjector::mapBackward(float u, float v, float &x, float &y) -{ - u /= - scale; - v /= scale; - - float aatg = a * atanf(u / a); - float u_ = aatg; - float v_ = atanf(v * cosf( aatg ) / b); - - float cosv = cosf(v_); - float y_ = cosv * sinf(u_); - float x_ = sinf(v_); - float z_ = cosv * cosf(u_); - - float z; - x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; - y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; - z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; -} - -inline -void PaniniProjector::mapForward(float x, float y, float &u, float &v) -{ - float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - float u_ = atan2f(x_, z_); - float v_ = asinf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); - - float tg = a * tanf(u_ / a); - u = scale * tg; - - float sinu = sinf(u_); - if ( fabs(sinu) < 1E-7 ) - v = scale * b * tanf(v_); - else - v = scale * b * tg * tanf(v_) / sinu; -} - -inline -void PaniniProjector::mapBackward(float u, float v, float &x, float &y) -{ - u /= scale; - v /= scale; - - float lamda = a * atanf(u / a); - float u_ = lamda; - - float v_; - if ( fabs(lamda) > 1E-7) - v_ = atanf(v * sinf(lamda) / (b * a * tanf(lamda / a))); - else - v_ = atanf(v / b); - - float cosv = cosf(v_); - float x_ = cosv * sinf(u_); - float y_ = sinf(v_); - float z_ = cosv * cosf(u_); - - float z; - x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; - y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; - z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; -} - -inline -void PaniniPortraitProjector::mapForward(float x, float y, float &u, float &v) -{ - float y_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float x_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - float u_ = atan2f(x_, z_); - float v_ = asinf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); - - float tg = a * tanf(u_ / a); - u = - scale * tg; - - float sinu = sinf( u_ ); - if ( fabs(sinu) < 1E-7 ) - v = scale * b * tanf(v_); - else - v = scale * b * tg * tanf(v_) / sinu; -} - -inline -void PaniniPortraitProjector::mapBackward(float u, float v, float &x, float &y) -{ - u /= - scale; - v /= scale; - - float lamda = a * atanf(u / a); - float u_ = lamda; - - float v_; - if ( fabs(lamda) > 1E-7) - v_ = atanf(v * sinf(lamda) / (b * a * tanf(lamda/a))); - else - v_ = atanf(v / b); - - float cosv = cosf(v_); - float y_ = cosv * sinf(u_); - float x_ = sinf(v_); - float z_ = cosv * cosf(u_); - - float z; - x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; - y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; - z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; -} - -inline -void MercatorProjector::mapForward(float x, float y, float &u, float &v) -{ - float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - float u_ = atan2f(x_, z_); - float v_ = asinf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); - - u = scale * u_; - v = scale * logf( tanf( (float)(CV_PI/4) + v_/2 ) ); -} - -inline -void MercatorProjector::mapBackward(float u, float v, float &x, float &y) -{ - u /= scale; - v /= scale; - - float v_ = atanf( sinhf(v) ); - float u_ = u; - - float cosv = 
cosf(v_); - float x_ = cosv * sinf(u_); - float y_ = sinf(v_); - float z_ = cosv * cosf(u_); - - float z; - x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; - y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; - z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; -} - -inline -void TransverseMercatorProjector::mapForward(float x, float y, float &u, float &v) -{ - float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - float u_ = atan2f(x_, z_); - float v_ = asinf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); - - float B = cosf(v_) * sinf(u_); - - u = scale / 2 * logf( (1+B) / (1-B) ); - v = scale * atan2f(tanf(v_), cosf(u_)); -} - -inline -void TransverseMercatorProjector::mapBackward(float u, float v, float &x, float &y) -{ - u /= scale; - v /= scale; - - float v_ = asinf( sinf(v) / coshf(u) ); - float u_ = atan2f( sinhf(u), cos(v) ); - - float cosv = cosf(v_); - float x_ = cosv * sinf(u_); - float y_ = sinf(v_); - float z_ = cosv * cosf(u_); - - float z; - x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; - y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; - z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; -} - -inline -void SphericalPortraitProjector::mapForward(float x, float y, float &u0, float &v0) -{ - float x0_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float y0_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - float x_ = y0_; - float y_ = x0_; - float u, v; - - u = scale * atan2f(x_, z_); - v = scale * (static_cast(CV_PI) - acosf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_))); - - u0 = -u;//v; - v0 = v;//u; -} - - -inline -void SphericalPortraitProjector::mapBackward(float u0, float v0, float &x, float &y) -{ - float u, v; - u = -u0;//v0; - v = v0;//u0; - - u /= scale; - v /= scale; - - float sinv = sinf(static_cast(CV_PI) - v); - float x0_ = sinv * sinf(u); - float y0_ = cosf(static_cast(CV_PI) - v); - float z_ = sinv * cosf(u); - - float x_ = y0_; - float y_ = x0_; - - float z; - x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; - y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; - z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; -} - -inline -void CylindricalPortraitProjector::mapForward(float x, float y, float &u0, float &v0) -{ - float x0_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float y0_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - float x_ = y0_; - float y_ = x0_; - float u, v; - - u = scale * atan2f(x_, z_); - v = scale * y_ / sqrtf(x_ * x_ + z_ * z_); - - u0 = -u;//v; - v0 = v;//u; -} - - -inline -void CylindricalPortraitProjector::mapBackward(float u0, float v0, float &x, float &y) -{ - float u, v; - u = -u0;//v0; - v = v0;//u0; - - u /= scale; - v /= scale; - - float x0_ = sinf(u); - float y0_ = v; - float z_ = cosf(u); - - float x_ = y0_; - float y_ = x0_; - - float z; - x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; - y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; - z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; -} - -inline -void PlanePortraitProjector::mapForward(float x, float y, float &u0, float &v0) -{ - float x0_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; - float 
y0_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; - float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; - - float x_ = y0_; - float y_ = x0_; - - x_ = t[0] + x_ / z_ * (1 - t[2]); - y_ = t[1] + y_ / z_ * (1 - t[2]); - - float u,v; - u = scale * x_; - v = scale * y_; - - u0 = -u; - v0 = v; -} - - -inline -void PlanePortraitProjector::mapBackward(float u0, float v0, float &x, float &y) -{ - float u, v; - u = -u0; - v = v0; - - u = u / scale - t[0]; - v = v / scale - t[1]; - - float z; - x = k_rinv[0] * v + k_rinv[1] * u + k_rinv[2] * (1 - t[2]); - y = k_rinv[3] * v + k_rinv[4] * u + k_rinv[5] * (1 - t[2]); - z = k_rinv[6] * v + k_rinv[7] * u + k_rinv[8] * (1 - t[2]); - - x /= z; - y /= z; -} - - -} // namespace detail -} // namespace cv - -#endif // __OPENCV_STITCHING_WARPERS_INL_HPP__ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_STITCHING_WARPERS_INL_HPP__ +#define __OPENCV_STITCHING_WARPERS_INL_HPP__ + +#include "opencv2/core/core.hpp" +#include "warpers.hpp" // Make your IDE see declarations + +namespace cv { +namespace detail { + +template +Point2f RotationWarperBase
<P>
::warpPoint(const Point2f &pt, const Mat &K, const Mat &R) +{ + projector_.setCameraParams(K, R); + Point2f uv; + projector_.mapForward(pt.x, pt.y, uv.x, uv.y); + return uv; +} + + +template +Rect RotationWarperBase
<P>
::buildMaps(Size src_size, const Mat &K, const Mat &R, Mat &xmap, Mat &ymap) +{ + projector_.setCameraParams(K, R); + + Point dst_tl, dst_br; + detectResultRoi(src_size, dst_tl, dst_br); + + xmap.create(dst_br.y - dst_tl.y + 1, dst_br.x - dst_tl.x + 1, CV_32F); + ymap.create(dst_br.y - dst_tl.y + 1, dst_br.x - dst_tl.x + 1, CV_32F); + + float x, y; + for (int v = dst_tl.y; v <= dst_br.y; ++v) + { + for (int u = dst_tl.x; u <= dst_br.x; ++u) + { + projector_.mapBackward(static_cast(u), static_cast(v), x, y); + xmap.at(v - dst_tl.y, u - dst_tl.x) = x; + ymap.at(v - dst_tl.y, u - dst_tl.x) = y; + } + } + + return Rect(dst_tl, dst_br); +} + + +template +Point RotationWarperBase
<P>
::warp(const Mat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode, + Mat &dst) +{ + Mat xmap, ymap; + Rect dst_roi = buildMaps(src.size(), K, R, xmap, ymap); + + dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type()); + remap(src, dst, xmap, ymap, interp_mode, border_mode); + + return dst_roi.tl(); +} + + +template +void RotationWarperBase
<P>
::warpBackward(const Mat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode, + Size dst_size, Mat &dst) +{ + projector_.setCameraParams(K, R); + + Point src_tl, src_br; + detectResultRoi(dst_size, src_tl, src_br); + CV_Assert(src_br.x - src_tl.x + 1 == src.cols && src_br.y - src_tl.y + 1 == src.rows); + + Mat xmap(dst_size, CV_32F); + Mat ymap(dst_size, CV_32F); + + float u, v; + for (int y = 0; y < dst_size.height; ++y) + { + for (int x = 0; x < dst_size.width; ++x) + { + projector_.mapForward(static_cast(x), static_cast(y), u, v); + xmap.at(y, x) = u - src_tl.x; + ymap.at(y, x) = v - src_tl.y; + } + } + + dst.create(dst_size, src.type()); + remap(src, dst, xmap, ymap, interp_mode, border_mode); +} + + +template +Rect RotationWarperBase
<P>
::warpRoi(Size src_size, const Mat &K, const Mat &R) +{ + projector_.setCameraParams(K, R); + + Point dst_tl, dst_br; + detectResultRoi(src_size, dst_tl, dst_br); + + return Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)); +} + + +template +void RotationWarperBase
<P>
::detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br) +{ + float tl_uf = std::numeric_limits::max(); + float tl_vf = std::numeric_limits::max(); + float br_uf = -std::numeric_limits::max(); + float br_vf = -std::numeric_limits::max(); + + float u, v; + for (int y = 0; y < src_size.height; ++y) + { + for (int x = 0; x < src_size.width; ++x) + { + projector_.mapForward(static_cast(x), static_cast(y), u, v); + tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v); + br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v); + } + } + + dst_tl.x = static_cast(tl_uf); + dst_tl.y = static_cast(tl_vf); + dst_br.x = static_cast(br_uf); + dst_br.y = static_cast(br_vf); +} + + +template +void RotationWarperBase
<P>
::detectResultRoiByBorder(Size src_size, Point &dst_tl, Point &dst_br) +{ + float tl_uf = std::numeric_limits::max(); + float tl_vf = std::numeric_limits::max(); + float br_uf = -std::numeric_limits::max(); + float br_vf = -std::numeric_limits::max(); + + float u, v; + for (float x = 0; x < src_size.width; ++x) + { + projector_.mapForward(static_cast(x), 0, u, v); + tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v); + br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v); + + projector_.mapForward(static_cast(x), static_cast(src_size.height - 1), u, v); + tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v); + br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v); + } + for (int y = 0; y < src_size.height; ++y) + { + projector_.mapForward(0, static_cast(y), u, v); + tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v); + br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v); + + projector_.mapForward(static_cast(src_size.width - 1), static_cast(y), u, v); + tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v); + br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v); + } + + dst_tl.x = static_cast(tl_uf); + dst_tl.y = static_cast(tl_vf); + dst_br.x = static_cast(br_uf); + dst_br.y = static_cast(br_vf); +} + + +inline +void PlaneProjector::mapForward(float x, float y, float &u, float &v) +{ + float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + x_ = t[0] + x_ / z_ * (1 - t[2]); + y_ = t[1] + y_ / z_ * (1 - t[2]); + + u = scale * x_; + v = scale * y_; +} + + +inline +void PlaneProjector::mapBackward(float u, float v, float &x, float &y) +{ + u = u / scale - t[0]; + v = v / scale - t[1]; + + float z; + x = k_rinv[0] * u + k_rinv[1] * v + k_rinv[2] * (1 - t[2]); + y = k_rinv[3] * u + k_rinv[4] * v + k_rinv[5] * (1 - t[2]); + z = k_rinv[6] * u + k_rinv[7] * v + k_rinv[8] * (1 - t[2]); + + x /= z; + y /= z; +} + + +inline +void SphericalProjector::mapForward(float x, float y, float &u, float &v) +{ + float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + u = scale * atan2f(x_, z_); + float w = y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_); + v = scale * (static_cast(CV_PI) - acosf(w == w ? 
w : 0)); +} + + +inline +void SphericalProjector::mapBackward(float u, float v, float &x, float &y) +{ + u /= scale; + v /= scale; + + float sinv = sinf(static_cast(CV_PI) - v); + float x_ = sinv * sinf(u); + float y_ = cosf(static_cast(CV_PI) - v); + float z_ = sinv * cosf(u); + + float z; + x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; + y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; + z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; +} + + +inline +void CylindricalProjector::mapForward(float x, float y, float &u, float &v) +{ + float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + u = scale * atan2f(x_, z_); + v = scale * y_ / sqrtf(x_ * x_ + z_ * z_); +} + + +inline +void CylindricalProjector::mapBackward(float u, float v, float &x, float &y) +{ + u /= scale; + v /= scale; + + float x_ = sinf(u); + float y_ = v; + float z_ = cosf(u); + + float z; + x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; + y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; + z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; +} + +inline +void FisheyeProjector::mapForward(float x, float y, float &u, float &v) +{ + float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + float u_ = atan2f(x_, z_); + float v_ = (float)CV_PI - acosf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); + + u = scale * v_ * cosf(u_); + v = scale * v_ * sinf(u_); +} + +inline +void FisheyeProjector::mapBackward(float u, float v, float &x, float &y) +{ + u /= scale; + v /= scale; + + float u_ = atan2f(v, u); + float v_ = sqrtf(u*u + v*v); + + float sinv = sinf((float)CV_PI - v_); + float x_ = sinv * sinf(u_); + float y_ = cosf((float)CV_PI - v_); + float z_ = sinv * cosf(u_); + + float z; + x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; + y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; + z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; +} + +inline +void StereographicProjector::mapForward(float x, float y, float &u, float &v) +{ + float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + float u_ = atan2f(x_, z_); + float v_ = (float)CV_PI - acosf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); + + float r = sinf(v_) / (1 - cosf(v_)); + + u = scale * r * cos(u_); + v = scale * r * sin(u_); +} + +inline +void StereographicProjector::mapBackward(float u, float v, float &x, float &y) +{ + u /= scale; + v /= scale; + + float u_ = atan2f(v, u); + float r = sqrtf(u*u + v*v); + float v_ = 2 * atanf(1.f / r); + + float sinv = sinf((float)CV_PI - v_); + float x_ = sinv * sinf(u_); + float y_ = cosf((float)CV_PI - v_); + float z_ = sinv * cosf(u_); + + float z; + x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; + y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; + z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; +} + +inline +void CompressedRectilinearProjector::mapForward(float x, float y, float &u, float &v) +{ + float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + 
r_kinv[7] * y + r_kinv[8]; + + float u_ = atan2f(x_, z_); + float v_ = asinf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); + + u = scale * a * tanf(u_ / a); + v = scale * b * tanf(v_) / cosf(u_); +} + +inline +void CompressedRectilinearProjector::mapBackward(float u, float v, float &x, float &y) +{ + u /= scale; + v /= scale; + + float aatg = a * atanf(u / a); + float u_ = aatg; + float v_ = atanf(v * cosf(aatg) / b); + + float cosv = cosf(v_); + float x_ = cosv * sinf(u_); + float y_ = sinf(v_); + float z_ = cosv * cosf(u_); + + float z; + x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; + y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; + z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; +} + +inline +void CompressedRectilinearPortraitProjector::mapForward(float x, float y, float &u, float &v) +{ + float y_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float x_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + float u_ = atan2f(x_, z_); + float v_ = asinf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); + + u = - scale * a * tanf(u_ / a); + v = scale * b * tanf(v_) / cosf(u_); +} + +inline +void CompressedRectilinearPortraitProjector::mapBackward(float u, float v, float &x, float &y) +{ + u /= - scale; + v /= scale; + + float aatg = a * atanf(u / a); + float u_ = aatg; + float v_ = atanf(v * cosf( aatg ) / b); + + float cosv = cosf(v_); + float y_ = cosv * sinf(u_); + float x_ = sinf(v_); + float z_ = cosv * cosf(u_); + + float z; + x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; + y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; + z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; +} + +inline +void PaniniProjector::mapForward(float x, float y, float &u, float &v) +{ + float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + float u_ = atan2f(x_, z_); + float v_ = asinf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); + + float tg = a * tanf(u_ / a); + u = scale * tg; + + float sinu = sinf(u_); + if ( fabs(sinu) < 1E-7 ) + v = scale * b * tanf(v_); + else + v = scale * b * tg * tanf(v_) / sinu; +} + +inline +void PaniniProjector::mapBackward(float u, float v, float &x, float &y) +{ + u /= scale; + v /= scale; + + float lamda = a * atanf(u / a); + float u_ = lamda; + + float v_; + if ( fabs(lamda) > 1E-7) + v_ = atanf(v * sinf(lamda) / (b * a * tanf(lamda / a))); + else + v_ = atanf(v / b); + + float cosv = cosf(v_); + float x_ = cosv * sinf(u_); + float y_ = sinf(v_); + float z_ = cosv * cosf(u_); + + float z; + x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; + y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; + z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; +} + +inline +void PaniniPortraitProjector::mapForward(float x, float y, float &u, float &v) +{ + float y_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float x_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + float u_ = atan2f(x_, z_); + float v_ = asinf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); + + float tg = a * tanf(u_ / a); + u = - scale * tg; + + float sinu = sinf( u_ ); + if ( fabs(sinu) < 1E-7 ) + v = scale * b * tanf(v_); + else + v = scale * b * tg * tanf(v_) / sinu; +} + +inline +void 
PaniniPortraitProjector::mapBackward(float u, float v, float &x, float &y) +{ + u /= - scale; + v /= scale; + + float lamda = a * atanf(u / a); + float u_ = lamda; + + float v_; + if ( fabs(lamda) > 1E-7) + v_ = atanf(v * sinf(lamda) / (b * a * tanf(lamda/a))); + else + v_ = atanf(v / b); + + float cosv = cosf(v_); + float y_ = cosv * sinf(u_); + float x_ = sinf(v_); + float z_ = cosv * cosf(u_); + + float z; + x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; + y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; + z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; +} + +inline +void MercatorProjector::mapForward(float x, float y, float &u, float &v) +{ + float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + float u_ = atan2f(x_, z_); + float v_ = asinf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); + + u = scale * u_; + v = scale * logf( tanf( (float)(CV_PI/4) + v_/2 ) ); +} + +inline +void MercatorProjector::mapBackward(float u, float v, float &x, float &y) +{ + u /= scale; + v /= scale; + + float v_ = atanf( sinhf(v) ); + float u_ = u; + + float cosv = cosf(v_); + float x_ = cosv * sinf(u_); + float y_ = sinf(v_); + float z_ = cosv * cosf(u_); + + float z; + x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; + y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; + z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; +} + +inline +void TransverseMercatorProjector::mapForward(float x, float y, float &u, float &v) +{ + float x_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float y_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + float u_ = atan2f(x_, z_); + float v_ = asinf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_)); + + float B = cosf(v_) * sinf(u_); + + u = scale / 2 * logf( (1+B) / (1-B) ); + v = scale * atan2f(tanf(v_), cosf(u_)); +} + +inline +void TransverseMercatorProjector::mapBackward(float u, float v, float &x, float &y) +{ + u /= scale; + v /= scale; + + float v_ = asinf( sinf(v) / coshf(u) ); + float u_ = atan2f( sinhf(u), cos(v) ); + + float cosv = cosf(v_); + float x_ = cosv * sinf(u_); + float y_ = sinf(v_); + float z_ = cosv * cosf(u_); + + float z; + x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; + y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; + z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; +} + +inline +void SphericalPortraitProjector::mapForward(float x, float y, float &u0, float &v0) +{ + float x0_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float y0_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + float x_ = y0_; + float y_ = x0_; + float u, v; + + u = scale * atan2f(x_, z_); + v = scale * (static_cast(CV_PI) - acosf(y_ / sqrtf(x_ * x_ + y_ * y_ + z_ * z_))); + + u0 = -u;//v; + v0 = v;//u; +} + + +inline +void SphericalPortraitProjector::mapBackward(float u0, float v0, float &x, float &y) +{ + float u, v; + u = -u0;//v0; + v = v0;//u0; + + u /= scale; + v /= scale; + + float sinv = sinf(static_cast(CV_PI) - v); + float x0_ = sinv * sinf(u); + float y0_ = cosf(static_cast(CV_PI) - v); + float z_ = sinv * cosf(u); + + float x_ = y0_; + float y_ = x0_; + + float z; + x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; + y = k_rinv[3] * x_ + k_rinv[4] * 
y_ + k_rinv[5] * z_; + z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; +} + +inline +void CylindricalPortraitProjector::mapForward(float x, float y, float &u0, float &v0) +{ + float x0_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float y0_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + float x_ = y0_; + float y_ = x0_; + float u, v; + + u = scale * atan2f(x_, z_); + v = scale * y_ / sqrtf(x_ * x_ + z_ * z_); + + u0 = -u;//v; + v0 = v;//u; +} + + +inline +void CylindricalPortraitProjector::mapBackward(float u0, float v0, float &x, float &y) +{ + float u, v; + u = -u0;//v0; + v = v0;//u0; + + u /= scale; + v /= scale; + + float x0_ = sinf(u); + float y0_ = v; + float z_ = cosf(u); + + float x_ = y0_; + float y_ = x0_; + + float z; + x = k_rinv[0] * x_ + k_rinv[1] * y_ + k_rinv[2] * z_; + y = k_rinv[3] * x_ + k_rinv[4] * y_ + k_rinv[5] * z_; + z = k_rinv[6] * x_ + k_rinv[7] * y_ + k_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; +} + +inline +void PlanePortraitProjector::mapForward(float x, float y, float &u0, float &v0) +{ + float x0_ = r_kinv[0] * x + r_kinv[1] * y + r_kinv[2]; + float y0_ = r_kinv[3] * x + r_kinv[4] * y + r_kinv[5]; + float z_ = r_kinv[6] * x + r_kinv[7] * y + r_kinv[8]; + + float x_ = y0_; + float y_ = x0_; + + x_ = t[0] + x_ / z_ * (1 - t[2]); + y_ = t[1] + y_ / z_ * (1 - t[2]); + + float u,v; + u = scale * x_; + v = scale * y_; + + u0 = -u; + v0 = v; +} + + +inline +void PlanePortraitProjector::mapBackward(float u0, float v0, float &x, float &y) +{ + float u, v; + u = -u0; + v = v0; + + u = u / scale - t[0]; + v = v / scale - t[1]; + + float z; + x = k_rinv[0] * v + k_rinv[1] * u + k_rinv[2] * (1 - t[2]); + y = k_rinv[3] * v + k_rinv[4] * u + k_rinv[5] * (1 - t[2]); + z = k_rinv[6] * v + k_rinv[7] * u + k_rinv[8] * (1 - t[2]); + + x /= z; + y /= z; +} + + +} // namespace detail +} // namespace cv + +#endif // __OPENCV_STITCHING_WARPERS_INL_HPP__ diff --git a/modules/stitching/src/seam_finders.cpp b/modules/stitching/src/seam_finders.cpp index 2f6c78c0e5..723a087034 100644 --- a/modules/stitching/src/seam_finders.cpp +++ b/modules/stitching/src/seam_finders.cpp @@ -41,6 +41,9 @@ //M*/ #include "precomp.hpp" +#include + +using namespace std; namespace cv { namespace detail { @@ -152,6 +155,869 @@ void VoronoiSeamFinder::findInPair(size_t first, size_t second, Rect roi) } +DpSeamFinder::DpSeamFinder(CostFunction costFunc) : costFunc_(costFunc) {} + + +void DpSeamFinder::find(const vector &src, const vector &corners, vector &masks) +{ + LOGLN("Finding seams..."); + int64 t = getTickCount(); + + if (src.size() == 0) + return; + + vector > pairs; + + for (size_t i = 0; i+1 < src.size(); ++i) + for (size_t j = i+1; j < src.size(); ++j) + pairs.push_back(make_pair(i, j)); + + sort(pairs.begin(), pairs.end(), ImagePairLess(src, corners)); + reverse(pairs.begin(), pairs.end()); + + for (size_t i = 0; i < pairs.size(); ++i) + { + int i0 = pairs[i].first, i1 = pairs[i].second; + process(src[i0], src[i1], corners[i0], corners[i1], masks[i0], masks[i1]); + } + + LOGLN("Finding seams, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); +} + + +void DpSeamFinder::process( + const Mat &image1, const Mat &image2, Point tl1, Point tl2, + Mat &mask1, Mat &mask2) +{ + CV_Assert(image1.size() == mask1.size()); + CV_Assert(image2.size() == mask2.size()); + + Point intersectTl(std::max(tl1.x, tl2.x), std::max(tl1.y, tl2.y)); + + Point 
intersectBr(std::min(tl1.x + image1.cols, tl2.x + image2.cols), + std::min(tl1.y + image1.rows, tl2.y + image2.rows)); + + if (intersectTl.x >= intersectBr.x || intersectTl.y >= intersectBr.y) + return; // there are no conflicts + + unionTl_ = Point(std::min(tl1.x, tl2.x), std::min(tl1.y, tl2.y)); + + unionBr_ = Point(std::max(tl1.x + image1.cols, tl2.x + image2.cols), + std::max(tl1.y + image1.rows, tl2.y + image2.rows)); + + unionSize_ = Size(unionBr_.x - unionTl_.x, unionBr_.y - unionTl_.y); + + mask1_ = Mat::zeros(unionSize_, CV_8U); + mask2_ = Mat::zeros(unionSize_, CV_8U); + + Mat tmp = mask1_(Rect(tl1.x - unionTl_.x, tl1.y - unionTl_.y, mask1.cols, mask1.rows)); + mask1.copyTo(tmp); + + tmp = mask2_(Rect(tl2.x - unionTl_.x, tl2.y - unionTl_.y, mask2.cols, mask2.rows)); + mask2.copyTo(tmp); + + // find both images contour masks + + contour1mask_ = Mat::zeros(unionSize_, CV_8U); + contour2mask_ = Mat::zeros(unionSize_, CV_8U); + + for (int y = 0; y < unionSize_.height; ++y) + { + for (int x = 0; x < unionSize_.width; ++x) + { + if (mask1_(y, x) && + ((x == 0 || !mask1_(y, x-1)) || (x == unionSize_.width-1 || !mask1_(y, x+1)) || + (y == 0 || !mask1_(y-1, x)) || (y == unionSize_.height-1 || !mask1_(y+1, x)))) + { + contour1mask_(y, x) = 255; + } + + if (mask2_(y, x) && + ((x == 0 || !mask2_(y, x-1)) || (x == unionSize_.width-1 || !mask2_(y, x+1)) || + (y == 0 || !mask2_(y-1, x)) || (y == unionSize_.height-1 || !mask2_(y+1, x)))) + { + contour2mask_(y, x) = 255; + } + } + } + + findComponents(); + + findEdges(); + + resolveConflicts(image1, image2, tl1, tl2, mask1, mask2); +} + + +void DpSeamFinder::findComponents() +{ + // label all connected components and get information about them + + ncomps_ = 0; + labels_.create(unionSize_); + states_.clear(); + tls_.clear(); + brs_.clear(); + contours_.clear(); + + for (int y = 0; y < unionSize_.height; ++y) + { + for (int x = 0; x < unionSize_.width; ++x) + { + if (mask1_(y, x) && mask2_(y, x)) + labels_(y, x) = numeric_limits::max(); + else if (mask1_(y, x)) + labels_(y, x) = numeric_limits::max()-1; + else if (mask2_(y, x)) + labels_(y, x) = numeric_limits::max()-2; + else + labels_(y, x) = 0; + } + } + + for (int y = 0; y < unionSize_.height; ++y) + { + for (int x = 0; x < unionSize_.width; ++x) + { + if (labels_(y, x) >= numeric_limits::max()-2) + { + if (labels_(y, x) == numeric_limits::max()) + states_.push_back(INTERS); + else if (labels_(y, x) == numeric_limits::max()-1) + states_.push_back(FIRST); + else if (labels_(y, x) == numeric_limits::max()-2) + states_.push_back(SECOND); + + floodFill(labels_, Point(x, y), ++ncomps_); + tls_.push_back(Point(x, y)); + brs_.push_back(Point(x+1, y+1)); + contours_.push_back(vector()); + } + + if (labels_(y, x)) + { + int l = labels_(y, x); + int ci = l-1; + + tls_[ci].x = std::min(tls_[ci].x, x); + tls_[ci].y = std::min(tls_[ci].y, y); + brs_[ci].x = std::max(brs_[ci].x, x+1); + brs_[ci].y = std::max(brs_[ci].y, y+1); + + if ((x == 0 || labels_(y, x-1) != l) || (x == unionSize_.width-1 || labels_(y, x+1) != l) || + (y == 0 || labels_(y-1, x) != l) || (y == unionSize_.height-1 || labels_(y+1, x) != l)) + { + contours_[ci].push_back(Point(x, y)); + } + } + } + } +} + + +void DpSeamFinder::findEdges() +{ + // find edges between components + + map, int> wedges; // weighted edges + + for (int ci = 0; ci < ncomps_-1; ++ci) + { + for (int cj = ci+1; cj < ncomps_; ++cj) + { + wedges[make_pair(ci, cj)] = 0; + wedges[make_pair(cj, ci)] = 0; + } + } + + for (int ci = 0; ci < ncomps_; ++ci) + { + for (size_t i = 
0; i < contours_[ci].size(); ++i) + { + int x = contours_[ci][i].x; + int y = contours_[ci][i].y; + int l = ci + 1; + + if (x > 0 && labels_(y, x-1) && labels_(y, x-1) != l) + { + wedges[make_pair(ci, labels_(y, x-1)-1)]++; + wedges[make_pair(labels_(y, x-1)-1, ci)]++; + } + + if (y > 0 && labels_(y-1, x) && labels_(y-1, x) != l) + { + wedges[make_pair(ci, labels_(y-1, x)-1)]++; + wedges[make_pair(labels_(y-1, x)-1, ci)]++; + } + + if (x < unionSize_.width-1 && labels_(y, x+1) && labels_(y, x+1) != l) + { + wedges[make_pair(ci, labels_(y, x+1)-1)]++; + wedges[make_pair(labels_(y, x+1)-1, ci)]++; + } + + if (y < unionSize_.height-1 && labels_(y+1, x) && labels_(y+1, x) != l) + { + wedges[make_pair(ci, labels_(y+1, x)-1)]++; + wedges[make_pair(labels_(y+1, x)-1, ci)]++; + } + } + } + + edges_.clear(); + + for (int ci = 0; ci < ncomps_-1; ++ci) + { + for (int cj = ci+1; cj < ncomps_; ++cj) + { + map, int>::iterator itr = wedges.find(make_pair(ci, cj)); + if (itr != wedges.end() && itr->second > 0) + edges_.insert(itr->first); + + itr = wedges.find(make_pair(cj, ci)); + if (itr != wedges.end() && itr->second > 0) + edges_.insert(itr->first); + } + } +} + + +void DpSeamFinder::resolveConflicts( + const Mat &image1, const Mat &image2, Point tl1, Point tl2, Mat &mask1, Mat &mask2) +{ + if (costFunc_ == COLOR_GRAD) + computeGradients(image1, image2); + + // resolve conflicts between components + + bool hasConflict = true; + while (hasConflict) + { + int c1, c2; + hasConflict = false; + + for (set >::iterator itr = edges_.begin(); itr != edges_.end(); ++itr) + { + c1 = itr->first; + c2 = itr->second; + + if ((states_[c1] & INTERS) && (states_[c1] & (~INTERS)) != states_[c2]) + { + hasConflict = true; + break; + } + } + + if (hasConflict) + { + int l1 = c1+1, l2 = c2+1; + + if (hasOnlyOneNeighbor(c1)) + { + // if the first components has only one adjacent component + + for (int y = tls_[c1].y; y < brs_[c1].y; ++y) + for (int x = tls_[c1].x; x < brs_[c1].x; ++x) + if (labels_(y, x) == l1) + labels_(y, x) = l2; + + states_[c1] = states_[c2] == FIRST ? SECOND : FIRST; + } + else + { + // if the first component has more than one adjacent component + + Point p1, p2; + if (getSeamTips(c1, c2, p1, p2)) + { + vector seam; + bool isHorizontalSeam; + + if (estimateSeam(image1, image2, tl1, tl2, c1, p1, p2, seam, isHorizontalSeam)) + updateLabelsUsingSeam(c1, c2, seam, isHorizontalSeam); + } + + states_[c1] = states_[c2] == FIRST ? 
INTERS_SECOND : INTERS_FIRST; + } + + const int c[] = {c1, c2}; + const int l[] = {l1, l2}; + + for (int i = 0; i < 2; ++i) + { + // update information about the (i+1)-th component + + int x0 = tls_[c[i]].x, x1 = brs_[c[i]].x; + int y0 = tls_[c[i]].y, y1 = brs_[c[i]].y; + + tls_[c[i]] = Point(numeric_limits::max(), numeric_limits::max()); + brs_[c[i]] = Point(numeric_limits::min(), numeric_limits::min()); + contours_[c[i]].clear(); + + for (int y = y0; y < y1; ++y) + { + for (int x = x0; x < x1; ++x) + { + if (labels_(y, x) == l[i]) + { + tls_[c[i]].x = std::min(tls_[c[i]].x, x); + tls_[c[i]].y = std::min(tls_[c[i]].y, y); + brs_[c[i]].x = std::max(brs_[c[i]].x, x+1); + brs_[c[i]].y = std::max(brs_[c[i]].y, y+1); + + if ((x == 0 || labels_(y, x-1) != l[i]) || (x == unionSize_.width-1 || labels_(y, x+1) != l[i]) || + (y == 0 || labels_(y-1, x) != l[i]) || (y == unionSize_.height-1 || labels_(y+1, x) != l[i])) + { + contours_[c[i]].push_back(Point(x, y)); + } + } + } + } + } + + // remove edges + + edges_.erase(make_pair(c1, c2)); + edges_.erase(make_pair(c2, c1)); + } + } + + // update masks + + int dx1 = unionTl_.x - tl1.x, dy1 = unionTl_.y - tl1.y; + int dx2 = unionTl_.x - tl2.x, dy2 = unionTl_.y - tl2.y; + + for (int y = 0; y < mask2.rows; ++y) + { + for (int x = 0; x < mask2.cols; ++x) + { + int l = labels_(y - dy2, x - dx2); + if (l > 0 && (states_[l-1] & FIRST) && mask1.at(y - dy2 + dy1, x - dx2 + dx1)) + mask2.at(y, x) = 0; + } + } + + for (int y = 0; y < mask1.rows; ++y) + { + for (int x = 0; x < mask1.cols; ++x) + { + int l = labels_(y - dy1, x - dx1); + if (l > 0 && (states_[l-1] & SECOND) && mask2.at(y - dy1 + dy2, x - dx1 + dx2)) + mask1.at(y, x) = 0; + } + } +} + + +void DpSeamFinder::computeGradients(const Mat &image1, const Mat &image2) +{ + CV_Assert(costFunction() == COLOR_GRAD); + + Mat gray; + cvtColor(image1, gray, CV_BGR2GRAY); + Sobel(gray, gradx1_, CV_32F, 1, 0); + Sobel(gray, grady1_, CV_32F, 0, 1); + + cvtColor(image2, gray, CV_BGR2GRAY); + Sobel(gray, gradx2_, CV_32F, 1, 0); + Sobel(gray, grady2_, CV_32F, 0, 1); +} + + +bool DpSeamFinder::hasOnlyOneNeighbor(int comp) +{ + set >::iterator begin, end; + begin = lower_bound(edges_.begin(), edges_.end(), make_pair(comp, numeric_limits::min())); + end = upper_bound(edges_.begin(), edges_.end(), make_pair(comp, numeric_limits::max())); + return ++begin == end; +} + + +bool DpSeamFinder::closeToContour(int y, int x, const Mat_ &contourMask) +{ + const int rad = 2; + + for (int dy = -rad; dy <= rad; ++dy) + { + if (y + dy >= 0 && y + dy < unionSize_.height) + { + for (int dx = -rad; dx <= rad; ++dx) + { + if (x + dx >= 0 && x + dx < unionSize_.width && + contourMask(y + dy, x + dx)) + { + return true; + } + } + } + } + + return false; +} + + +bool DpSeamFinder::getSeamTips(int comp1, int comp2, Point &p1, Point &p2) +{ + CV_Assert(states_[comp1] & INTERS); + + // find special points + + vector specialPoints; + int l2 = comp2+1; + + for (size_t i = 0; i < contours_[comp1].size(); ++i) + { + int x = contours_[comp1][i].x; + int y = contours_[comp1][i].y; + + if (closeToContour(y, x, contour1mask_) && + closeToContour(y, x, contour2mask_) && + ((x > 0 && labels_(y, x-1) == l2) || + (y > 0 && labels_(y-1, x) == l2) || + (x < unionSize_.width-1 && labels_(y, x+1) == l2) || + (y < unionSize_.height-1 && labels_(y+1, x) == l2))) + { + specialPoints.push_back(Point(x, y)); + } + } + + if (specialPoints.size() < 2) + return false; + + // find clusters + + vector labels; + cv::partition(specialPoints, labels, ClosePoints(10)); + + 
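An aside on the clustering call just above: cv::partition groups the special points into equivalence classes using a user-supplied predicate; ClosePoints (defined earlier in seam_finders.cpp, not shown in this hunk) is presumably a distance-threshold predicate. A minimal, self-contained sketch of the same mechanism, with an illustrative stand-in predicate (DistLess) and made-up points:

#include <opencv2/core/core.hpp>
#include <vector>
#include <iostream>

// Illustrative stand-in for the ClosePoints predicate: two points are
// "equivalent" if their squared distance is below a threshold.
struct DistLess
{
    int thresh2;
    DistLess(int dist) : thresh2(dist * dist) {}
    bool operator()(const cv::Point &a, const cv::Point &b) const
    {
        int dx = a.x - b.x, dy = a.y - b.y;
        return dx * dx + dy * dy < thresh2;
    }
};

int main()
{
    std::vector<cv::Point> pts;
    pts.push_back(cv::Point(0, 0));
    pts.push_back(cv::Point(1, 1));     // near the first point -> same cluster
    pts.push_back(cv::Point(100, 100)); // far away -> separate cluster

    std::vector<int> labels;
    int nclusters = cv::partition(pts, labels, DistLess(10));
    std::cout << nclusters << " clusters" << std::endl; // prints 2
    return 0;
}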
int nlabels = *max_element(labels.begin(), labels.end()) + 1; + if (nlabels < 2) + return false; + + vector sum(nlabels); + vector > points(nlabels); + + for (size_t i = 0; i < specialPoints.size(); ++i) + { + sum[labels[i]] += specialPoints[i]; + points[labels[i]].push_back(specialPoints[i]); + } + + // select two most distant clusters + + int idx[2] = {-1,-1}; + double maxDist = -numeric_limits::max(); + + for (int i = 0; i < nlabels-1; ++i) + { + for (int j = i+1; j < nlabels; ++j) + { + double size1 = points[i].size(), size2 = points[j].size(); + double cx1 = cvRound(sum[i].x / size1), cy1 = cvRound(sum[i].y / size1); + double cx2 = cvRound(sum[j].x / size2), cy2 = cvRound(sum[j].y / size1); + + double dist = (cx1 - cx2) * (cx1 - cx2) + (cy1 - cy2) * (cy1 - cy2); + if (dist > maxDist) + { + maxDist = dist; + idx[0] = i; + idx[1] = j; + } + } + } + + // select two points closest to the clusters' centers + + Point p[2]; + + for (int i = 0; i < 2; ++i) + { + double size = points[idx[i]].size(); + double cx = cvRound(sum[idx[i]].x / size); + double cy = cvRound(sum[idx[i]].y / size); + + int closest = -1; + double minDist = numeric_limits::max(); + + for (size_t j = 0; j < points[idx[i]].size(); ++j) + { + double dist = (points[idx[i]][j].x - cx) * (points[idx[i]][j].x - cx) + + (points[idx[i]][j].y - cy) * (points[idx[i]][j].y - cy); + if (dist < minDist) + { + minDist = dist; + closest = j; + } + } + + p[i] = points[idx[i]][closest]; + } + + p1 = p[0]; + p2 = p[1]; + return true; +} + + +namespace +{ + +template +float diffL2Square(const Mat &image1, int y1, int x1, const Mat &image2, int y2, int x2) +{ + const T *r1 = image1.ptr(y1); + const T *r2 = image2.ptr(y2); + return static_cast(sqr(r1[3*x1] - r2[3*x2]) + sqr(r1[3*x1+1] - r2[3*x2+1]) + + sqr(r1[3*x1+2] - r2[3*x2+2])); +} + +} // namespace + + +void DpSeamFinder::computeCosts( + const Mat &image1, const Mat &image2, Point tl1, Point tl2, + int comp, Mat_ &costV, Mat_ &costH) +{ + CV_Assert(states_[comp] & INTERS); + + // compute costs + + float (*diff)(const Mat&, int, int, const Mat&, int, int) = 0; + if (image1.type() == CV_32FC3 && image2.type() == CV_32FC3) + diff = diffL2Square; + else if (image1.type() == CV_8UC3 && image2.type() == CV_8UC3) + diff = diffL2Square; + else + CV_Error(CV_StsBadArg, "both images must have CV_32FC3 or CV_8UC3 type"); + + int l = comp+1; + Rect roi(tls_[comp], brs_[comp]); + + int dx1 = unionTl_.x - tl1.x, dy1 = unionTl_.y - tl1.y; + int dx2 = unionTl_.x - tl2.x, dy2 = unionTl_.y - tl2.y; + + const float badRegionCost = normL2(Point3f(255.f, 255.f, 255.f), + Point3f(0.f, 0.f, 0.f)); + + costV.create(roi.height, roi.width+1); + + for (int y = roi.y; y < roi.br().y; ++y) + { + for (int x = roi.x; x < roi.br().x+1; ++x) + { + if (labels_(y, x) == l && x > 0 && labels_(y, x-1) == l) + { + float costColor = (diff(image1, y + dy1, x + dx1 - 1, image2, y + dy2, x + dx2) + + diff(image1, y + dy1, x + dx1, image2, y + dy2, x + dx2 - 1)) / 2; + if (costFunc_ == COLOR) + costV(y - roi.y, x - roi.x) = costColor; + else if (costFunc_ == COLOR_GRAD) + { + float costGrad = std::abs(gradx1_(y + dy1, x + dx1)) + std::abs(gradx1_(y + dy1, x + dx1 - 1)) + + std::abs(gradx2_(y + dy2, x + dx2)) + std::abs(gradx2_(y + dy2, x + dx2 - 1)) + 1.f; + costV(y - roi.y, x - roi.x) = costColor / costGrad; + } + } + else + costV(y - roi.y, x - roi.x) = badRegionCost; + } + } + + costH.create(roi.height+1, roi.width); + + for (int y = roi.y; y < roi.br().y+1; ++y) + { + for (int x = roi.x; x < roi.br().x; ++x) + { + if 
(labels_(y, x) == l && y > 0 && labels_(y-1, x) == l) + { + float costColor = (diff(image1, y + dy1 - 1, x + dx1, image2, y + dy2, x + dx2) + + diff(image1, y + dy1, x + dx1, image2, y + dy2 - 1, x + dx2)) / 2; + if (costFunc_ == COLOR) + costH(y - roi.y, x - roi.x) = costColor; + else if (costFunc_ == COLOR_GRAD) + { + float costGrad = std::abs(grady1_(y + dy1, x + dx1)) + std::abs(grady1_(y + dy1 - 1, x + dx1)) + + std::abs(grady2_(y + dy2, x + dx2)) + std::abs(grady2_(y + dy2 - 1, x + dx2)) + 1.f; + costH(y - roi.y, x - roi.x) = costColor / costGrad; + } + } + else + costH(y - roi.y, x - roi.x) = badRegionCost; + } + } +} + + +bool DpSeamFinder::estimateSeam( + const Mat &image1, const Mat &image2, Point tl1, Point tl2, int comp, + Point p1, Point p2, vector &seam, bool &isHorizontal) +{ + CV_Assert(states_[comp] & INTERS); + + Mat_ costV, costH; + computeCosts(image1, image2, tl1, tl2, comp, costV, costH); + + Rect roi(tls_[comp], brs_[comp]); + Point src = p1 - roi.tl(); + Point dst = p2 - roi.tl(); + int l = comp+1; + + // estimate seam direction + + bool swapped = false; + isHorizontal = std::abs(dst.x - src.x) > std::abs(dst.y - src.y); + + if (isHorizontal) + { + if (src.x > dst.x) + { + std::swap(src, dst); + swapped = true; + } + } + else if (src.y > dst.y) + { + swapped = true; + std::swap(src, dst); + } + + // find optimal control + + Mat_ control = Mat::zeros(roi.size(), CV_8U); + Mat_ reachable = Mat::zeros(roi.size(), CV_8U); + Mat_ cost = Mat::zeros(roi.size(), CV_32F); + + reachable(src) = 1; + cost(src) = 0.f; + + int nsteps; + pair steps[3]; + + if (isHorizontal) + { + for (int x = src.x+1; x <= dst.x; ++x) + { + for (int y = 0; y < roi.height; ++y) + { + // seam follows along upper side of pixels + + nsteps = 0; + + if (labels_(y + roi.y, x + roi.x) == l) + { + if (reachable(y, x-1)) + steps[nsteps++] = make_pair(cost(y, x-1) + costH(y, x-1), 1); + if (y > 0 && reachable(y-1, x-1)) + steps[nsteps++] = make_pair(cost(y-1, x-1) + costH(y-1, x-1) + costV(y-1, x), 2); + if (y < roi.height-1 && reachable(y+1, x-1)) + steps[nsteps++] = make_pair(cost(y+1, x-1) + costH(y+1, x-1) + costV(y, x), 3); + } + + if (nsteps) + { + pair opt = *min_element(steps, steps + nsteps); + cost(y, x) = opt.first; + control(y, x) = opt.second; + reachable(y, x) = 255; + } + } + } + } + else + { + for (int y = src.y+1; y <= dst.y; ++y) + { + for (int x = 0; x < roi.width; ++x) + { + // seam follows along left side of pixels + + nsteps = 0; + + if (labels_(y + roi.y, x + roi.x) == l) + { + if (reachable(y-1, x)) + steps[nsteps++] = make_pair(cost(y-1, x) + costV(y-1, x), 1); + if (x > 0 && reachable(y-1, x-1)) + steps[nsteps++] = make_pair(cost(y-1, x-1) + costV(y-1, x-1) + costH(y, x-1), 2); + if (x < roi.width-1 && reachable(y-1, x+1)) + steps[nsteps++] = make_pair(cost(y-1, x+1) + costV(y-1, x+1) + costH(y, x), 3); + } + + if (nsteps) + { + pair opt = *min_element(steps, steps + nsteps); + cost(y, x) = opt.first; + control(y, x) = opt.second; + reachable(y, x) = 255; + } + } + } + } + + if (!reachable(dst)) + return false; + + // restore seam + + Point p = dst; + seam.clear(); + seam.push_back(p + roi.tl()); + + if (isHorizontal) + { + for (; p.x != src.x; seam.push_back(p + roi.tl())) + { + if (control(p) == 2) p.y--; + else if (control(p) == 3) p.y++; + p.x--; + } + } + else + { + for (; p.y != src.y; seam.push_back(p + roi.tl())) + { + if (control(p) == 2) p.x--; + else if (control(p) == 3) p.x++; + p.y--; + } + } + + if (!swapped) + reverse(seam.begin(), seam.end()); + + 
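The horizontal-seam branch above is a standard dynamic-programming recurrence. Writing C for the accumulated cost and H, V for the horizontal and vertical edge costs from computeCosts, it restates the loop (no new behaviour):

C(y,x) = \min
\begin{cases}
C(y,\,x-1) + H(y,\,x-1) & \text{(control = 1)}\\
C(y-1,\,x-1) + H(y-1,\,x-1) + V(y-1,\,x) & \text{(control = 2)}\\
C(y+1,\,x-1) + H(y+1,\,x-1) + V(y,\,x) & \text{(control = 3)}
\end{cases}

restricted to pixels whose label matches the component being processed. The vertical branch is the same with the roles of x and y (and of H and V) exchanged, and the stored control values let the seam be traced back from dst to src in the restore step.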
CV_Assert(seam.front() == p1); + CV_Assert(seam.back() == p2); + return true; +} + + +void DpSeamFinder::updateLabelsUsingSeam( + int comp1, int comp2, const vector &seam, bool isHorizontalSeam) +{ + Mat_ mask = Mat::zeros(brs_[comp1].y - tls_[comp1].y, + brs_[comp1].x - tls_[comp1].x, CV_32S); + + for (size_t i = 0; i < contours_[comp1].size(); ++i) + mask(contours_[comp1][i] - tls_[comp1]) = 255; + + for (size_t i = 0; i < seam.size(); ++i) + mask(seam[i] - tls_[comp1]) = 255; + + // find connected components after seam carving + + int l1 = comp1+1, l2 = comp2+1; + + int ncomps = 0; + + for (int y = 0; y < mask.rows; ++y) + for (int x = 0; x < mask.cols; ++x) + if (!mask(y, x) && labels_(y + tls_[comp1].y, x + tls_[comp1].x) == l1) + floodFill(mask, Point(x, y), ++ncomps); + + for (size_t i = 0; i < contours_[comp1].size(); ++i) + { + int x = contours_[comp1][i].x - tls_[comp1].x; + int y = contours_[comp1][i].y - tls_[comp1].y; + + bool ok = false; + static const int dx[] = {-1, +1, 0, 0, -1, +1, -1, +1}; + static const int dy[] = {0, 0, -1, +1, -1, -1, +1, +1}; + + for (int j = 0; j < 8; ++j) + { + int c = x + dx[j]; + int r = y + dy[j]; + + if (c >= 0 && c < mask.cols && r >= 0 && r < mask.rows && + mask(r, c) && mask(r, c) != 255) + { + ok = true; + mask(y, x) = mask(r, c); + } + } + + if (!ok) + mask(y, x) = 0; + } + + if (isHorizontalSeam) + { + for (size_t i = 0; i < seam.size(); ++i) + { + int x = seam[i].x - tls_[comp1].x; + int y = seam[i].y - tls_[comp1].y; + + if (y < mask.rows-1 && mask(y+1, x) && mask(y+1, x) != 255) + mask(y, x) = mask(y+1, x); + else + mask(y, x) = 0; + } + } + else + { + for (size_t i = 0; i < seam.size(); ++i) + { + int x = seam[i].x - tls_[comp1].x; + int y = seam[i].y - tls_[comp1].y; + + if (x < mask.cols-1 && mask(y, x+1) && mask(y, x+1) != 255) + mask(y, x) = mask(y, x+1); + else + mask(y, x) = 0; + } + } + + // find new components connected with the second component and + // with other components except the ones we are working with + + map connect2; + map connectOther; + + for (int i = 1; i <= ncomps; ++i) + { + connect2.insert(make_pair(i, 0)); + connectOther.insert(make_pair(i, 0)); + } + + for (size_t i = 0; i < contours_[comp1].size(); ++i) + { + int x = contours_[comp1][i].x; + int y = contours_[comp1][i].y; + + if ((x > 0 && labels_(y, x-1) == l2) || + (y > 0 && labels_(y-1, x) == l2) || + (x < unionSize_.width-1 && labels_(y, x+1) == l2) || + (y < unionSize_.height-1 && labels_(y+1, x) == l2)) + { + connect2[mask(y - tls_[comp1].y, x - tls_[comp1].x)]++; + } + + if ((x > 0 && labels_(y, x-1) != l1 && labels_(y, x-1) != l2) || + (y > 0 && labels_(y-1, x) != l1 && labels_(y-1, x) != l2) || + (x < unionSize_.width-1 && labels_(y, x+1) != l1 && labels_(y, x+1) != l2) || + (y < unionSize_.height-1 && labels_(y+1, x) != l1 && labels_(y+1, x) != l2)) + { + connectOther[mask(y - tls_[comp1].y, x - tls_[comp1].x)]++; + } + } + + vector isAdjComp(ncomps + 1, 0); + + for (map::iterator itr = connect2.begin(); itr != connect2.end(); ++itr) + { + double len = contours_[comp1].size(); + isAdjComp[itr->first] = itr->second / len > 0.05 && connectOther.find(itr->first)->second / len < 0.1; + } + + // update labels + + for (int y = 0; y < mask.rows; ++y) + for (int x = 0; x < mask.cols; ++x) + if (mask(y, x) && isAdjComp[mask(y, x)]) + labels_(y + tls_[comp1].y, x + tls_[comp1].x) = l2; +} + + class GraphCutSeamFinder::Impl : public PairwiseSeamFinder { public: diff --git a/modules/ts/misc/run.py b/modules/ts/misc/run.py index 79dad8d8fd..f2c213ed11 100644 
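With the implementation above in place, DpSeamFinder is a drop-in alternative to the other seam finders of the stitching pipeline (stitching_detailed.cpp, further down in this patch, exposes it via the dp_color / dp_colorgrad options). A minimal sketch of driving it directly; the function and variable names here are made up, and the images are assumed to be already warped into a common frame:

#include <opencv2/core/core.hpp>
#include <opencv2/stitching/detail/seam_finders.hpp>
#include <vector>

// Sketch: resolve the seam between two already-warped images whose top-left
// corners are known. The masks are updated in place by DpSeamFinder::find()
// (the Mat headers pushed into the vector share data with mask1/mask2).
void findSeamsSketch(const cv::Mat &warped1, const cv::Mat &warped2,
                     cv::Point corner1, cv::Point corner2,
                     cv::Mat &mask1, cv::Mat &mask2)
{
    std::vector<cv::Mat> images;
    images.push_back(warped1);   // CV_32FC3 or CV_8UC3, see computeCosts above
    images.push_back(warped2);

    std::vector<cv::Point> corners;
    corners.push_back(corner1);
    corners.push_back(corner2);

    std::vector<cv::Mat> masks;
    masks.push_back(mask1);      // CV_8U, non-zero where the image is valid
    masks.push_back(mask2);

    cv::detail::DpSeamFinder finder(cv::detail::DpSeamFinder::COLOR_GRAD);
    finder.find(images, corners, masks);
}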
--- a/modules/ts/misc/run.py +++ b/modules/ts/misc/run.py @@ -56,6 +56,7 @@ parse_patterns = ( {'name': "tests_dir", 'default': None, 'pattern': re.compile("^EXECUTABLE_OUTPUT_PATH:PATH=(.+)$")}, {'name': "build_type", 'default': "Release", 'pattern': re.compile("^CMAKE_BUILD_TYPE:STRING=(.*)$")}, {'name': "svnversion_path", 'default': None, 'pattern': re.compile("^SVNVERSION_PATH:FILEPATH=(.*)$")}, + {'name': "git_executable", 'default': None, 'pattern': re.compile("^GIT_EXECUTABLE:FILEPATH=(.*)$")}, {'name': "cxx_flags", 'default': "", 'pattern': re.compile("^CMAKE_CXX_FLAGS:STRING=(.*)$")}, {'name': "cxx_flags_debug", 'default': "", 'pattern': re.compile("^CMAKE_CXX_FLAGS_DEBUG:STRING=(.*)$")}, {'name': "cxx_flags_release", 'default': "", 'pattern': re.compile("^CMAKE_CXX_FLAGS_RELEASE:STRING=(.*)$")}, @@ -303,13 +304,15 @@ class RunInfo(object): # detect target arch if self.targetos == "android": if "armeabi-v7a" in self.android_abi: - self.targetarch = "ARMv7a" + self.targetarch = "armv7a" elif "armeabi-v6" in self.android_abi: - self.targetarch = "ARMv6" + self.targetarch = "armv6" elif "armeabi" in self.android_abi: - self.targetarch = "ARMv5te" + self.targetarch = "armv5te" elif "x86" in self.android_abi: self.targetarch = "x86" + elif "mips" in self.android_abi: + self.targetarch = "mips" else: self.targetarch = "ARM" elif self.is_x64 and hostmachine in ["AMD64", "x86_64"]: @@ -327,20 +330,38 @@ class RunInfo(object): self.hardware = None - self.getSvnVersion(self.cmake_home, "cmake_home_svn") + self.cmake_home_vcver = self.getVCVersion(self.cmake_home) if self.opencv_home == self.cmake_home: - self.opencv_home_svn = self.cmake_home_svn + self.opencv_home_vcver = self.cmake_home_vcver else: - self.getSvnVersion(self.opencv_home, "opencv_home_svn") + self.opencv_home_vcver = self.getVCVersion(self.opencv_home) self.tests = self.getAvailableTestApps() - def getSvnVersion(self, path, name): + def getVCVersion(self, root_path): + if os.path.isdir(os.path.join(root_path, ".svn")): + return self.getSvnVersion(root_path) + elif os.path.isdir(os.path.join(root_path, ".git")): + return self.getGitHash(root_path) + return None + + def getGitHash(self, path): + if not path or not self.git_executable: + return None + try: + output = Popen([self.git_executable, "rev-parse", "--short", "HEAD"], stdout=PIPE, stderr=PIPE, cwd = path).communicate() + if not output[1]: + return output[0].strip() + else: + return None + except OSError: + return None + + def getSvnVersion(self, path): if not path: - setattr(self, name, None) - return - if not self.svnversion_path and hostos == 'nt': - self.tryGetSvnVersionWithTortoise(path, name) + val = None + elif not self.svnversion_path and hostos == 'nt': + val = self.tryGetSvnVersionWithTortoise(path) else: svnversion = self.svnversion_path if not svnversion: @@ -348,13 +369,16 @@ class RunInfo(object): try: output = Popen([svnversion, "-n", path], stdout=PIPE, stderr=PIPE).communicate() if not output[1]: - setattr(self, name, output[0]) + val = output[0] else: - setattr(self, name, None) + val = None except OSError: - setattr(self, name, None) + val = None + if val: + val = val.replace(" ", "_") + return val - def tryGetSvnVersionWithTortoise(self, path, name): + def tryGetSvnVersionWithTortoise(self, path): try: wcrev = "SubWCRev.exe" dir = tempfile.mkdtemp() @@ -371,9 +395,9 @@ class RunInfo(object): tmpfile = open(tmpfilename2, "r") version = tmpfile.read() tmpfile.close() - setattr(self, name, version) + return version except: - setattr(self, name, None) + 
return None finally: if dir: shutil.rmtree(dir) @@ -406,13 +430,13 @@ class RunInfo(object): if app.startswith(self.nameprefix): app = app[len(self.nameprefix):] - if self.cmake_home_svn: - if self.cmake_home_svn == self.opencv_home_svn: - rev = self.cmake_home_svn - elif self.opencv_home_svn: - rev = self.cmake_home_svn + "-" + self.opencv_home_svn + if self.cmake_home_vcver: + if self.cmake_home_vcver == self.opencv_home_vcver: + rev = self.cmake_home_vcver + elif self.opencv_home_vcver: + rev = self.cmake_home_vcver + "-" + self.opencv_home_vcver else: - rev = self.cmake_home_svn + rev = self.cmake_home_vcver else: rev = None if rev: @@ -484,7 +508,6 @@ class RunInfo(object): else: prev_option = prev_option + " " + opt options.append(tmpfile[1]) - print options output = Popen(options, stdout=PIPE, stderr=PIPE).communicate() compiler_output = output[1] os.remove(tmpfile[1]) @@ -506,7 +529,7 @@ class RunInfo(object): hw = "CUDA_" else: hw = "" - tstamp = timestamp.strftime("%Y-%m-%d--%H-%M-%S") + tstamp = timestamp.strftime("%Y%m%d-%H%M%S") return "%s_%s_%s_%s%s%s.xml" % (app, self.targetos, self.targetarch, hw, rev, tstamp) def getTest(self, name): diff --git a/modules/video/include/opencv2/video/background_segm.hpp b/modules/video/include/opencv2/video/background_segm.hpp index a71cf3e538..9c37ffacfb 100644 --- a/modules/video/include/opencv2/video/background_segm.hpp +++ b/modules/video/include/opencv2/video/background_segm.hpp @@ -199,111 +199,20 @@ protected: */ class CV_EXPORTS BackgroundSubtractorGMG: public cv::BackgroundSubtractor { -protected: - /** - * Used internally to represent a single feature in a histogram. - * Feature is a color and an associated likelihood (weight in the histogram). - */ - struct CV_EXPORTS HistogramFeatureGMG - { - /** - * Default constructor. - * Initializes likelihood of feature to 0, color remains uninitialized. - */ - HistogramFeatureGMG(){likelihood = 0.0;} - - /** - * Copy constructor. - * Required to use HistogramFeatureGMG in a std::vector - * @see operator =() - */ - HistogramFeatureGMG(const HistogramFeatureGMG& orig){ - color = orig.color; likelihood = orig.likelihood; - } - - /** - * Assignment operator. - * Required to use HistogramFeatureGMG in a std::vector - */ - HistogramFeatureGMG& operator =(const HistogramFeatureGMG& orig){ - color = orig.color; likelihood = orig.likelihood; return *this; - } - - /** - * Tests equality of histogram features. - * Equality is tested only by matching the color (feature), not the likelihood. - * This operator is used to look up an observed feature in a histogram. - */ - bool operator ==(HistogramFeatureGMG &rhs); - - //! Regardless of the image datatype, it is quantized and mapped to an integer and represented as a vector. - vector color; - - //! Represents the weight of feature in the histogram. - float likelihood; - friend class PixelModelGMG; - }; - - /** - * Representation of the statistical model of a single pixel for use in the background subtraction - * algorithm. - */ - class CV_EXPORTS PixelModelGMG - { - public: - PixelModelGMG(); - ~PixelModelGMG(); - - /** - * Incorporate the last observed feature into the statistical model. - * - * @param learningRate The adaptation parameter for the histogram. -1.0 to use default. Value - * should be between 0.0 and 1.0, the higher the value, the faster the - * adaptation. 1.0 is limiting case where fast adaptation means no memory. 
- */ - void insertFeature(double learningRate = -1.0); - - /** - * Set the feature last observed, to save before incorporating it into the statistical - * model with insertFeature(). - * - * @param feature The feature (color) just observed. - */ - void setLastObservedFeature(BackgroundSubtractorGMG::HistogramFeatureGMG feature); - /** - * Set the upper limit for the number of features to store in the histogram. Use to adjust - * memory requirements. - * - * @param max size_t representing the max number of features. - */ - void setMaxFeatures(size_t max) { - maxFeatures = max; histogram.resize(max); histogram.clear(); - } - /** - * Normalize the histogram, so sum of weights of all features = 1.0 - */ - void normalizeHistogram(); - /** - * Return the weight of a feature in the histogram. If the feature is not represented in the - * histogram, the weight returned is 0.0. - */ - double getLikelihood(HistogramFeatureGMG f); - PixelModelGMG& operator *=(const float &rhs); - //friend class BackgroundSubtractorGMG; - //friend class HistogramFeatureGMG; - private: - size_t numFeatures; //!< number of features in histogram - size_t maxFeatures; //!< max allowable features in histogram - std::list histogram; //!< represents the histogram as a list of features - HistogramFeatureGMG lastObservedFeature; - //!< store last observed feature in case we need to add it to histogram - }; - public: BackgroundSubtractorGMG(); virtual ~BackgroundSubtractorGMG(); virtual AlgorithmInfo* info() const; + /** + * Validate parameters and set up data structures for appropriate image size. + * Must call before running on data. + * @param frameSize input frame size + * @param min minimum value taken on by pixels in image sequence. Usually 0 + * @param max maximum value taken on by pixels in image sequence. e.g. 1.0 or 255 + */ + void initialize(cv::Size frameSize, double min, double max); + /** * Performs single-frame background subtraction and builds up a statistical background image * model. @@ -313,28 +222,10 @@ public: virtual void operator()(InputArray image, OutputArray fgmask, double learningRate=-1.0); /** - * Validate parameters and set up data structures for appropriate image type. Must call before - * running on data. - * @param image One sample image from dataset - * @param min minimum value taken on by pixels in image sequence. Usually 0 - * @param max maximum value taken on by pixels in image sequence. e.g. 1.0 or 255 + * Releases all inner buffers. */ - void initializeType(InputArray image, double min, double max); - /** - * Selectively update the background model. Only update background model for pixels identified - * as background. - * @param mask Mask image same size as images in sequence. Must be 8UC1 matrix, 255 for foreground - * and 0 for background. - */ - void updateBackgroundModel(InputArray mask); - /** - * Retrieve the greyscale image representing the probability that each pixel is foreground given - * the current estimated background model. Values are 0.0 (black) to 1.0 (white). - * @param img The 32FC1 image representing per-pixel probabilities that the pixel is foreground. - */ - void getPosteriorImage(OutputArray img); + void release(); -protected: //! Total number of distinct colors to maintain in histogram. int maxFeatures; //! Set between 0.0 and 1.0, determines how quickly features are "forgotten" from histograms. @@ -345,31 +236,25 @@ protected: int quantizationLevels; //! Prior probability that any given pixel is a background pixel. A sensitivity parameter. 
double backgroundPrior; + //! Value above which pixel is determined to be FG. + double decisionThreshold; + //! Smoothing radius, in pixels, for cleaning up FG image. + int smoothingRadius; + //! Perform background model update + bool updateBackgroundModel; - double decisionThreshold; //!< value above which pixel is determined to be FG. - int smoothingRadius; //!< smoothing radius, in pixels, for cleaning up FG image. +private: + double maxVal_; + double minVal_; - double maxVal, minVal; + cv::Size frameSize_; + int frameNum_; - /* - * General Parameters - */ - int imWidth; //!< width of image. - int imHeight; //!< height of image. - size_t numPixels; + cv::Mat_ nfeatures_; + cv::Mat_ colors_; + cv::Mat_ weights_; - unsigned int numChannels; //!< Number of channels in image. - - bool isDataInitialized; - //!< After general parameters are set, data structures must be initialized. - - /* - * Data Structures - */ - vector pixels; //!< Probabilistic background models for each pixel in image. - int frameNum; //!< Frame number counter, used to count frames in training mode. - Mat posteriorImage; //!< Posterior probability image. - Mat fgMaskImage; //!< Foreground mask image. + cv::Mat buf_; }; } diff --git a/modules/video/src/bgfg_gmg.cpp b/modules/video/src/bgfg_gmg.cpp index 163445a45b..acec3ed73a 100644 --- a/modules/video/src/bgfg_gmg.cpp +++ b/modules/video/src/bgfg_gmg.cpp @@ -48,12 +48,7 @@ #include "precomp.hpp" -using namespace std; - -namespace cv -{ - -BackgroundSubtractorGMG::BackgroundSubtractorGMG() +cv::BackgroundSubtractorGMG::BackgroundSubtractorGMG() { /* * Default Parameter Values. Override with algorithm "set" method. @@ -65,392 +60,293 @@ BackgroundSubtractorGMG::BackgroundSubtractorGMG() backgroundPrior = 0.8; decisionThreshold = 0.8; smoothingRadius = 7; + updateBackgroundModel = true; } -void BackgroundSubtractorGMG::initializeType(InputArray _image, double min, double max) +cv::BackgroundSubtractorGMG::~BackgroundSubtractorGMG() { - minVal = min; - maxVal = max; - - if (minVal == maxVal) - { - CV_Error_(CV_StsBadArg,("minVal and maxVal cannot be the same.")); - } - - /* - * Parameter validation - */ - if (maxFeatures <= 0) - { - CV_Error_(CV_StsBadArg, - ("maxFeatures parameter must be 1 or greater. Instead, it is %d.",maxFeatures)); - } - if (learningRate < 0.0 || learningRate > 1.0) - { - CV_Error_(CV_StsBadArg, - ("learningRate parameter must be in the range [0.0,1.0]. Instead, it is %f.", - learningRate)); - } - if (numInitializationFrames < 1) - { - CV_Error_(CV_StsBadArg, - ("numInitializationFrames must be at least 1. Instead, it is %d.", - numInitializationFrames)); - } - if (quantizationLevels < 1) - { - CV_Error_(CV_StsBadArg, - ("quantizationLevels must be at least 1 (preferably more). Instead it is %d.", - quantizationLevels)); - } - if (backgroundPrior < 0.0 || backgroundPrior > 1.0) - { - CV_Error_(CV_StsBadArg, - ("backgroundPrior must be a probability, between 0.0 and 1.0. Instead it is %f.", - backgroundPrior)); - } - - /* - * Detect and accommodate the image depth - */ - Mat image = _image.getMat(); - numChannels = image.channels(); - - /* - * Color quantization [0 | | | | max] --> [0 | | max] - * (0) Use double as intermediary to convert all types to int. - * (i) Shift min to 0, - * (ii) max/(num intervals) = factor. x/factor * factor = quantized result, after integer operation. 
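The quantization idea sketched in the removed comment above survives in the rewrite, but the new Quantization_ helpers (further down in this file) additionally pack up to three quantized channels into a single int, 8 bits per channel. A standalone sketch of that packing, with made-up input values:

#include <opencv2/core/core.hpp>
#include <iostream>

// Quantize one channel to [0, levels) as in the patch:
// (val - minVal) * levels / (maxVal - minVal), truncated to int.
static int quantizeChannel(double val, double minVal, double maxVal, int levels)
{
    return static_cast<int>((val - minVal) * levels / (maxVal - minVal));
}

// Pack a 3-channel (e.g. BGR) pixel into one int, 8 bits per channel,
// mirroring the multi-channel Quantization_ helper added in bgfg_gmg.cpp.
static int quantizeColor(const cv::Vec3b &pix, double minVal, double maxVal, int levels)
{
    int res = 0;
    res |= quantizeChannel(pix[0], minVal, maxVal, levels);
    res |= quantizeChannel(pix[1], minVal, maxVal, levels) << 8;
    res |= quantizeChannel(pix[2], minVal, maxVal, levels) << 16;
    return res;
}

int main()
{
    cv::Vec3b pix(200, 16, 64); // made-up BGR sample
    std::cout << quantizeColor(pix, 0.0, 255.0, 16) << std::endl;
    return 0;
}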
- */ - - /* - * Data Structure Initialization - */ - imWidth = image.cols; - imHeight = image.rows; - numPixels = image.total(); - pixels.resize(numPixels); - frameNum = 0; - - // used to iterate through matrix of type unknown at compile time - //elemSize = image.elemSize(); - //elemSize1 = image.elemSize1(); - - vector::iterator pixel; - vector::iterator pixel_end = pixels.end(); - for (pixel = pixels.begin(); pixel != pixel_end; ++pixel) - { - pixel->setMaxFeatures(maxFeatures); - } - - fgMaskImage = Mat::zeros(imHeight, imWidth, CV_8UC1); // 8-bit unsigned mask. 255 for FG, 0 for BG - posteriorImage = Mat::zeros(imHeight, imWidth, CV_32FC1); // float for storing probabilities. Can be viewed directly with imshow. - isDataInitialized = true; } -void BackgroundSubtractorGMG::operator()(InputArray _image, OutputArray _fgmask, double newLearningRate) +void cv::BackgroundSubtractorGMG::initialize(cv::Size frameSize, double min, double max) { - if (!isDataInitialized) + CV_Assert(min < max); + CV_Assert(maxFeatures > 0); + CV_Assert(learningRate >= 0.0 && learningRate <= 1.0); + CV_Assert(numInitializationFrames >= 1); + CV_Assert(quantizationLevels >= 1 && quantizationLevels <= 255); + CV_Assert(backgroundPrior >= 0.0 && backgroundPrior <= 1.0); + + minVal_ = min; + maxVal_ = max; + + frameSize_ = frameSize; + frameNum_ = 0; + + nfeatures_.create(frameSize_); + colors_.create(frameSize_.area(), maxFeatures); + weights_.create(frameSize_.area(), maxFeatures); + + nfeatures_.setTo(cv::Scalar::all(0)); +} + +namespace +{ + float findFeature(int color, const int* colors, const float* weights, int nfeatures) { - CV_Error(CV_StsError,"BackgroundSubstractorGMG has not been initialized. Call initialize() first.\n"); + for (int i = 0; i < nfeatures; ++i) + { + if (color == colors[i]) + return weights[i]; + } + + // not in histogram, so return 0. 
+ return 0.0f; } - /* - * Update learning rate parameter, if desired - */ + void normalizeHistogram(float* weights, int nfeatures) + { + float total = 0.0f; + for (int i = 0; i < nfeatures; ++i) + total += weights[i]; + + if (total != 0.0f) + { + for (int i = 0; i < nfeatures; ++i) + weights[i] /= total; + } + } + + bool insertFeature(int color, float weight, int* colors, float* weights, int& nfeatures, int maxFeatures) + { + int idx = -1; + for (int i = 0; i < nfeatures; ++i) + { + if (color == colors[i]) + { + // feature in histogram + weight += weights[i]; + idx = i; + break; + } + } + + if (idx >= 0) + { + // move feature to beginning of list + + ::memmove(colors + 1, colors, idx * sizeof(int)); + ::memmove(weights + 1, weights, idx * sizeof(float)); + + colors[0] = color; + weights[0] = weight; + } + else if (nfeatures == maxFeatures) + { + // discard oldest feature + + ::memmove(colors + 1, colors, (nfeatures - 1) * sizeof(int)); + ::memmove(weights + 1, weights, (nfeatures - 1) * sizeof(float)); + + colors[0] = color; + weights[0] = weight; + } + else + { + colors[nfeatures] = color; + weights[nfeatures] = weight; + + ++nfeatures; + + return true; + } + + return false; + } +} + +namespace +{ + template struct Quantization_ + { + template + static inline int apply(T val, double minVal, double maxVal, int quantizationLevels) + { + int res = 0; + res |= static_cast((val[0] - minVal) * quantizationLevels / (maxVal - minVal)); + res |= static_cast((val[1] - minVal) * quantizationLevels / (maxVal - minVal)) << 8; + res |= static_cast((val[2] - minVal) * quantizationLevels / (maxVal - minVal)) << 16; + return res; + } + }; + template <> struct Quantization_<1> + { + template + static inline int apply(T val, double minVal, double maxVal, int quantizationLevels) + { + return static_cast((val - minVal) * quantizationLevels / (maxVal - minVal)); + } + }; + template struct Quantization + { + static int apply(const void* src_, int x, double minVal, double maxVal, int quantizationLevels) + { + const T* src = static_cast(src_); + return Quantization_::channels>::apply(src[x], minVal, maxVal, quantizationLevels); + } + }; + + class GMG_LoopBody : public cv::ParallelLoopBody + { + public: + GMG_LoopBody(const cv::Mat& frame, const cv::Mat& fgmask, const cv::Mat_& nfeatures, const cv::Mat_& colors, const cv::Mat_& weights, + int maxFeatures, double learningRate, int numInitializationFrames, int quantizationLevels, double backgroundPrior, double decisionThreshold, + double maxVal, double minVal, int frameNum, bool updateBackgroundModel) : + frame_(frame), fgmask_(fgmask), nfeatures_(nfeatures), colors_(colors), weights_(weights), + maxFeatures_(maxFeatures), learningRate_(learningRate), numInitializationFrames_(numInitializationFrames), + quantizationLevels_(quantizationLevels), backgroundPrior_(backgroundPrior), decisionThreshold_(decisionThreshold), + maxVal_(maxVal), minVal_(minVal), frameNum_(frameNum), updateBackgroundModel_(updateBackgroundModel) + { + } + + void operator() (const cv::Range& range) const; + + private: + const cv::Mat frame_; + + mutable cv::Mat_ fgmask_; + + mutable cv::Mat_ nfeatures_; + mutable cv::Mat_ colors_; + mutable cv::Mat_ weights_; + + int maxFeatures_; + double learningRate_; + int numInitializationFrames_; + int quantizationLevels_; + double backgroundPrior_; + double decisionThreshold_; + bool updateBackgroundModel_; + + double maxVal_; + double minVal_; + int frameNum_; + }; + + void GMG_LoopBody::operator() (const cv::Range& range) const + { + typedef int 
(*func_t)(const void* src_, int x, double minVal, double maxVal, int quantizationLevels); + static const func_t funcs[6][4] = + { + {Quantization::apply, 0, Quantization::apply, Quantization::apply}, + {0,0,0,0}, + {Quantization::apply, 0, Quantization::apply, Quantization::apply}, + {0,0,0,0}, + {0,0,0,0}, + {Quantization::apply, 0, Quantization::apply, Quantization::apply}, + }; + + const func_t func = funcs[frame_.depth()][frame_.channels() - 1]; + CV_Assert(func != 0); + + for (int y = range.start, featureIdx = y * frame_.cols; y < range.end; ++y) + { + const uchar* frame_row = frame_.ptr(y); + int* nfeatures_row = nfeatures_[y]; + uchar* fgmask_row = fgmask_[y]; + + for (int x = 0; x < frame_.cols; ++x, ++featureIdx) + { + int nfeatures = nfeatures_row[x]; + int* colors = colors_[featureIdx]; + float* weights = weights_[featureIdx]; + + int newFeatureColor = func(frame_row, x, minVal_, maxVal_, quantizationLevels_); + + bool isForeground = false; + + if (frameNum_ >= numInitializationFrames_) + { + // typical operation + + const double weight = findFeature(newFeatureColor, colors, weights, nfeatures); + + // see Godbehere, Matsukawa, Goldberg (2012) for reasoning behind this implementation of Bayes rule + const double posterior = (weight * backgroundPrior_) / (weight * backgroundPrior_ + (1.0 - weight) * (1.0 - backgroundPrior_)); + + isForeground = ((1.0 - posterior) > decisionThreshold_); + + // update histogram. + + if (updateBackgroundModel_) + { + for (int i = 0; i < nfeatures; ++i) + weights[i] *= 1.0f - learningRate_; + + bool inserted = insertFeature(newFeatureColor, learningRate_, colors, weights, nfeatures, maxFeatures_); + + if (inserted) + { + normalizeHistogram(weights, nfeatures); + nfeatures_row[x] = nfeatures; + } + } + } + else if (updateBackgroundModel_) + { + // training-mode update + + insertFeature(newFeatureColor, 1.0f, colors, weights, nfeatures, maxFeatures_); + + if (frameNum_ == numInitializationFrames_ - 1) + normalizeHistogram(weights, nfeatures); + } + + fgmask_row[x] = (uchar)(-isForeground); + } + } + } +} + +void cv::BackgroundSubtractorGMG::operator ()(InputArray _frame, OutputArray _fgmask, double newLearningRate) +{ + cv::Mat frame = _frame.getMat(); + + CV_Assert(frame.depth() == CV_8U || frame.depth() == CV_16U || frame.depth() == CV_32F); + CV_Assert(frame.channels() == 1 || frame.channels() == 3 || frame.channels() == 4); + if (newLearningRate != -1.0) { - if (newLearningRate < 0.0 || newLearningRate > 1.0) - { - CV_Error(CV_StsOutOfRange,"Learning rate for Operator () must be between 0.0 and 1.0.\n"); - } - this->learningRate = newLearningRate; + CV_Assert(newLearningRate >= 0.0 && newLearningRate <= 1.0); + learningRate = newLearningRate; } - Mat image = _image.getMat(); + if (frame.size() != frameSize_) + initialize(frame.size(), 0.0, frame.depth() == CV_8U ? 255.0 : frame.depth() == CV_16U ? std::numeric_limits::max() : 1.0); - _fgmask.create(imHeight,imWidth,CV_8U); - fgMaskImage = _fgmask.getMat(); // 8-bit unsigned mask. 255 for FG, 0 for BG + _fgmask.create(frameSize_, CV_8UC1); + cv::Mat fgmask = _fgmask.getMat(); - /* - * Iterate over pixels in image - */ - // grab data at each pixel (1,2,3 channels, int, float, etc.) - // grab data as an array of bytes. Then, send that array to a function that reads data into vector of appropriate types... and quantizing... before saving as a feature, which is a vector of flexitypes, so code can be portable. 
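For reference, the posterior computed in the new loop body above (the line citing Godbehere, Matsukawa and Goldberg, 2012) is plain Bayes' rule, with the histogram weight of the observed quantized colour acting as its likelihood under the background model; this restates the code, it adds no behaviour:

P(\mathrm{BG} \mid c) = \frac{w\,p}{w\,p + (1 - w)(1 - p)},
\qquad
\text{foreground} \iff 1 - P(\mathrm{BG} \mid c) > \text{decisionThreshold},

where c is the quantized colour, w = findFeature(...) plays the role of P(c | BG) (with 1 - w standing in for P(c | FG)), and p is backgroundPrior.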
- // multiple channels do have sequential storage, use mat::elemSize() and mat::elemSize1() - vector::iterator pixel; - vector::iterator pixel_end = pixels.end(); - size_t i; - //#pragma omp parallel - for (i = 0, pixel=pixels.begin(); pixel != pixel_end; ++i,++pixel) + GMG_LoopBody body(frame, fgmask, nfeatures_, colors_, weights_, + maxFeatures, learningRate, numInitializationFrames, quantizationLevels, backgroundPrior, decisionThreshold, + maxVal_, minVal_, frameNum_, updateBackgroundModel); + cv::parallel_for_(cv::Range(0, frame.rows), body); + + if (smoothingRadius > 0) { - HistogramFeatureGMG newFeature; - newFeature.color.clear(); - int irow = int(i / imWidth); - int icol = i % imWidth; - for (size_t c = 0; c < numChannels; ++c) - { - /* - * Perform quantization. in each channel. (color-min)*(levels)/(max-min). - * Shifts min to 0 and scales, finally casting to an int. - */ - double color; - switch(image.depth()) - { - case CV_8U: color = image.ptr(irow)[icol * numChannels + c]; break; - case CV_8S: color = image.ptr(irow)[icol * numChannels + c]; break; - case CV_16U: color = image.ptr(irow)[icol * numChannels + c]; break; - case CV_16S: color = image.ptr(irow)[icol * numChannels + c]; break; - case CV_32S: color = image.ptr(irow)[icol * numChannels + c]; break; - case CV_32F: color = image.ptr(irow)[icol * numChannels + c]; break; - case CV_64F: color = image.ptr(irow)[icol * numChannels + c]; break; - default: color = 0; break; - } - size_t quantizedColor = (size_t)((color-minVal)*quantizationLevels/(maxVal-minVal)); - newFeature.color.push_back(quantizedColor); - } - // now that the feature is ready for use, put it in the histogram - - if (frameNum > numInitializationFrames) // typical operation - { - newFeature.likelihood = float(learningRate); - /* - * (1) Query histogram to find posterior probability of feature under model. - */ - float likelihood = (float)pixel->getLikelihood(newFeature); - - // see Godbehere, Matsukawa, Goldberg (2012) for reasoning behind this implementation of Bayes rule - float posterior = float((likelihood*backgroundPrior)/(likelihood*backgroundPrior+(1-likelihood)*(1-backgroundPrior))); - - /* - * (2) feed posterior probability into the posterior image - */ - int row,col; - col = i%imWidth; - row = int(i-col)/imWidth; - posteriorImage.at(row,col) = (1.0f-posterior); - } - pixel->setLastObservedFeature(newFeature); - } - /* - * (3) Perform filtering and threshold operations to yield final mask image. - * - * 2 options. First is morphological open/close as before. 
Second is "median filtering" which Jon Barron says is good to remove noise - */ - Mat thresholdedPosterior; - threshold(posteriorImage,thresholdedPosterior,decisionThreshold,1.0,THRESH_BINARY); - thresholdedPosterior.convertTo(fgMaskImage,CV_8U,255); // convert image to integer space for further filtering and mask creation - medianBlur(fgMaskImage,fgMaskImage,smoothingRadius); - - fgMaskImage.copyTo(_fgmask); - - ++frameNum; // keep track of how many frames we have processed -} - -void BackgroundSubtractorGMG::getPosteriorImage(OutputArray _img) -{ - _img.create(Size(imWidth,imHeight),CV_32F); - Mat img = _img.getMat(); - posteriorImage.copyTo(img); -} - -void BackgroundSubtractorGMG::updateBackgroundModel(InputArray _mask) -{ - CV_Assert(_mask.size() == Size(imWidth,imHeight)); // mask should be same size as image - - Mat maskImg = _mask.getMat(); -//#pragma omp parallel - for (int i = 0; i < imHeight; ++i) - { -//#pragma omp parallel - for (int j = 0; j < imWidth; ++j) - { - if (frameNum <= numInitializationFrames + 1) - { - // insert previously observed feature into the histogram. -1.0 parameter indicates training. - pixels[i*imWidth+j].insertFeature(-1.0); - if (frameNum >= numInitializationFrames+1) // training is done, normalize - { - pixels[i*imWidth+j].normalizeHistogram(); - } - } - // if mask is 0, pixel is identified as a background pixel, so update histogram. - else if (maskImg.at(i,j) == 0) - { - pixels[i*imWidth+j].insertFeature(learningRate); // updates the histogram for the next iteration. - } - } - } -} - -BackgroundSubtractorGMG::~BackgroundSubtractorGMG() -{ - -} - -BackgroundSubtractorGMG::PixelModelGMG::PixelModelGMG() -{ - numFeatures = 0; - maxFeatures = 0; -} - -BackgroundSubtractorGMG::PixelModelGMG::~PixelModelGMG() -{ - -} - -void BackgroundSubtractorGMG::PixelModelGMG::setLastObservedFeature(HistogramFeatureGMG f) -{ - this->lastObservedFeature = f; -} - -double BackgroundSubtractorGMG::PixelModelGMG::getLikelihood(BackgroundSubtractorGMG::HistogramFeatureGMG f) -{ - std::list::iterator feature = histogram.begin(); - std::list::iterator feature_end = histogram.end(); - - for (feature = histogram.begin(); feature != feature_end; ++feature) - { - // comparing only feature color, not likelihood. See equality operator for HistogramFeatureGMG - if (f == *feature) - { - return feature->likelihood; - } + cv::medianBlur(fgmask, buf_, smoothingRadius); + cv::swap(fgmask, buf_); } - return 0.0; // not in histogram, so return 0. + // keep track of how many frames we have processed + ++frameNum_; } -void BackgroundSubtractorGMG::PixelModelGMG::insertFeature(double learningRate) +void cv::BackgroundSubtractorGMG::release() { + frameSize_ = cv::Size(); - std::list::iterator feature; - std::list::iterator swap_end; - std::list::iterator last_feature = histogram.end(); - /* - * If feature is in histogram already, add the weights, and move feature to front. - * If there are too many features, remove the end feature and push new feature to beginning - */ - if (learningRate == -1.0) // then, this is a training-mode update. 
- { - /* - * (1) Check if feature already represented in histogram - */ - lastObservedFeature.likelihood = 1.0; - - for (feature = histogram.begin(); feature != last_feature; ++feature) - { - if (lastObservedFeature == *feature) // feature in histogram - { - feature->likelihood += lastObservedFeature.likelihood; - // now, move feature to beginning of list and break the loop - HistogramFeatureGMG tomove = *feature; - histogram.erase(feature); - histogram.push_front(tomove); - return; - } - } - if (numFeatures == maxFeatures) - { - histogram.pop_back(); // discard oldest feature - histogram.push_front(lastObservedFeature); - } - else - { - histogram.push_front(lastObservedFeature); - ++numFeatures; - } - } - else - { - /* - * (1) Scale entire histogram by scaling factor - * (2) Scale input feature. - * (3) Check if feature already represented. If so, simply add. - * (4) If feature is not represented, remove old feature, distribute weight evenly among existing features, add in new feature. - */ - *this *= float(1.0-learningRate); - lastObservedFeature.likelihood = float(learningRate); - - for (feature = histogram.begin(); feature != last_feature; ++feature) - { - if (lastObservedFeature == *feature) // feature in histogram - { - lastObservedFeature.likelihood += feature->likelihood; - histogram.erase(feature); - histogram.push_front(lastObservedFeature); - return; // done with the update. - } - } - if (numFeatures == maxFeatures) - { - histogram.pop_back(); // discard oldest feature - histogram.push_front(lastObservedFeature); - normalizeHistogram(); - } - else - { - histogram.push_front(lastObservedFeature); - ++numFeatures; - } - } + nfeatures_.release(); + colors_.release(); + weights_.release(); + buf_.release(); } - -BackgroundSubtractorGMG::PixelModelGMG& BackgroundSubtractorGMG::PixelModelGMG::operator *=(const float &rhs) -{ - /* - * Used to scale histogram by a constant factor - */ - list::iterator feature; - list::iterator last_feature = histogram.end(); - for (feature = histogram.begin(); feature != last_feature; ++feature) - { - feature->likelihood *= rhs; - } - return *this; -} - -void BackgroundSubtractorGMG::PixelModelGMG::normalizeHistogram() -{ - /* - * First, calculate the total weight in the histogram - */ - list::iterator feature; - list::iterator last_feature = histogram.end(); - double total = 0.0; - for (feature = histogram.begin(); feature != last_feature; ++feature) - { - total += feature->likelihood; - } - - /* - * Then, if weight is not 0, divide every feature by the total likelihood to re-normalize. 
- */ - for (feature = histogram.begin(); feature != last_feature; ++feature) - { - if (total != 0.0) - feature->likelihood = float(feature->likelihood / total); - } -} - -bool BackgroundSubtractorGMG::HistogramFeatureGMG::operator ==(HistogramFeatureGMG &rhs) -{ - CV_Assert(color.size() == rhs.color.size()); - - std::vector::iterator color_a; - std::vector::iterator color_b; - std::vector::iterator color_a_end = this->color.end(); - std::vector::iterator color_b_end = rhs.color.end(); - for (color_a = color.begin(),color_b =rhs.color.begin();color_a!=color_a_end;++color_a,++color_b) - { - if (*color_a != *color_b) - { - return false; - } - } - return true; -} - - -} - diff --git a/modules/video/src/video_init.cpp b/modules/video/src/video_init.cpp index def0cd7c83..0f3cec144c 100644 --- a/modules/video/src/video_init.cpp +++ b/modules/video/src/video_init.cpp @@ -78,7 +78,9 @@ CV_INIT_ALGORITHM(BackgroundSubtractorGMG, "BackgroundSubtractor.GMG", obj.info()->addParam(obj, "smoothingRadius", obj.smoothingRadius,false,0,0, "Radius of smoothing kernel to filter noise from FG mask image."); obj.info()->addParam(obj, "decisionThreshold", obj.decisionThreshold,false,0,0, - "Threshold for FG decision rule. Pixel is FG if posterior probability exceeds threshold.")); + "Threshold for FG decision rule. Pixel is FG if posterior probability exceeds threshold."); + obj.info()->addParam(obj, "updateBackgroundModel", obj.updateBackgroundModel,false,0,0, + "Perform background model update.")); bool initModule_video(void) { diff --git a/modules/video/test/test_backgroundsubtractor_gbh.cpp b/modules/video/test/test_backgroundsubtractor_gbh.cpp index 8486e541cb..c858e4a093 100644 --- a/modules/video/test/test_backgroundsubtractor_gbh.cpp +++ b/modules/video/test/test_backgroundsubtractor_gbh.cpp @@ -115,43 +115,43 @@ void CV_BackgroundSubtractorTest::run(int) { rng.fill(simImage,RNG::UNIFORM,(unsigned char)(minuc/2+maxuc/2),maxuc); if (i == 0) - fgbg->initializeType(simImage,minuc,maxuc); + fgbg->initialize(simImage.size(),minuc,maxuc); } else if (type == CV_8S) { rng.fill(simImage,RNG::UNIFORM,(char)(minc/2+maxc/2),maxc); if (i==0) - fgbg->initializeType(simImage,minc,maxc); + fgbg->initialize(simImage.size(),minc,maxc); } else if (type == CV_16U) { rng.fill(simImage,RNG::UNIFORM,(unsigned int)(minui/2+maxui/2),maxui); if (i==0) - fgbg->initializeType(simImage,minui,maxui); + fgbg->initialize(simImage.size(),minui,maxui); } else if (type == CV_16S) { rng.fill(simImage,RNG::UNIFORM,(int)(mini/2+maxi/2),maxi); if (i==0) - fgbg->initializeType(simImage,mini,maxi); + fgbg->initialize(simImage.size(),mini,maxi); } else if (type == CV_32F) { rng.fill(simImage,RNG::UNIFORM,(float)(minf/2.0+maxf/2.0),maxf); if (i==0) - fgbg->initializeType(simImage,minf,maxf); + fgbg->initialize(simImage.size(),minf,maxf); } else if (type == CV_32S) { rng.fill(simImage,RNG::UNIFORM,(long int)(minli/2+maxli/2),maxli); if (i==0) - fgbg->initializeType(simImage,minli,maxli); + fgbg->initialize(simImage.size(),minli,maxli); } else if (type == CV_64F) { rng.fill(simImage,RNG::UNIFORM,(double)(mind/2.0+maxd/2.0),maxd); if (i==0) - fgbg->initializeType(simImage,mind,maxd); + fgbg->initialize(simImage.size(),mind,maxd); } /** @@ -159,7 +159,6 @@ void CV_BackgroundSubtractorTest::run(int) */ (*fgbg)(simImage,fgmask); Mat fullbg = Mat::zeros(simImage.rows, simImage.cols, CV_8U); - fgbg->updateBackgroundModel(fullbg); //! 
fgmask should be entirely background during training code = cvtest::cmpEps2( ts, fgmask, fullbg, 0, false, "The training foreground mask" ); diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 505cd6e3a1..799f34f100 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -6,6 +6,7 @@ add_subdirectory(c) add_subdirectory(cpp) add_subdirectory(gpu) +add_subdirectory(ocl) if(ANDROID AND BUILD_ANDROID_EXAMPLES) add_subdirectory(android) diff --git a/samples/cpp/bgfg_gmg.cpp b/samples/cpp/bgfg_gmg.cpp index b6a2852042..3d2da44ab2 100644 --- a/samples/cpp/bgfg_gmg.cpp +++ b/samples/cpp/bgfg_gmg.cpp @@ -7,91 +7,76 @@ #include #include -#include using namespace cv; static void help() { - std::cout << - "\nA program demonstrating the use and capabilities of a particular BackgroundSubtraction\n" - "algorithm described in A. Godbehere, A. Matsukawa, K. Goldberg, \n" - "\"Visual Tracking of Human Visitors under Variable-Lighting Conditions for a Responsive\n" - "Audio Art Installation\", American Control Conference, 2012, used in an interactive\n" - "installation at the Contemporary Jewish Museum in San Francisco, CA from March 31 through\n" - "July 31, 2011.\n" - "Call:\n" - "./BackgroundSubtractorGMG_sample\n" - "Using OpenCV version " << CV_VERSION << "\n"< fgbg = Algorithm::create("BackgroundSubtractor.GMG"); - if (fgbg == NULL) - { - CV_Error(CV_StsError,"Failed to create Algorithm\n"); - } - fgbg->set("smoothingRadius",7); - fgbg->set("decisionThreshold",0.7); + initModule_video(); + setUseOptimized(true); + setNumThreads(8); - VideoCapture cap; - if( argc > 1 ) + Ptr fgbg = Algorithm::create("BackgroundSubtractor.GMG"); + if (fgbg.empty()) + { + std::cerr << "Failed to create BackgroundSubtractor.GMG Algorithm." << std::endl; + return -1; + } + + fgbg->set("initializationFrames", 20); + fgbg->set("decisionThreshold", 0.7); + + VideoCapture cap; + if (argc > 1) cap.open(argv[1]); else cap.open(0); - - if (!cap.isOpened()) - { - std::cout << "error: cannot read video. Try moving video file to sample directory.\n"; - return -1; - } - Mat img, downimg, downimg2, fgmask, upfgmask, posterior, upposterior; + if (!cap.isOpened()) + { + std::cerr << "Cannot read video. Try moving video file to sample directory." 
<< std::endl; + return -1; + } - bool first = true; - namedWindow("posterior"); - namedWindow("fgmask"); - namedWindow("FG Segmentation"); - int i = 0; - for (;;) - { - std::stringstream txt; - txt << "frame: "; - txt << i++; + Mat frame, fgmask, segm; - cap >> img; - putText(img,txt.str(),Point(20,40),FONT_HERSHEY_SIMPLEX,0.8,Scalar(1.0,0.0,0.0)); + namedWindow("FG Segmentation", WINDOW_NORMAL); - resize(img,downimg,Size(160,120),0,0,INTER_NEAREST); // Size(cols, rows) or Size(width,height) - if (first) - { - fgbg->initializeType(downimg,0,255); - first = false; - } - if (img.empty()) - { - return 0; - } - (*fgbg)(downimg,fgmask); - fgbg->updateBackgroundModel(Mat::zeros(120,160,CV_8U)); - fgbg->getPosteriorImage(posterior); - resize(fgmask,upfgmask,Size(640,480),0,0,INTER_NEAREST); - Mat coloredFG = Mat::zeros(480,640,CV_8UC3); - coloredFG.setTo(Scalar(100,100,0),upfgmask); + for (;;) + { + cap >> frame; + + if (frame.empty()) + break; + + (*fgbg)(frame, fgmask); + + frame.copyTo(segm); + add(frame, Scalar(100, 100, 0), segm, fgmask); + + imshow("FG Segmentation", segm); - resize(posterior,upposterior,Size(640,480),0,0,INTER_NEAREST); - imshow("posterior",upposterior); - imshow("fgmask",upfgmask); - resize(img, downimg2, Size(640, 480),0,0,INTER_LINEAR); - imshow("FG Segmentation",downimg2 + coloredFG); int c = waitKey(30); - if( c == 'q' || c == 'Q' || (c & 255) == 27 ) - break; - } + if (c == 'q' || c == 'Q' || (c & 255) == 27) + break; + } + + return 0; } diff --git a/samples/cpp/stitching_detailed.cpp b/samples/cpp/stitching_detailed.cpp index 220d46604a..1e461174df 100644 --- a/samples/cpp/stitching_detailed.cpp +++ b/samples/cpp/stitching_detailed.cpp @@ -276,7 +276,9 @@ static int parseCmdArgs(int argc, char** argv) if (string(argv[i + 1]) == "no" || string(argv[i + 1]) == "voronoi" || string(argv[i + 1]) == "gc_color" || - string(argv[i + 1]) == "gc_colorgrad") + string(argv[i + 1]) == "gc_colorgrad" || + string(argv[i + 1]) == "dp_color" || + string(argv[i + 1]) == "dp_colorgrad") seam_find_type = argv[i + 1]; else { @@ -612,6 +614,10 @@ int main(int argc, char* argv[]) #endif seam_finder = new detail::GraphCutSeamFinder(GraphCutSeamFinderBase::COST_COLOR_GRAD); } + else if (seam_find_type == "dp_color") + seam_finder = new detail::DpSeamFinder(DpSeamFinder::COLOR); + else if (seam_find_type == "dp_colorgrad") + seam_finder = new detail::DpSeamFinder(DpSeamFinder::COLOR_GRAD); if (seam_finder.empty()) { cout << "Can't create the following seam finder '" << seam_find_type << "'\n"; diff --git a/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp b/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp index 42663e0a56..7597a1f265 100644 --- a/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp +++ b/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp @@ -30,11 +30,11 @@ struct BufferMSSIM // Optimized GPU versions gpu::GpuMat I1_2, I2_2, I1_I2; vector vI1, vI2; - gpu::GpuMat mu1, mu2; - gpu::GpuMat mu1_2, mu2_2, mu1_mu2; + gpu::GpuMat mu1, mu2; + gpu::GpuMat mu1_2, mu2_2, mu1_mu2; - gpu::GpuMat sigma1_2, sigma2_2, sigma12; - gpu::GpuMat t3; + gpu::GpuMat sigma1_2, sigma2_2, sigma12; + gpu::GpuMat t3; gpu::GpuMat ssim_map; @@ -56,7 +56,7 @@ void help() int main(int argc, char *argv[]) { - help(); + help(); Mat I1 = imread(argv[1]); // Read the two images Mat I2 = imread(argv[2]); @@ -69,13 +69,13 @@ int main(int argc, char *argv[]) BufferPSNR bufferPSNR; BufferMSSIM bufferMSSIM; - 
int TIMES; - stringstream sstr(argv[3]); + int TIMES; + stringstream sstr(argv[3]); sstr >> TIMES; double time, result; //------------------------------- PSNR CPU ---------------------------------------------------- - time = (double)getTickCount(); + time = (double)getTickCount(); for (int i = 0; i < TIMES; ++i) result = getPSNR(I1,I2); @@ -84,10 +84,10 @@ int main(int argc, char *argv[]) time /= TIMES; cout << "Time of PSNR CPU (averaged for " << TIMES << " runs): " << time << " milliseconds." - << " With result of: " << result << endl; + << " With result of: " << result << endl; //------------------------------- PSNR GPU ---------------------------------------------------- - time = (double)getTickCount(); + time = (double)getTickCount(); for (int i = 0; i < TIMES; ++i) result = getPSNR_GPU(I1,I2); @@ -96,7 +96,7 @@ int main(int argc, char *argv[]) time /= TIMES; cout << "Time of PSNR GPU (averaged for " << TIMES << " runs): " << time << " milliseconds." - << " With result of: " << result << endl; + << " With result of: " << result << endl; //------------------------------- PSNR GPU Optimized-------------------------------------------- time = (double)getTickCount(); // Initial call @@ -105,20 +105,20 @@ int main(int argc, char *argv[]) cout << "Initial call GPU optimized: " << time <<" milliseconds." << " With result of: " << result << endl; - time = (double)getTickCount(); + time = (double)getTickCount(); for (int i = 0; i < TIMES; ++i) result = getPSNR_GPU_optimized(I1, I2, bufferPSNR); time = 1000*((double)getTickCount() - time)/getTickFrequency(); time /= TIMES; - cout << "Time of PSNR GPU OPTIMIZED ( / " << TIMES << " runs): " << time - << " milliseconds." << " With result of: " << result << endl << endl; + cout << "Time of PSNR GPU OPTIMIZED ( / " << TIMES << " runs): " << time + << " milliseconds." << " With result of: " << result << endl << endl; //------------------------------- SSIM CPU ----------------------------------------------------- Scalar x; - time = (double)getTickCount(); + time = (double)getTickCount(); for (int i = 0; i < TIMES; ++i) x = getMSSIM(I1,I2); @@ -127,10 +127,10 @@ int main(int argc, char *argv[]) time /= TIMES; cout << "Time of MSSIM CPU (averaged for " << TIMES << " runs): " << time << " milliseconds." - << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl; + << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl; //------------------------------- SSIM GPU ----------------------------------------------------- - time = (double)getTickCount(); + time = (double)getTickCount(); for (int i = 0; i < TIMES; ++i) x = getMSSIM_GPU(I1,I2); @@ -139,16 +139,16 @@ int main(int argc, char *argv[]) time /= TIMES; cout << "Time of MSSIM GPU (averaged for " << TIMES << " runs): " << time << " milliseconds." - << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl; + << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl; //------------------------------- SSIM GPU Optimized-------------------------------------------- - time = (double)getTickCount(); + time = (double)getTickCount(); x = getMSSIM_GPU_optimized(I1,I2, bufferMSSIM); time = 1000*((double)getTickCount() - time)/getTickFrequency(); cout << "Time of MSSIM GPU Initial Call " << time << " milliseconds." 
- << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl; + << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl; - time = (double)getTickCount(); + time = (double)getTickCount(); for (int i = 0; i < TIMES; ++i) x = getMSSIM_GPU_optimized(I1,I2, bufferMSSIM); @@ -157,14 +157,14 @@ int main(int argc, char *argv[]) time /= TIMES; cout << "Time of MSSIM GPU OPTIMIZED ( / " << TIMES << " runs): " << time << " milliseconds." - << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl << endl; + << " With result of B" << x.val[0] << " G" << x.val[1] << " R" << x.val[2] << endl << endl; return 0; } double getPSNR(const Mat& I1, const Mat& I2) { - Mat s1; + Mat s1; absdiff(I1, I2, s1); // |I1 - I2| s1.convertTo(s1, CV_32F); // cannot make a square on 8 bits s1 = s1.mul(s1); // |I1 - I2|^2 @@ -186,7 +186,7 @@ double getPSNR(const Mat& I1, const Mat& I2) double getPSNR_GPU_optimized(const Mat& I1, const Mat& I2, BufferPSNR& b) -{ +{ b.gI1.upload(I1); b.gI2.upload(I2); @@ -210,7 +210,7 @@ double getPSNR_GPU_optimized(const Mat& I1, const Mat& I2, BufferPSNR& b) double getPSNR_GPU(const Mat& I1, const Mat& I2) { - gpu::GpuMat gI1, gI2, gs, t1,t2; + gpu::GpuMat gI1, gI2, gs, t1,t2; gI1.upload(I1); gI2.upload(I2); @@ -218,7 +218,7 @@ double getPSNR_GPU(const Mat& I1, const Mat& I2) gI1.convertTo(t1, CV_32F); gI2.convertTo(t2, CV_32F); - gpu::absdiff(t1.reshape(1), t2.reshape(1), gs); + gpu::absdiff(t1.reshape(1), t2.reshape(1), gs); gpu::multiply(gs, gs, gs); Scalar s = gpu::sum(gs); @@ -235,14 +235,14 @@ double getPSNR_GPU(const Mat& I1, const Mat& I2) } Scalar getMSSIM( const Mat& i1, const Mat& i2) -{ +{ const double C1 = 6.5025, C2 = 58.5225; /***************************** INITS **********************************/ int d = CV_32F; - Mat I1, I2; + Mat I1, I2; i1.convertTo(I1, d); // cannot calculate on one byte large values - i2.convertTo(I2, d); + i2.convertTo(I2, d); Mat I2_2 = I2.mul(I2); // I2^2 Mat I1_2 = I1.mul(I1); // I1^2 @@ -254,11 +254,11 @@ Scalar getMSSIM( const Mat& i1, const Mat& i2) GaussianBlur(I1, mu1, Size(11, 11), 1.5); GaussianBlur(I2, mu2, Size(11, 11), 1.5); - Mat mu1_2 = mu1.mul(mu1); - Mat mu2_2 = mu2.mul(mu2); + Mat mu1_2 = mu1.mul(mu1); + Mat mu2_2 = mu2.mul(mu2); Mat mu1_mu2 = mu1.mul(mu2); - Mat sigma1_2, sigma2_2, sigma12; + Mat sigma1_2, sigma2_2, sigma12; GaussianBlur(I1_2, sigma1_2, Size(11, 11), 1.5); sigma1_2 -= mu1_2; @@ -270,28 +270,28 @@ Scalar getMSSIM( const Mat& i1, const Mat& i2) sigma12 -= mu1_mu2; ///////////////////////////////// FORMULA //////////////////////////////// - Mat t1, t2, t3; + Mat t1, t2, t3; - t1 = 2 * mu1_mu2 + C1; - t2 = 2 * sigma12 + C2; + t1 = 2 * mu1_mu2 + C1; + t2 = 2 * sigma12 + C2; t3 = t1.mul(t2); // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2)) - t1 = mu1_2 + mu2_2 + C1; - t2 = sigma1_2 + sigma2_2 + C2; + t1 = mu1_2 + mu2_2 + C1; + t2 = sigma1_2 + sigma2_2 + C2; t1 = t1.mul(t2); // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2)) Mat ssim_map; divide(t3, t1, ssim_map); // ssim_map = t3./t1; Scalar mssim = mean( ssim_map ); // mssim = average of ssim map - return mssim; + return mssim; } Scalar getMSSIM_GPU( const Mat& i1, const Mat& i2) -{ +{ const float C1 = 6.5025f, C2 = 58.5225f; /***************************** INITS **********************************/ - gpu::GpuMat gI1, gI2, gs1, t1,t2; + gpu::GpuMat gI1, gI2, gs1, t1,t2; gI1.upload(i1); gI2.upload(i2); @@ -299,14 +299,14 @@ Scalar getMSSIM_GPU( const Mat& i1, const Mat& i2) gI1.convertTo(t1, 
CV_MAKE_TYPE(CV_32F, gI1.channels())); gI2.convertTo(t2, CV_MAKE_TYPE(CV_32F, gI2.channels())); - vector vI1, vI2; + vector vI1, vI2; gpu::split(t1, vI1); gpu::split(t2, vI2); Scalar mssim; for( int i = 0; i < gI1.channels(); ++i ) { - gpu::GpuMat I2_2, I1_2, I1_I2; + gpu::GpuMat I2_2, I1_2, I1_I2; gpu::multiply(vI2[i], vI2[i], I2_2); // I2^2 gpu::multiply(vI1[i], vI1[i], I1_2); // I1^2 @@ -317,45 +317,45 @@ Scalar getMSSIM_GPU( const Mat& i1, const Mat& i2) gpu::GaussianBlur(vI1[i], mu1, Size(11, 11), 1.5); gpu::GaussianBlur(vI2[i], mu2, Size(11, 11), 1.5); - gpu::GpuMat mu1_2, mu2_2, mu1_mu2; - gpu::multiply(mu1, mu1, mu1_2); - gpu::multiply(mu2, mu2, mu2_2); - gpu::multiply(mu1, mu2, mu1_mu2); + gpu::GpuMat mu1_2, mu2_2, mu1_mu2; + gpu::multiply(mu1, mu1, mu1_2); + gpu::multiply(mu2, mu2, mu2_2); + gpu::multiply(mu1, mu2, mu1_mu2); - gpu::GpuMat sigma1_2, sigma2_2, sigma12; + gpu::GpuMat sigma1_2, sigma2_2, sigma12; gpu::GaussianBlur(I1_2, sigma1_2, Size(11, 11), 1.5); - sigma1_2 -= mu1_2; + gpu::subtract(sigma1_2, mu1_2, sigma1_2); // sigma1_2 -= mu1_2; gpu::GaussianBlur(I2_2, sigma2_2, Size(11, 11), 1.5); - sigma2_2 -= mu2_2; + gpu::subtract(sigma2_2, mu2_2, sigma2_2); // sigma2_2 -= mu2_2; gpu::GaussianBlur(I1_I2, sigma12, Size(11, 11), 1.5); - sigma12 -= mu1_mu2; + gpu::subtract(sigma12, mu1_mu2, sigma12); // sigma12 -= mu1_mu2; ///////////////////////////////// FORMULA //////////////////////////////// - gpu::GpuMat t1, t2, t3; + gpu::GpuMat t1, t2, t3; - t1 = 2 * mu1_mu2 + C1; - t2 = 2 * sigma12 + C2; - gpu::multiply(t1, t2, t3); // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2)) + mu1_mu2.convertTo(t1, -1, 2, C1); // t1 = 2 * mu1_mu2 + C1; + sigma12.convertTo(t2, -1, 2, C2); // t2 = 2 * sigma12 + C2; + gpu::multiply(t1, t2, t3); // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2)) - t1 = mu1_2 + mu2_2 + C1; - t2 = sigma1_2 + sigma2_2 + C2; - gpu::multiply(t1, t2, t1); // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2)) + gpu::addWeighted(mu1_2, 1.0, mu2_2, 1.0, C1, t1); // t1 = mu1_2 + mu2_2 + C1; + gpu::addWeighted(sigma1_2, 1.0, sigma2_2, 1.0, C2, t2); // t2 = sigma1_2 + sigma2_2 + C2; + gpu::multiply(t1, t2, t1); // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2)) gpu::GpuMat ssim_map; gpu::divide(t3, t1, ssim_map); // ssim_map = t3./t1; - Scalar s = gpu::sum(ssim_map); + Scalar s = gpu::sum(ssim_map); mssim.val[i] = s.val[0] / (ssim_map.rows * ssim_map.cols); } - return mssim; + return mssim; } Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b) -{ +{ int cn = i1.channels(); const float C1 = 6.5025f, C2 = 58.5225f; @@ -367,60 +367,63 @@ Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b) gpu::Stream stream; stream.enqueueConvert(b.gI1, b.t1, CV_32F); - stream.enqueueConvert(b.gI2, b.t2, CV_32F); + stream.enqueueConvert(b.gI2, b.t2, CV_32F); gpu::split(b.t1, b.vI1, stream); gpu::split(b.t2, b.vI2, stream); Scalar mssim; + gpu::GpuMat buf; + for( int i = 0; i < b.gI1.channels(); ++i ) - { + { gpu::multiply(b.vI2[i], b.vI2[i], b.I2_2, stream); // I2^2 gpu::multiply(b.vI1[i], b.vI1[i], b.I1_2, stream); // I1^2 gpu::multiply(b.vI1[i], b.vI2[i], b.I1_I2, stream); // I1 * I2 - gpu::GaussianBlur(b.vI1[i], b.mu1, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream); - gpu::GaussianBlur(b.vI2[i], b.mu2, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream); + gpu::GaussianBlur(b.vI1[i], b.mu1, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream); + gpu::GaussianBlur(b.vI2[i], b.mu2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream); - 
gpu::multiply(b.mu1, b.mu1, b.mu1_2, stream); - gpu::multiply(b.mu2, b.mu2, b.mu2_2, stream); - gpu::multiply(b.mu1, b.mu2, b.mu1_mu2, stream); + gpu::multiply(b.mu1, b.mu1, b.mu1_2, stream); + gpu::multiply(b.mu2, b.mu2, b.mu2_2, stream); + gpu::multiply(b.mu1, b.mu2, b.mu1_mu2, stream); - gpu::GaussianBlur(b.I1_2, b.sigma1_2, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream); - gpu::subtract(b.sigma1_2, b.mu1_2, b.sigma1_2, stream); + gpu::GaussianBlur(b.I1_2, b.sigma1_2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream); + gpu::subtract(b.sigma1_2, b.mu1_2, b.sigma1_2, gpu::GpuMat(), -1, stream); //b.sigma1_2 -= b.mu1_2; - This would result in an extra data transfer operation - gpu::GaussianBlur(b.I2_2, b.sigma2_2, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream); - gpu::subtract(b.sigma2_2, b.mu2_2, b.sigma2_2, stream); + gpu::GaussianBlur(b.I2_2, b.sigma2_2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream); + gpu::subtract(b.sigma2_2, b.mu2_2, b.sigma2_2, gpu::GpuMat(), -1, stream); //b.sigma2_2 -= b.mu2_2; - gpu::GaussianBlur(b.I1_I2, b.sigma12, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream); - gpu::subtract(b.sigma12, b.mu1_mu2, b.sigma12, stream); + gpu::GaussianBlur(b.I1_I2, b.sigma12, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream); + gpu::subtract(b.sigma12, b.mu1_mu2, b.sigma12, gpu::GpuMat(), -1, stream); //b.sigma12 -= b.mu1_mu2; //here too it would be an extra data transfer due to call of operator*(Scalar, Mat) - gpu::multiply(b.mu1_mu2, 2, b.t1, stream); //b.t1 = 2 * b.mu1_mu2 + C1; - gpu::add(b.t1, C1, b.t1, stream); - gpu::multiply(b.sigma12, 2, b.t2, stream); //b.t2 = 2 * b.sigma12 + C2; - gpu::add(b.t2, C2, b.t2, stream); + gpu::multiply(b.mu1_mu2, 2, b.t1, 1, -1, stream); //b.t1 = 2 * b.mu1_mu2 + C1; + gpu::add(b.t1, C1, b.t1, gpu::GpuMat(), -1, stream); + gpu::multiply(b.sigma12, 2, b.t2, 1, -1, stream); //b.t2 = 2 * b.sigma12 + C2; + gpu::add(b.t2, C2, b.t2, gpu::GpuMat(), -12, stream); - gpu::multiply(b.t1, b.t2, b.t3, stream); // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2)) + gpu::multiply(b.t1, b.t2, b.t3, 1, -1, stream); // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2)) - gpu::add(b.mu1_2, b.mu2_2, b.t1, stream); - gpu::add(b.t1, C1, b.t1, stream); + gpu::add(b.mu1_2, b.mu2_2, b.t1, gpu::GpuMat(), -1, stream); + gpu::add(b.t1, C1, b.t1, gpu::GpuMat(), -1, stream); - gpu::add(b.sigma1_2, b.sigma2_2, b.t2, stream); - gpu::add(b.t2, C2, b.t2, stream); + gpu::add(b.sigma1_2, b.sigma2_2, b.t2, gpu::GpuMat(), -1, stream); + gpu::add(b.t2, C2, b.t2, gpu::GpuMat(), -1, stream); - gpu::multiply(b.t1, b.t2, b.t1, stream); // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2)) - gpu::divide(b.t3, b.t1, b.ssim_map, stream); // ssim_map = t3./t1; + gpu::multiply(b.t1, b.t2, b.t1, 1, -1, stream); // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2)) + gpu::divide(b.t3, b.t1, b.ssim_map, 1, -1, stream); // ssim_map = t3./t1; stream.waitForCompletion(); - Scalar s = gpu::sum(b.ssim_map, b.buf); + Scalar s = gpu::sum(b.ssim_map, b.buf); mssim.val[i] = s.val[0] / (b.ssim_map.rows * b.ssim_map.cols); } - return mssim; -} \ No newline at end of file + return mssim; +} + diff --git a/samples/gpu/bgfg_segm.cpp b/samples/gpu/bgfg_segm.cpp index 839b0a982c..7c5e148f77 100644 --- a/samples/gpu/bgfg_segm.cpp +++ b/samples/gpu/bgfg_segm.cpp @@ -14,7 +14,8 @@ enum Method FGD_STAT, MOG, MOG2, - VIBE + VIBE, + GMG }; int main(int argc, const char** argv) @@ -22,7 +23,7 @@ int main(int argc, const char** argv) cv::CommandLineParser cmd(argc, argv, "{ c | camera | false | 
use camera }" "{ f | file | 768x576.avi | input video file }" - "{ m | method | mog | method (fgd_stat, mog, mog2, vibe) }" + "{ m | method | mog | method (fgd, mog, mog2, vibe, gmg) }" "{ h | help | false | print help message }"); if (cmd.get("help")) @@ -37,13 +38,13 @@ int main(int argc, const char** argv) string file = cmd.get("file"); string method = cmd.get("method"); - if (method != "fgd_stat" && method != "mog" && method != "mog2" && method != "vibe") + if (method != "fgd" && method != "mog" && method != "mog2" && method != "vibe" && method != "gmg") { cerr << "Incorrect method" << endl; return -1; } - Method m = method == "fgd_stat" ? FGD_STAT : method == "mog" ? MOG : method == "mog2" ? MOG2 : VIBE; + Method m = method == "fgd" ? FGD_STAT : method == "mog" ? MOG : method == "mog2" ? MOG2 : method == "vibe" ? VIBE : GMG; VideoCapture cap; @@ -67,6 +68,8 @@ int main(int argc, const char** argv) MOG_GPU mog; MOG2_GPU mog2; VIBE_GPU vibe; + GMG_GPU gmg; + gmg.numInitializationFrames = 40; GpuMat d_fgmask; GpuMat d_fgimg; @@ -93,12 +96,16 @@ int main(int argc, const char** argv) case VIBE: vibe.initialize(d_frame); break; + + case GMG: + gmg.initialize(d_frame.size()); + break; } namedWindow("image", WINDOW_NORMAL); namedWindow("foreground mask", WINDOW_NORMAL); namedWindow("foreground image", WINDOW_NORMAL); - if (m != VIBE) + if (m != VIBE && m != GMG) namedWindow("mean background image", WINDOW_NORMAL); for(;;) @@ -108,6 +115,8 @@ int main(int argc, const char** argv) break; d_frame.upload(frame); + int64 start = cv::getTickCount(); + //update the model switch (m) { @@ -130,8 +139,15 @@ int main(int argc, const char** argv) case VIBE: vibe(d_frame, d_fgmask); break; + + case GMG: + gmg(d_frame, d_fgmask); + break; } + double fps = cv::getTickFrequency() / (cv::getTickCount() - start); + std::cout << "FPS : " << fps << std::endl; + d_fgimg.setTo(Scalar::all(0)); d_frame.copyTo(d_fgimg, d_fgmask); diff --git a/samples/ocl/CMakeLists.txt b/samples/ocl/CMakeLists.txt new file mode 100644 index 0000000000..4d89ff40d7 --- /dev/null +++ b/samples/ocl/CMakeLists.txt @@ -0,0 +1,64 @@ +SET(OPENCV_OCL_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc opencv_highgui + opencv_ml opencv_video opencv_objdetect opencv_features2d + opencv_calib3d opencv_legacy opencv_contrib opencv_ocl + opencv_nonfree) + +ocv_check_dependencies(${OPENCV_OCL_SAMPLES_REQUIRED_DEPS}) + +if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) + set(project "ocl") + string(TOUPPER "${project}" project_upper) + + project("${project}_samples") + + ocv_include_modules(${OPENCV_OCL_SAMPLES_REQUIRED_DEPS}) + + if(HAVE_OPENCL) + ocv_include_directories(${OPENCL_INCLUDE_DIR}) + endif() + + if(CMAKE_COMPILER_IS_GNUCXX AND NOT ENABLE_NOISY_WARNINGS) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function") + endif() + + # --------------------------------------------- + # Define executable targets + # --------------------------------------------- + MACRO(OPENCV_DEFINE_OCL_EXAMPLE name srcs) + set(the_target "example_${project}_${name}") + add_executable(${the_target} ${srcs}) + + target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_OCL_SAMPLES_REQUIRED_DEPS}) + + set_target_properties(${the_target} PROPERTIES + OUTPUT_NAME "${name}_${project}" + PROJECT_LABEL "(EXAMPLE_${project_upper}) ${name}") + + if(ENABLE_SOLUTION_FOLDERS) + set_target_properties(${the_target} PROPERTIES FOLDER "samples//${project}") + endif() + + if(WIN32) + if(MSVC AND NOT BUILD_SHARED_LIBS) + set_target_properties(${the_target} 
PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG") + endif() + install(TARGETS ${the_target} RUNTIME DESTINATION "samples/${project}" COMPONENT main) + endif() + ENDMACRO() + + file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) + + foreach(sample_filename ${all_samples}) + get_filename_component(sample ${sample_filename} NAME_WE) + file(GLOB sample_srcs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${sample}.*) + OPENCV_DEFINE_OCL_EXAMPLE(${sample} ${sample_srcs}) + endforeach() +endif() + +if (NOT WIN32) + file(GLOB install_list *.c *.cpp *.jpg *.png *.data makefile.* build_all.sh *.dsp *.cmd ) + install(FILES ${install_list} + DESTINATION share/opencv/samples/${project} + PERMISSIONS OWNER_READ GROUP_READ WORLD_READ) +endif() + diff --git a/samples/ocl/hog.cpp b/samples/ocl/hog.cpp new file mode 100644 index 0000000000..96a4af0a7d --- /dev/null +++ b/samples/ocl/hog.cpp @@ -0,0 +1,459 @@ +#include +#include +#include +#include +#include +#include +#include "opencv2/ocl/ocl.hpp" +#include "opencv2/highgui/highgui.hpp" + +using namespace std; +using namespace cv; + +bool help_showed = false; + +class Args +{ +public: + Args(); + static Args read(int argc, char** argv); + + string src; + bool src_is_video; + bool src_is_camera; + int camera_id; + + bool write_video; + string dst_video; + double dst_video_fps; + + bool make_gray; + + bool resize_src; + int width, height; + + double scale; + int nlevels; + int gr_threshold; + + double hit_threshold; + bool hit_threshold_auto; + + int win_width; + int win_stride_width, win_stride_height; + + bool gamma_corr; +}; + + +class App +{ +public: + App(const Args& s); + void run(); + + void handleKey(char key); + + void hogWorkBegin(); + void hogWorkEnd(); + string hogWorkFps() const; + + void workBegin(); + void workEnd(); + string workFps() const; + + string message() const; + +private: + App operator=(App&); + + Args args; + bool running; + + bool use_gpu; + bool make_gray; + double scale; + int gr_threshold; + int nlevels; + double hit_threshold; + bool gamma_corr; + + int64 hog_work_begin; + double hog_work_fps; + + int64 work_begin; + double work_fps; +}; + +static void printHelp() +{ + cout << "Histogram of Oriented Gradients descriptor and detector sample.\n" + << "\nUsage: hog_gpu\n" + << " (|--video |--camera ) # frames source\n" + << " [--make_gray ] # convert image to gray one or not\n" + << " [--resize_src ] # do resize of the source image or not\n" + << " [--width ] # resized image width\n" + << " [--height ] # resized image height\n" + << " [--hit_threshold ] # classifying plane distance threshold (0.0 usually)\n" + << " [--scale ] # HOG window scale factor\n" + << " [--nlevels ] # max number of HOG window scales\n" + << " [--win_width ] # width of the window (48 or 64)\n" + << " [--win_stride_width ] # distance by OX axis between neighbour wins\n" + << " [--win_stride_height ] # distance by OY axis between neighbour wins\n" + << " [--gr_threshold ] # merging similar rects constant\n" + << " [--gamma_correct ] # do gamma correction or not\n" + << " [--write_video ] # write video or not\n" + << " [--dst_video ] # output video path\n" + << " [--dst_video_fps ] # output video fps\n"; + help_showed = true; +} + +int main(int argc, char** argv) +{ + try + { + if (argc < 2) + printHelp(); + Args args = Args::read(argc, argv); + if (help_showed) + return -1; + App app(args); + app.run(); + } + catch (const Exception& e) { return cout << "error: " << e.what() << endl, 1; } + catch (const exception& e) { 
return cout << "error: " << e.what() << endl, 1; } + catch(...) { return cout << "unknown exception" << endl, 1; } + return 0; +} + + +Args::Args() +{ + src_is_video = false; + src_is_camera = false; + camera_id = 0; + + write_video = false; + dst_video_fps = 24.; + + make_gray = false; + + resize_src = false; + width = 640; + height = 480; + + scale = 1.05; + nlevels = 13; + gr_threshold = 8; + hit_threshold = 1.4; + hit_threshold_auto = true; + + win_width = 48; + win_stride_width = 8; + win_stride_height = 8; + + gamma_corr = true; +} + + +Args Args::read(int argc, char** argv) +{ + Args args; + for (int i = 1; i < argc; i++) + { + if (string(argv[i]) == "--make_gray") args.make_gray = (string(argv[++i]) == "true"); + else if (string(argv[i]) == "--resize_src") args.resize_src = (string(argv[++i]) == "true"); + else if (string(argv[i]) == "--width") args.width = atoi(argv[++i]); + else if (string(argv[i]) == "--height") args.height = atoi(argv[++i]); + else if (string(argv[i]) == "--hit_threshold") + { + args.hit_threshold = atof(argv[++i]); + args.hit_threshold_auto = false; + } + else if (string(argv[i]) == "--scale") args.scale = atof(argv[++i]); + else if (string(argv[i]) == "--nlevels") args.nlevels = atoi(argv[++i]); + else if (string(argv[i]) == "--win_width") args.win_width = atoi(argv[++i]); + else if (string(argv[i]) == "--win_stride_width") args.win_stride_width = atoi(argv[++i]); + else if (string(argv[i]) == "--win_stride_height") args.win_stride_height = atoi(argv[++i]); + else if (string(argv[i]) == "--gr_threshold") args.gr_threshold = atoi(argv[++i]); + else if (string(argv[i]) == "--gamma_correct") args.gamma_corr = (string(argv[++i]) == "true"); + else if (string(argv[i]) == "--write_video") args.write_video = (string(argv[++i]) == "true"); + else if (string(argv[i]) == "--dst_video") args.dst_video = argv[++i]; + else if (string(argv[i]) == "--dst_video_fps") args.dst_video_fps = atof(argv[++i]); + else if (string(argv[i]) == "--help") printHelp(); + else if (string(argv[i]) == "--video") { args.src = argv[++i]; args.src_is_video = true; } + else if (string(argv[i]) == "--camera") { args.camera_id = atoi(argv[++i]); args.src_is_camera = true; } + else if (args.src.empty()) args.src = argv[i]; + else throw runtime_error((string("unknown key: ") + argv[i])); + } + return args; +} + + +App::App(const Args& s) +{ + args = s; + cout << "\nControls:\n" + << "\tESC - exit\n" + << "\tm - change mode GPU <-> CPU\n" + << "\tg - convert image to gray or not\n" + << "\t1/q - increase/decrease HOG scale\n" + << "\t2/w - increase/decrease levels count\n" + << "\t3/e - increase/decrease HOG group threshold\n" + << "\t4/r - increase/decrease hit threshold\n" + << endl; + + use_gpu = true; + make_gray = args.make_gray; + scale = args.scale; + gr_threshold = args.gr_threshold; + nlevels = args.nlevels; + + if (args.hit_threshold_auto) + args.hit_threshold = args.win_width == 48 ? 
1.4 : 0.; + hit_threshold = args.hit_threshold; + + gamma_corr = args.gamma_corr; + + if (args.win_width != 64 && args.win_width != 48) + args.win_width = 64; + + cout << "Scale: " << scale << endl; + if (args.resize_src) + cout << "Resized source: (" << args.width << ", " << args.height << ")\n"; + cout << "Group threshold: " << gr_threshold << endl; + cout << "Levels number: " << nlevels << endl; + cout << "Win width: " << args.win_width << endl; + cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n"; + cout << "Hit threshold: " << hit_threshold << endl; + cout << "Gamma correction: " << gamma_corr << endl; + cout << endl; +} + + +void App::run() +{ + std::vector oclinfo; + ocl::getDevice(oclinfo); + running = true; + cv::VideoWriter video_writer; + + Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96) + Size win_stride(args.win_stride_width, args.win_stride_height); + + // Create HOG descriptors and detectors here + vector detector; + if (win_size == Size(64, 128)) + detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128(); + else + detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96(); + + cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, + cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, + cv::ocl::HOGDescriptor::DEFAULT_NLEVELS); + cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, + HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS); + gpu_hog.setSVMDetector(detector); + cpu_hog.setSVMDetector(detector); + + while (running) + { + VideoCapture vc; + Mat frame; + + if (args.src_is_video) + { + vc.open(args.src.c_str()); + if (!vc.isOpened()) + throw runtime_error(string("can't open video file: " + args.src)); + vc >> frame; + } + else if (args.src_is_camera) + { + vc.open(args.camera_id); + if (!vc.isOpened()) + { + stringstream msg; + msg << "can't open camera: " << args.camera_id; + throw runtime_error(msg.str()); + } + vc >> frame; + } + else + { + frame = imread(args.src); + if (frame.empty()) + throw runtime_error(string("can't open image file: " + args.src)); + } + + Mat img_aux, img, img_to_show; + ocl::oclMat gpu_img; + + // Iterate over all frames + while (running && !frame.empty()) + { + workBegin(); + + // Change format of the image + if (make_gray) cvtColor(frame, img_aux, CV_BGR2GRAY); + else if (use_gpu) cvtColor(frame, img_aux, CV_BGR2BGRA); + else frame.copyTo(img_aux); + + // Resize image + if (args.resize_src) resize(img_aux, img, Size(args.width, args.height)); + else img = img_aux; + img_to_show = img; + + gpu_hog.nlevels = nlevels; + cpu_hog.nlevels = nlevels; + + vector found; + + // Perform HOG classification + hogWorkBegin(); + if (use_gpu) + { + gpu_img.upload(img); + gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride, + Size(0, 0), scale, gr_threshold); + } + else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride, + Size(0, 0), scale, gr_threshold); + hogWorkEnd(); + + // Draw positive classified windows + for (size_t i = 0; i < found.size(); i++) + { + Rect r = found[i]; + rectangle(img_to_show, r.tl(), r.br(), CV_RGB(0, 255, 0), 3); + } + + if (use_gpu) + putText(img_to_show, "Mode: GPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); + else + putText(img_to_show, "Mode: CPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); + putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., 
Scalar(255, 100, 0), 2); + putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); + imshow("opencv_gpu_hog", img_to_show); + + if (args.src_is_video || args.src_is_camera) vc >> frame; + + workEnd(); + + if (args.write_video) + { + if (!video_writer.isOpened()) + { + video_writer.open(args.dst_video, CV_FOURCC('x','v','i','d'), args.dst_video_fps, + img_to_show.size(), true); + if (!video_writer.isOpened()) + throw std::runtime_error("can't create video writer"); + } + + if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR); + else cvtColor(img_to_show, img, CV_BGRA2BGR); + + video_writer << img; + } + + handleKey((char)waitKey(3)); + } + } +} + + +void App::handleKey(char key) +{ + switch (key) + { + case 27: + running = false; + break; + case 'm': + case 'M': + use_gpu = !use_gpu; + cout << "Switched to " << (use_gpu ? "CUDA" : "CPU") << " mode\n"; + break; + case 'g': + case 'G': + make_gray = !make_gray; + cout << "Convert image to gray: " << (make_gray ? "YES" : "NO") << endl; + break; + case '1': + scale *= 1.05; + cout << "Scale: " << scale << endl; + break; + case 'q': + case 'Q': + scale /= 1.05; + cout << "Scale: " << scale << endl; + break; + case '2': + nlevels++; + cout << "Levels number: " << nlevels << endl; + break; + case 'w': + case 'W': + nlevels = max(nlevels - 1, 1); + cout << "Levels number: " << nlevels << endl; + break; + case '3': + gr_threshold++; + cout << "Group threshold: " << gr_threshold << endl; + break; + case 'e': + case 'E': + gr_threshold = max(0, gr_threshold - 1); + cout << "Group threshold: " << gr_threshold << endl; + break; + case '4': + hit_threshold+=0.25; + cout << "Hit threshold: " << hit_threshold << endl; + break; + case 'r': + case 'R': + hit_threshold = max(0.0, hit_threshold - 0.25); + cout << "Hit threshold: " << hit_threshold << endl; + break; + case 'c': + case 'C': + gamma_corr = !gamma_corr; + cout << "Gamma correction: " << gamma_corr << endl; + break; + } +} + + +inline void App::hogWorkBegin() { hog_work_begin = getTickCount(); } + +inline void App::hogWorkEnd() +{ + int64 delta = getTickCount() - hog_work_begin; + double freq = getTickFrequency(); + hog_work_fps = freq / delta; +} + +inline string App::hogWorkFps() const +{ + stringstream ss; + ss << hog_work_fps; + return ss.str(); +} + + +inline void App::workBegin() { work_begin = getTickCount(); } + +inline void App::workEnd() +{ + int64 delta = getTickCount() - work_begin; + double freq = getTickFrequency(); + work_fps = freq / delta; +} + +inline string App::workFps() const +{ + stringstream ss; + ss << work_fps; + return ss.str(); +} + diff --git a/samples/python/chessboard.py b/samples/python/chessboard.py index c204d6fabe..ce80431db5 100755 --- a/samples/python/chessboard.py +++ b/samples/python/chessboard.py @@ -15,7 +15,7 @@ if __name__ == "__main__": im = cv.LoadImageM(fileName, False) im3 = cv.LoadImageM(fileName, True) except: # if local copy cannot be opened, try downloading it - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/cpp/left01.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/cpp/left01.jpg' filedata = urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) @@ -23,12 +23,12 @@ if __name__ == "__main__": im3 = cv.DecodeImageM(imagefiledata, cv.CV_LOAD_IMAGE_COLOR) chessboard_dim = ( 9, 6 ) - + found_all, corners = cv.FindChessboardCorners( im, 
chessboard_dim ) print found_all, len(corners) cv.DrawChessboardCorners( im3, chessboard_dim, corners, found_all ) - + cv.ShowImage("win", im3); cv.WaitKey() cv.DestroyAllWindows() diff --git a/samples/python/cvutils.py b/samples/python/cvutils.py index 3450d5847e..6e81a3a44d 100644 --- a/samples/python/cvutils.py +++ b/samples/python/cvutils.py @@ -9,7 +9,7 @@ def load_sample(name=None): try: img0 = cv.LoadImage(name, cv.CV_LOAD_IMAGE_COLOR) except IOError: - urlbase = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/' + urlbase = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/c/' file = name.split('/')[-1] filedata = urllib2.urlopen(urlbase+file).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) diff --git a/samples/python/demhist.py b/samples/python/demhist.py index d2645e82d5..72565d6d74 100755 --- a/samples/python/demhist.py +++ b/samples/python/demhist.py @@ -60,7 +60,7 @@ class DemHist: cv.Rectangle(self.hist_image, (int(i * bin_w), self.hist_image.height), (int((i + 1) * bin_w), self.hist_image.height - cv.Round(self.hist.bins[i])), cv.ScalarAll(0), -1, 8, 0) - + cv.ShowImage("histogram", self.hist_image) if __name__ == "__main__": @@ -68,7 +68,7 @@ if __name__ == "__main__": if len(sys.argv) > 1: src_image = cv.GetMat(cv.LoadImage(sys.argv[1], 0)) else: - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/baboon.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/c/baboon.jpg' filedata = urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) diff --git a/samples/python/dft.py b/samples/python/dft.py index 5a5e8dbebb..0ecc50ac81 100755 --- a/samples/python/dft.py +++ b/samples/python/dft.py @@ -12,11 +12,11 @@ def cvShiftDFT(src_arr, dst_arr ): dst_size = cv.GetSize(dst_arr) if dst_size != size: - cv.Error( cv.CV_StsUnmatchedSizes, "cv.ShiftDFT", "Source and Destination arrays must have equal sizes", __FILE__, __LINE__ ) + cv.Error( cv.CV_StsUnmatchedSizes, "cv.ShiftDFT", "Source and Destination arrays must have equal sizes", __FILE__, __LINE__ ) if(src_arr is dst_arr): tmp = cv.CreateMat(size[1]/2, size[0]/2, cv.GetElemType(src_arr)) - + cx = size[0] / 2 cy = size[1] / 2 # image center @@ -31,13 +31,13 @@ def cvShiftDFT(src_arr, dst_arr ): if(src_arr is not dst_arr): if( not cv.CV_ARE_TYPES_EQ( q1, d1 )): - cv.Error( cv.CV_StsUnmatchedFormats, "cv.ShiftDFT", "Source and Destination arrays must have the same format", __FILE__, __LINE__ ) - + cv.Error( cv.CV_StsUnmatchedFormats, "cv.ShiftDFT", "Source and Destination arrays must have the same format", __FILE__, __LINE__ ) + cv.Copy(q3, d1) cv.Copy(q4, d2) cv.Copy(q1, d3) cv.Copy(q2, d4) - + else: cv.Copy(q3, tmp) cv.Copy(q1, q3) @@ -47,11 +47,11 @@ def cvShiftDFT(src_arr, dst_arr ): cv.Copy(tmp, q2) if __name__ == "__main__": - + if len(sys.argv) > 1: im = cv.LoadImage( sys.argv[1], cv.CV_LOAD_IMAGE_GRAYSCALE) else: - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/baboon.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/c/baboon.jpg' filedata = urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) diff --git a/samples/python/distrans.py b/samples/python/distrans.py index 9a23274500..38ace4419a 100755 --- a/samples/python/distrans.py +++ b/samples/python/distrans.py @@ -20,12 +20,12 
@@ edge = 0 def on_trackbar(edge_thresh): cv.Threshold(gray, edge, float(edge_thresh), float(edge_thresh), cv.CV_THRESH_BINARY) - #Distance transform + #Distance transform cv.DistTransform(edge, dist, cv.CV_DIST_L2, cv.CV_DIST_MASK_5) cv.ConvertScale(dist, dist, 5000.0, 0) cv.Pow(dist, dist, 0.5) - + cv.ConvertScale(dist, dist32s, 1.0, 0.5) cv.AndS(dist32s, cv.ScalarAll(255), dist32s, None) cv.ConvertScale(dist32s, dist8u1, 1, 0) @@ -42,7 +42,7 @@ if __name__ == "__main__": if len(sys.argv) > 1: gray = cv.LoadImage(sys.argv[1], cv.CV_LOAD_IMAGE_GRAYSCALE) else: - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/stuff.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/c/stuff.jpg' filedata = urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) @@ -61,7 +61,7 @@ if __name__ == "__main__": # Create a window cv.NamedWindow(wndname, 1) - # create a toolbar + # create a toolbar cv.CreateTrackbar(tbarname, wndname, edge_thresh, 255, on_trackbar) # Show the image diff --git a/samples/python/edge.py b/samples/python/edge.py index 9413895ca2..2d3b8efb63 100755 --- a/samples/python/edge.py +++ b/samples/python/edge.py @@ -24,7 +24,7 @@ def on_trackbar(position): # copy edge points cv.Copy(im, col_edge, edge) - + # show the im cv.ShowImage(win_name, col_edge) @@ -32,7 +32,7 @@ if __name__ == '__main__': if len(sys.argv) > 1: im = cv.LoadImage( sys.argv[1], cv.CV_LOAD_IMAGE_COLOR) else: - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/fruits.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/c/fruits.jpg' filedata = urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) diff --git a/samples/python/ffilldemo.py b/samples/python/ffilldemo.py index 339771e085..2ecf5ffa72 100755 --- a/samples/python/ffilldemo.py +++ b/samples/python/ffilldemo.py @@ -44,36 +44,36 @@ def on_mouse( event, x, y, flags, param ): if( is_mask ): my_mask = mask cv.Threshold( mask, mask, 1, 128, cv.CV_THRESH_BINARY ); - + if( is_color ): - + color = cv.CV_RGB( r, g, b ); comp = cv.FloodFill( color_img, seed, color, cv.CV_RGB( lo, lo, lo ), cv.CV_RGB( up, up, up ), flags, my_mask ); cv.ShowImage( "image", color_img ); - + else: - + brightness = cv.RealScalar((r*2 + g*7 + b + 5)/10); comp = cv.FloodFill( gray_img, seed, brightness, cv.RealScalar(lo), cv.RealScalar(up), flags, my_mask ); cv.ShowImage( "image", gray_img ); - + print "%g pixels were repainted" % comp[0] if( is_mask ): cv.ShowImage( "mask", mask ); - - + + if __name__ == "__main__": - + if len(sys.argv) > 1: im = cv.LoadImage( sys.argv[1], cv.CV_LOAD_IMAGE_COLOR) else: - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/fruits.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/c/fruits.jpg' filedata = urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) @@ -89,7 +89,7 @@ if __name__ == "__main__": print "\tg - use gradient floodfill with floating(relative) range" print "\t4 - use 4-connectivity mode" print "\t8 - use 8-connectivity mode" - + color_img = cv.CloneImage( im ); gray_img0 = cv.CreateImage( (color_img.width, color_img.height), 8, 1 ); cv.CvtColor( color_img, gray_img0, cv.CV_BGR2GRAY ); @@ -102,7 +102,7 @@ if __name__ == 
"__main__": cv.SetMouseCallback( "image", on_mouse ); - while True: + while True: if( is_color ): cv.ShowImage( "image", color_img ); else: @@ -114,29 +114,29 @@ if __name__ == "__main__": sys.exit(0) elif c == ord('c'): if( is_color ): - + print("Grayscale mode is set"); cv.CvtColor( color_img, gray_img, cv.CV_BGR2GRAY ); is_color = 0; - + else: - + print("Color mode is set"); cv.Copy( im, color_img, None ); cv.Zero( mask ); is_color = 1; - + elif c == ord('m'): if( is_mask ): cv.DestroyWindow( "mask" ); is_mask = 0; - + else: cv.NamedWindow( "mask", 0 ); cv.Zero( mask ); cv.ShowImage( "mask", mask ); is_mask = 1; - + elif c == ord('r'): print("Original image is restored"); cv.Copy( im, color_img, None ); diff --git a/samples/python/fitellipse.py b/samples/python/fitellipse.py index cfc89efda5..88a927d451 100755 --- a/samples/python/fitellipse.py +++ b/samples/python/fitellipse.py @@ -27,12 +27,12 @@ class FitEllipse: cv.CreateTrackbar("Threshold", "Result", slider_pos, 255, self.process_image) self.process_image(slider_pos) - def process_image(self, slider_pos): + def process_image(self, slider_pos): """ This function finds contours, draws them and their approximation by ellipses. """ stor = cv.CreateMemStorage() - + # Create the destination images image02 = cv.CloneImage(self.source_image) cv.Zero(image02) @@ -56,18 +56,18 @@ class FitEllipse: PointArray2D32f = cv.CreateMat(1, len(c), cv.CV_32FC2) for (i, (x, y)) in enumerate(c): PointArray2D32f[0, i] = (x, y) - + # Draw the current contour in gray gray = cv.CV_RGB(100, 100, 100) cv.DrawContours(image04, c, gray, gray,0,1,8,(0,0)) - + # Fits ellipse to current contour. (center, size, angle) = cv.FitEllipse2(PointArray2D32f) - + # Convert ellipse data from float to integer representation. center = (cv.Round(center[0]), cv.Round(center[1])) size = (cv.Round(size[0] * 0.5), cv.Round(size[1] * 0.5)) - + # Draw ellipse in random color color = cv.CV_RGB(random.randrange(256),random.randrange(256),random.randrange(256)) cv.Ellipse(image04, center, size, @@ -82,12 +82,12 @@ if __name__ == '__main__': if len(sys.argv) > 1: source_image = cv.LoadImage(sys.argv[1], cv.CV_LOAD_IMAGE_GRAYSCALE) else: - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/stuff.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/c/stuff.jpg' filedata = urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) source_image = cv.DecodeImage(imagefiledata, cv.CV_LOAD_IMAGE_GRAYSCALE) - + # Create windows. 
cv.NamedWindow("Source", 1) cv.NamedWindow("Result", 1) diff --git a/samples/python/houghlines.py b/samples/python/houghlines.py index 2c697a7ee2..a437bfe022 100755 --- a/samples/python/houghlines.py +++ b/samples/python/houghlines.py @@ -14,7 +14,7 @@ if __name__ == "__main__": filename = sys.argv[1] src = cv.LoadImage(filename, cv.CV_LOAD_IMAGE_GRAYSCALE) else: - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/doc/pics/building.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/doc/pics/building.jpg' filedata = urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) @@ -37,7 +37,7 @@ if __name__ == "__main__": for (rho, theta) in lines[:100]: a = cos(theta) b = sin(theta) - x0 = a * rho + x0 = a * rho y0 = b * rho pt1 = (cv.Round(x0 + 1000*(-b)), cv.Round(y0 + 1000*(a))) pt2 = (cv.Round(x0 - 1000*(-b)), cv.Round(y0 - 1000*(a))) diff --git a/samples/python/inpaint.py b/samples/python/inpaint.py index ce5a77ff4c..fa7e9a0925 100755 --- a/samples/python/inpaint.py +++ b/samples/python/inpaint.py @@ -27,7 +27,7 @@ if __name__=="__main__": if len(sys.argv) > 1: img0 = cv.LoadImage( sys.argv[1], cv.CV_LOAD_IMAGE_COLOR) else: - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/fruits.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/c/fruits.jpg' filedata = urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) @@ -38,7 +38,7 @@ if __name__=="__main__": print "\tr - restore the original image" print "\ti or ENTER - run inpainting algorithm" print "\t\t(before running it, paint something on the image)" - + cv.NamedWindow("image", 1) cv.NamedWindow("inpainted image", 1) diff --git a/samples/python/logpolar.py b/samples/python/logpolar.py index 23c14f813f..338acc2ce8 100755 --- a/samples/python/logpolar.py +++ b/samples/python/logpolar.py @@ -19,27 +19,27 @@ def on_mouse(event, x, y, flags, param): cv.ShowImage("inverse log-polar", src2) if __name__ == "__main__": - + if len(sys.argv) > 1: src = cv.LoadImage( sys.argv[1], cv.CV_LOAD_IMAGE_COLOR) else: - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/fruits.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/c/fruits.jpg' filedata = urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) src = cv.DecodeImage(imagefiledata, cv.CV_LOAD_IMAGE_COLOR) - + cv.NamedWindow("original", 1) cv.NamedWindow("log-polar", 1) cv.NamedWindow("inverse log-polar", 1) - - + + dst = cv.CreateImage((256, 256), 8, 3) src2 = cv.CreateImage(cv.GetSize(src), 8, 3) - + cv.SetMouseCallback("original", on_mouse) on_mouse(cv.CV_EVENT_LBUTTONDOWN, src.width/2, src.height/2, None, None) - + cv.ShowImage("original", src) cv.WaitKey() cv.DestroyAllWindows() diff --git a/samples/python/morphology.py b/samples/python/morphology.py index ede15f1b1d..6440272563 100755 --- a/samples/python/morphology.py +++ b/samples/python/morphology.py @@ -31,7 +31,7 @@ if __name__ == "__main__": if len(sys.argv) > 1: src = cv.LoadImage(sys.argv[1], cv.CV_LOAD_IMAGE_COLOR) else: - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/fruits.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/c/fruits.jpg' filedata = 
urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) diff --git a/samples/python/numpy_array.py b/samples/python/numpy_array.py index b47d0541ef..a79eec7a95 100644 --- a/samples/python/numpy_array.py +++ b/samples/python/numpy_array.py @@ -22,7 +22,7 @@ if __name__ == "__main__": if len(sys.argv) > 1: img0 = cv.LoadImageM( sys.argv[1], cv.CV_LOAD_IMAGE_COLOR) else: - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/lena.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/c/lena.jpg' filedata = urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) diff --git a/samples/python/watershed.py b/samples/python/watershed.py index 0ea43bea55..c1464227c6 100755 --- a/samples/python/watershed.py +++ b/samples/python/watershed.py @@ -27,7 +27,7 @@ if __name__ == "__main__": if len(sys.argv) > 1: img0 = cv.LoadImage( sys.argv[1], cv.CV_LOAD_IMAGE_COLOR) else: - url = 'http://code.opencv.org/svn/opencv/trunk/opencv/samples/c/fruits.jpg' + url = 'http://code.opencv.org/projects/opencv/repository/revisions/master/raw/samples/c/fruits.jpg' filedata = urllib2.urlopen(url).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) @@ -106,4 +106,4 @@ if __name__ == "__main__": cv.AddWeighted(wshed, 0.5, img_gray, 0.5, 0, wshed) cv.ShowImage("watershed transform", wshed) cv.DestroyAllWindows() - + diff --git a/samples/python2/common.py b/samples/python2/common.py index 0f332b6d0f..883aa9ae23 100644 --- a/samples/python2/common.py +++ b/samples/python2/common.py @@ -6,6 +6,12 @@ import itertools as it image_extensions = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.pbm', '.pgm', '.ppm'] +class Bunch(object): + def __init__(self, **kw): + self.__dict__.update(kw) + def __str__(self): + return str(self.__dict__) + def splitfn(fn): path, fn = os.path.split(fn) name, ext = os.path.splitext(fn) @@ -198,3 +204,9 @@ def getsize(img): def mdot(*args): return reduce(np.dot, args) + +def draw_keypoints(vis, keypoints, color = (0, 255, 255)): + for kp in keypoints: + x, y = kp.pt + cv2.circle(vis, (int(x), int(y)), 2, color) + diff --git a/samples/python2/digits.py b/samples/python2/digits.py index b8b9dc5793..b7b4a1e627 100644 --- a/samples/python2/digits.py +++ b/samples/python2/digits.py @@ -1,9 +1,9 @@ ''' -SVN and KNearest digit recognition. +SVM and KNearest digit recognition. Sample loads a dataset of handwritten digits from 'digits.png'. -Then it trains a SVN and KNearest classifiers on it and evaluates -their accuracy. +Then it trains a SVM and KNearest classifiers on it and evaluates +their accuracy. 
Following preprocessing is applied to the dataset: - Moment-based image deskew (see deskew()) @@ -77,7 +77,7 @@ class KNearest(StatModel): class SVM(StatModel): def __init__(self, C = 1, gamma = 0.5): - self.params = dict( kernel_type = cv2.SVM_RBF, + self.params = dict( kernel_type = cv2.SVM_RBF, svm_type = cv2.SVM_C_SVC, C = C, gamma = gamma ) @@ -95,7 +95,7 @@ def evaluate_model(model, digits, samples, labels): resp = model.predict(samples) err = (labels != resp).mean() print 'error: %.2f %%' % (err*100) - + confusion = np.zeros((10, 10), np.int32) for i, j in zip(labels, resp): confusion[i, j] += 1 @@ -128,7 +128,7 @@ def preprocess_hog(digits): hist = np.hstack(hists) # transform to Hellinger kernel - eps = 1e-7 + eps = 1e-7 hist /= hist.sum() + eps hist = np.sqrt(hist) hist /= norm(hist) + eps @@ -141,23 +141,23 @@ if __name__ == '__main__': print __doc__ digits, labels = load_digits(DIGITS_FN) - + print 'preprocessing...' # shuffle digits rand = np.random.RandomState(321) shuffle = rand.permutation(len(digits)) digits, labels = digits[shuffle], labels[shuffle] - + digits2 = map(deskew, digits) samples = preprocess_hog(digits2) - + train_n = int(0.9*len(samples)) cv2.imshow('test set', mosaic(25, digits[train_n:])) digits_train, digits_test = np.split(digits2, [train_n]) samples_train, samples_test = np.split(samples, [train_n]) labels_train, labels_test = np.split(labels, [train_n]) - + print 'training KNearest...' model = KNearest(k=4) model.train(samples_train, labels_train) diff --git a/samples/python2/digits_adjust.py b/samples/python2/digits_adjust.py index c2a238dafa..cf92280f2a 100644 --- a/samples/python2/digits_adjust.py +++ b/samples/python2/digits_adjust.py @@ -1,15 +1,15 @@ ''' -Digit recognition adjustment. -Grid search is used to find the best parameters for SVN and KNearest classifiers. -SVM adjustment follows the guidelines given in +Digit recognition adjustment. +Grid search is used to find the best parameters for SVM and KNearest classifiers. +SVM adjustment follows the guidelines given in http://www.csie.ntu.edu.tw/~cjlin/papers/guide/guide.pdf -Threading or cloud computing (with http://www.picloud.com/)) may be used +Threading or cloud computing (with http://www.picloud.com/)) may be used to speedup the computation. 
Usage: digits_adjust.py [--model {svm|knearest}] [--cloud] [--env ] - + --model {svm|knearest} - select the classifier (SVM is the default) --cloud - use PiCloud computing platform --env - cloud environment name @@ -23,12 +23,12 @@ from multiprocessing.pool import ThreadPool from digits import * -try: +try: import cloud have_cloud = True except ImportError: have_cloud = False - + def cross_validate(model_class, params, samples, labels, kfold = 3, pool = None): @@ -93,7 +93,7 @@ class App(object): pool = ThreadPool(processes=cv2.getNumberOfCPUs()) ires = pool.imap_unordered(f, jobs) return ires - + def adjust_SVM(self): Cs = np.logspace(0, 10, 15, base=2) gammas = np.logspace(-7, 4, 15, base=2) @@ -107,7 +107,7 @@ class App(object): params = dict(C = Cs[i], gamma=gammas[j]) score = cross_validate(SVM, params, samples, labels) return i, j, score - + ires = self.run_jobs(f, np.ndindex(*scores.shape)) for count, (i, j, score) in enumerate(ires): scores[i, j] = score @@ -142,7 +142,7 @@ class App(object): if __name__ == '__main__': import getopt import sys - + print __doc__ args, _ = getopt.getopt(sys.argv[1:], '', ['model=', 'cloud', 'env=']) diff --git a/samples/python2/feature_homography.py b/samples/python2/feature_homography.py index d553deb977..d09c764bcd 100644 --- a/samples/python2/feature_homography.py +++ b/samples/python2/feature_homography.py @@ -3,128 +3,44 @@ Feature homography ================== Example of using features2d framework for interactive video homography matching. -ORB features and FLANN matcher are used. +ORB features and FLANN matcher are used. The actual tracking is implemented by +PlaneTracker class in plane_tracker.py Inspired by http://www.youtube.com/watch?v=-ZNYoL8rzPY +video: http://www.youtube.com/watch?v=FirtmYcC0Vc + Usage ----- feature_homography.py [