diff --git a/CMakeLists.txt b/CMakeLists.txt
index eb173592a6..c1e4e7c1af 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -127,8 +127,7 @@ OCV_OPTION(WITH_FFMPEG         "Include FFMPEG support"                      ON
 OCV_OPTION(WITH_GSTREAMER      "Include Gstreamer support"                   ON   IF (UNIX AND NOT APPLE AND NOT ANDROID) )
 OCV_OPTION(WITH_GSTREAMER_0_10 "Enable Gstreamer 0.10 support (instead of 1.x)"                              OFF )
 OCV_OPTION(WITH_GTK            "Include GTK support"                         ON   IF (UNIX AND NOT APPLE AND NOT ANDROID) )
-OCV_OPTION(WITH_ICV            "Include Intel IPP ICV support"               ON   IF (NOT IOS) )
-OCV_OPTION(WITH_IPP            "Include Intel IPP support"                   OFF  IF (NOT IOS) )
+OCV_OPTION(WITH_IPP            "Include Intel IPP support"                   ON   IF (NOT IOS) )
 OCV_OPTION(WITH_JASPER         "Include JPEG2K support"                      ON   IF (NOT IOS) )
 OCV_OPTION(WITH_JPEG           "Include JPEG support"                        ON)
 OCV_OPTION(WITH_WEBP           "Include WebP support"                        ON   IF (NOT IOS) )
@@ -158,7 +157,7 @@ OCV_OPTION(WITH_OPENCLAMDFFT   "Include AMD OpenCL FFT library support"      ON
 OCV_OPTION(WITH_OPENCLAMDBLAS  "Include AMD OpenCL BLAS library support"     ON   IF (NOT ANDROID AND NOT IOS) )
 OCV_OPTION(WITH_DIRECTX        "Include DirectX support"                     ON   IF WIN32 )
 OCV_OPTION(WITH_INTELPERC      "Include Intel Perceptual Computing support"  OFF  IF WIN32 )
-
+OCV_OPTION(WITH_IPP_A          "Include Intel IPP_A support"                 OFF  IF (MSVC OR X86 OR X86_64) )
 
 # OpenCV build components
 # ===================================================
@@ -917,13 +916,17 @@ endif(DEFINED WITH_INTELPERC)
 status("")
 status("  Other third-party libraries:")
 
-if(WITH_IPP AND IPP_FOUND)
-  status("    Use IPP:" "${IPP_LATEST_VERSION_STR} [${IPP_LATEST_VERSION_MAJOR}.${IPP_LATEST_VERSION_MINOR}.${IPP_LATEST_VERSION_BUILD}]")
+if(WITH_IPP AND HAVE_IPP)
+  status("    Use IPP:" "${IPP_VERSION_STR} [${IPP_VERSION_MAJOR}.${IPP_VERSION_MINOR}.${IPP_VERSION_BUILD}]")
   status("         at:" "${IPP_ROOT_DIR}")
 else()
-  status("    Use IPP:"   WITH_IPP AND NOT IPP_FOUND THEN "IPP not found" ELSE NO)
+  status("    Use IPP:"   WITH_IPP AND NOT HAVE_IPP THEN "IPP not found" ELSE NO)
 endif()
 
+if(DEFINED WITH_IPP_A)
+status("    Use IPP Async:"  HAVE_IPP_A       THEN "YES" ELSE NO)
+endif(DEFINED WITH_IPP_A)
+
 status("    Use Eigen:"      HAVE_EIGEN       THEN "YES (ver ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})" ELSE NO)
 status("    Use TBB:"        HAVE_TBB         THEN "YES (ver ${TBB_VERSION_MAJOR}.${TBB_VERSION_MINOR} interface ${TBB_INTERFACE_VERSION})" ELSE NO)
 status("    Use OpenMP:"     HAVE_OPENMP      THEN YES ELSE NO)
diff --git a/cmake/OpenCVFindIPP.cmake b/cmake/OpenCVFindIPP.cmake
index 780ee51b87..9cb6ed0183 100644
--- a/cmake/OpenCVFindIPP.cmake
+++ b/cmake/OpenCVFindIPP.cmake
@@ -2,15 +2,12 @@
 # The script to detect Intel(R) Integrated Performance Primitives (IPP)
 # installation/package
 #
-# Windows host:
-# Run script like this before cmake:
-#   call "<IPP_INSTALL_DIR>\bin\ippvars.bat" intel64
-# for example:
-#   call "C:\Program Files (x86)\Intel\Composer XE\ipp\bin\ippvars.bat" intel64
+# By default, the ICV version of IPP is used.
+# To use a standalone IPP installation, pass its root on the cmake command line:
+# cmake ... -DIPPROOT=<path> ...
+#
+# Note: backward compatibility is broken: the IPPROOT environment variable is ignored
 #
-# Linux host:
-# Run script like this before cmake:
-#   source /opt/intel/ipp/bin/ippvars.sh [ia32|intel64]
 #
 # On return this will define:
 #
@@ -39,14 +36,6 @@ unset(IPP_VERSION_BUILD)
 
 set(IPP_LIB_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
 set(IPP_LIB_SUFFIX  ${CMAKE_STATIC_LIBRARY_SUFFIX})
-set(IPP_PREFIX "ipp")
-set(IPP_SUFFIX "_l")
-set(IPPCORE    "core") # core functionality
-set(IPPS       "s")    # signal processing
-set(IPPI       "i")    # image processing
-set(IPPCC      "cc")   # color conversion
-set(IPPCV      "cv")   # computer vision
-set(IPPVM      "vm")   # vector math
 
 set(IPP_X64 0)
 if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
@@ -56,21 +45,21 @@ if(CMAKE_CL_64)
     set(IPP_X64 1)
 endif()
 
-# This function detects IPP version by analyzing ippversion.h file
-macro(ipp_get_version _ROOT_DIR)
+# This macro detects the IPP version by parsing the given header file
+macro(ipp_get_version VERSION_FILE)
   unset(_VERSION_STR)
   unset(_MAJOR)
   unset(_MINOR)
   unset(_BUILD)
 
   # read IPP version info from file
-  file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR1 REGEX "IPP_VERSION_MAJOR")
-  file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR2 REGEX "IPP_VERSION_MINOR")
-  file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR3 REGEX "IPP_VERSION_BUILD")
+  file(STRINGS ${VERSION_FILE} STR1 REGEX "IPP_VERSION_MAJOR")
+  file(STRINGS ${VERSION_FILE} STR2 REGEX "IPP_VERSION_MINOR")
+  file(STRINGS ${VERSION_FILE} STR3 REGEX "IPP_VERSION_BUILD")
   if("${STR3}" STREQUAL "")
-    file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR3 REGEX "IPP_VERSION_UPDATE")
+    file(STRINGS ${VERSION_FILE} STR3 REGEX "IPP_VERSION_UPDATE")
   endif()
-  file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR4 REGEX "IPP_VERSION_STR")
+  file(STRINGS ${VERSION_FILE} STR4 REGEX "IPP_VERSION_STR")
 
   # extract info and assign to variables
   string(REGEX MATCHALL "[0-9]+" _MAJOR ${STR1})
@@ -83,66 +72,92 @@ macro(ipp_get_version _ROOT_DIR)
   set(IPP_VERSION_MAJOR ${_MAJOR})
   set(IPP_VERSION_MINOR ${_MINOR})
   set(IPP_VERSION_BUILD ${_BUILD})
-
-  set(__msg)
-  if(EXISTS ${_ROOT_DIR}/include/ippicv.h)
-    ocv_assert(WITH_ICV AND NOT WITH_IPP)
-    set(__msg " ICV version")
-    set(HAVE_IPP_ICV_ONLY 1)
-  endif()
-
-  message(STATUS "found IPP: ${_MAJOR}.${_MINOR}.${_BUILD} [${_VERSION_STR}]${__msg}")
-  message(STATUS "at: ${_ROOT_DIR}")
 endmacro()
 
+macro(_ipp_not_supported)  # report the reason (ARGN), clear the IPP detection results and stop
+  message(STATUS ${ARGN})
+  unset(HAVE_IPP)
+  unset(HAVE_IPP_ICV_ONLY)
+  unset(IPP_VERSION_STR)
+  return()  # this is a macro, so return() leaves the caller's scope, not only this macro
+endmacro()
 
-# This function sets IPP_INCLUDE_DIRS and IPP_LIBRARIES variables
-macro(ipp_set_variables _LATEST_VERSION)
-  if(${_LATEST_VERSION} VERSION_LESS "7.0")
-    message(SEND_ERROR "IPP ${_LATEST_VERSION} is not supported")
-    unset(HAVE_IPP)
-    return()
+# This macro uses IPP_ROOT_DIR variable
+# TODO Cleanup code after ICV package stabilization
+macro(ipp_detect_version)
+  set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include)
+
+  set(__msg)
+  if(EXISTS ${IPP_ROOT_DIR}/ippicv.h)
+    set(__msg " (ICV version)")
+    set(HAVE_IPP_ICV_ONLY 1)
+    if(EXISTS ${IPP_ROOT_DIR}/ippversion.h)
+      _ipp_not_supported("Can't resolve IPP directory: ${IPP_ROOT_DIR}")
+    else()
+      ipp_get_version(${IPP_ROOT_DIR}/ippicv.h)
+    endif()
+    ocv_assert(IPP_VERSION_STR VERSION_GREATER "8.0")
+    set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/)
+  elseif(EXISTS ${IPP_ROOT_DIR}/include/ipp.h)
+    ipp_get_version(${IPP_ROOT_DIR}/include/ippversion.h)
+    ocv_assert(IPP_VERSION_STR VERSION_GREATER "1.0")
+  else()
+    _ipp_not_supported("Can't resolve IPP directory: ${IPP_ROOT_DIR}")
   endif()
 
-  # set INCLUDE and LIB folders
-  set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include)
+  message(STATUS "found IPP${__msg}: ${_MAJOR}.${_MINOR}.${_BUILD} [${IPP_VERSION_STR}]")
+  message(STATUS "at: ${IPP_ROOT_DIR}")
+
+  if("${IPP_VERSION_STR}" VERSION_LESS "7.0")  # quoted so an empty version string cannot break if()
+    _ipp_not_supported("IPP ${IPP_VERSION_STR} is not supported")
+  endif()
+
+  set(HAVE_IPP 1)
+  if(EXISTS ${IPP_INCLUDE_DIRS}/ipp_redefine.h)
+    set(HAVE_IPP_REDEFINE 1)
+  else()
+    unset(HAVE_IPP_REDEFINE)
+  endif()
+
+  macro(_ipp_set_library_dir DIR)  # set IPP_LIBRARY_DIR to DIR, or give up on IPP if DIR does not exist
+    if(NOT EXISTS ${DIR})
+      _ipp_not_supported("IPP library directory not found")  # ends with return(), so the set() below is skipped
+    endif()
+    set(IPP_LIBRARY_DIR ${DIR})
+  endmacro()
 
   if(NOT HAVE_IPP_ICV_ONLY)
     if(APPLE)
-      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/lib)
+      _ipp_set_library_dir(${IPP_ROOT_DIR}/lib)
     elseif(IPP_X64)
-      if(NOT EXISTS ${IPP_ROOT_DIR}/lib/intel64)
-        message(SEND_ERROR "IPP EM64T libraries not found")
-      endif()
-      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/lib/intel64)
+      _ipp_set_library_dir(${IPP_ROOT_DIR}/lib/intel64)
     else()
-      if(NOT EXISTS ${IPP_ROOT_DIR}/lib/ia32)
-        message(SEND_ERROR "IPP IA32 libraries not found")
-      endif()
-      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/lib/ia32)
+      _ipp_set_library_dir(${IPP_ROOT_DIR}/lib/ia32)
     endif()
   else()
-    if(APPLE)
-      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/libs/macosx)
-    elseif(WIN32 AND NOT ARM)
-      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/libs/windows)
-    elseif(UNIX)
-      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/libs/linux)
+    if(EXISTS ${IPP_ROOT_DIR}/lib)
+      set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/lib)
     else()
-      message(MESSAGE "IPP ${_LATEST_VERSION} at ${IPP_ROOT_DIR} is not supported")
-      unset(HAVE_IPP)
-      return()
+      _ipp_not_supported("IPP ${IPP_VERSION_STR} at ${IPP_ROOT_DIR} is not supported")
     endif()
     if(X86_64)
-      set(IPP_LIBRARY_DIR ${IPP_LIBRARY_DIR}/intel64)
+      _ipp_set_library_dir(${IPP_LIBRARY_DIR}/intel64)
     else()
-      set(IPP_LIBRARY_DIR ${IPP_LIBRARY_DIR}/ia32)
+      _ipp_set_library_dir(${IPP_LIBRARY_DIR}/ia32)
     endif()
   endif()
 
+  macro(_ipp_add_library name)  # append <IPP_LIB_PREFIX><IPP_PREFIX><name><IPP_SUFFIX><IPP_LIB_SUFFIX> to IPP_LIBRARIES if the file exists
+    if (EXISTS ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+      list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+    else()
+      message(STATUS "Can't find IPP library: ${name}")  # only reported; the library is left out of the list
+    endif()
+  endmacro()
+
   set(IPP_PREFIX "ipp")
-  if(${_LATEST_VERSION} VERSION_LESS "8.0")
-    set(IPP_SUFFIX "_l")        # static not threaded libs suffix IPP 7.x
+  if(${IPP_VERSION_STR} VERSION_LESS "8.0")
+    set(IPP_SUFFIX "_l")      # static not threaded libs suffix IPP 7.x
   else()
     if(WIN32)
       set(IPP_SUFFIX "mt")    # static not threaded libs suffix IPP 8.x for Windows
@@ -150,78 +165,92 @@ macro(ipp_set_variables _LATEST_VERSION)
       set(IPP_SUFFIX "")      # static not threaded libs suffix IPP 8.x for Linux/OS X
     endif()
   endif()
-  set(IPPCORE "core")     # core functionality
-  set(IPPSP   "s")        # signal processing
-  set(IPPIP   "i")        # image processing
-  set(IPPCC   "cc")       # color conversion
-  set(IPPCV   "cv")       # computer vision
-  set(IPPVM   "vm")       # vector math
 
-  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPVM}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
-  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCC}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
-  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCV}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
-  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPI}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
-  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPS}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
-  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCORE}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+  if(HAVE_IPP_ICV_ONLY)
+    _ipp_add_library(icv)
+  else()
+    _ipp_add_library(core)
+    _ipp_add_library(s)
+    _ipp_add_library(i)
+    _ipp_add_library(cc)
+    _ipp_add_library(cv)
+    _ipp_add_library(vm)
+    _ipp_add_library(m)
 
-# FIXIT
-#  if(UNIX AND NOT HAVE_IPP_ICV_ONLY)
-#    get_filename_component(INTEL_COMPILER_LIBRARY_DIR ${IPP_ROOT_DIR}/../lib REALPATH)
-  if(UNIX)
-    if(NOT HAVE_IPP_ICV_ONLY)
+    if(UNIX)
       get_filename_component(INTEL_COMPILER_LIBRARY_DIR ${IPP_ROOT_DIR}/../lib REALPATH)
-    else()
-      set(INTEL_COMPILER_LIBRARY_DIR "/opt/intel/lib")
-    endif()
-    if(IPP_X64)
-      if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
-        message(SEND_ERROR "Intel compiler EM64T libraries not found")
+      if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR})
+        get_filename_component(INTEL_COMPILER_LIBRARY_DIR ${IPP_ROOT_DIR}/../compiler/lib REALPATH)
       endif()
-      set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
-    else()
-      if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
-        message(SEND_ERROR "Intel compiler IA32 libraries not found")
+      if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR})
+        _ipp_not_supported("IPP configuration error: can't find Intel compiler library dir ${INTEL_COMPILER_LIBRARY_DIR}")
       endif()
-      set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
-    endif()
-    list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}irc${CMAKE_SHARED_LIBRARY_SUFFIX})
-    list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}imf${CMAKE_SHARED_LIBRARY_SUFFIX})
-    list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}svml${CMAKE_SHARED_LIBRARY_SUFFIX})
+      if(NOT APPLE)
+        if(IPP_X64)
+          if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
+            message(SEND_ERROR "Intel compiler EM64T libraries not found")
+          endif()
+          set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
+        else()
+          if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
+            message(SEND_ERROR "Intel compiler IA32 libraries not found")
+          endif()
+          set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
+        endif()
+      endif()
+
+      macro(_ipp_add_compiler_library name)  # append the shared Intel compiler runtime library <name> to IPP_LIBRARIES if the file exists
+        if (EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}${name}${CMAKE_SHARED_LIBRARY_SUFFIX})
+          list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}${name}${CMAKE_SHARED_LIBRARY_SUFFIX})
+        else()
+          message(STATUS "Can't find compiler library: ${name}")  # only reported; the library is left out of the list
+        endif()
+      endmacro()
+
+      _ipp_add_compiler_library(irc)
+      _ipp_add_compiler_library(imf)
+      _ipp_add_compiler_library(svml)
+    endif(UNIX)
   endif()
 
   #message(STATUS "IPP libs: ${IPP_LIBRARIES}")
 endmacro()
 
-if(WITH_IPP)
-  set(IPPPATH $ENV{IPPROOT})
-  if(UNIX)
-    list(APPEND IPPPATH /opt/intel/ipp)
-  endif()
-elseif(WITH_ICV)
-  if(DEFINED ENV{IPPICVROOT})
-    set(IPPPATH $ENV{IPPICVROOT})
-  else()
-    set(IPPPATH ${OpenCV_SOURCE_DIR}/3rdparty/ippicv)
+# OPENCV_IPP_PATH is an environment variable for internal usage only, do not use it
+if(DEFINED ENV{OPENCV_IPP_PATH} AND NOT DEFINED IPPROOT)  # an IPPROOT that is already defined takes precedence
+  set(IPPROOT "$ENV{OPENCV_IPP_PATH}")
+endif()
+if(NOT DEFINED IPPROOT)  # nothing supplied: fall back to 3rdparty/ippicv in the source tree
+  set(IPPROOT "${OpenCV_SOURCE_DIR}/3rdparty/ippicv")
+endif()
+
+# Try ICV: look for ippicv.h directly in IPPROOT only (default and CMake search paths are disabled)
+find_path(
+    IPP_ICV_H_PATH
+    NAMES ippicv.h
+    PATHS ${IPPROOT}
+    DOC "The path to Intel(R) IPP ICV header files"
+    NO_DEFAULT_PATH
+    NO_CMAKE_PATH)
+set(IPP_ROOT_DIR ${IPP_ICV_H_PATH})  # the directory holding ippicv.h is used as the IPP root
+
+if(NOT IPP_ICV_H_PATH)
+  # Try standalone IPP
+  find_path(
+      IPP_H_PATH
+      NAMES ippversion.h
+      PATHS ${IPPROOT}
+      PATH_SUFFIXES include
+      DOC "The path to Intel(R) IPP header files"
+      NO_DEFAULT_PATH
+      NO_CMAKE_PATH)
+  if(IPP_H_PATH)
+    get_filename_component(IPP_ROOT_DIR ${IPP_H_PATH} PATH)
   endif()
 endif()
 
-
-find_path(
-    IPP_H_PATH
-    NAMES ippversion.h
-    PATHS ${IPPPATH}
-    PATH_SUFFIXES include
-    DOC "The path to Intel(R) IPP header files"
-    NO_DEFAULT_PATH
-    NO_CMAKE_PATH)
-
-if(IPP_H_PATH)
-    set(HAVE_IPP 1)
-
-    get_filename_component(IPP_ROOT_DIR ${IPP_H_PATH} PATH)
-
-    ipp_get_version(${IPP_ROOT_DIR})
-    ipp_set_variables(${IPP_VERSION_STR})
+if(IPP_ROOT_DIR)
+  ipp_detect_version()
 endif()
 
 
diff --git a/cmake/OpenCVFindLibsPerf.cmake b/cmake/OpenCVFindLibsPerf.cmake
index 7198326351..a046b8fc34 100644
--- a/cmake/OpenCVFindLibsPerf.cmake
+++ b/cmake/OpenCVFindLibsPerf.cmake
@@ -8,7 +8,7 @@ if(WITH_TBB)
 endif(WITH_TBB)
 
 # --- IPP ---
-if(WITH_IPP OR WITH_ICV)
+if(WITH_IPP)
   include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindIPP.cmake")
   if(HAVE_IPP)
     ocv_include_directories(${IPP_INCLUDE_DIRS})
diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp
index 45351d1a86..593ee9fd55 100644
--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@@ -211,8 +211,8 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
 
 #ifdef HAVE_IPP
 #  ifdef HAVE_IPP_ICV_ONLY
+#    include "ipp_redefine.h"
 #    include "ippicv.h"
-#    include "ippicv_fn_map.h"
 #  else
 #    include "ipp.h"
 #  endif
@@ -223,6 +223,13 @@ static inline IppiSize ippiSize(int width, int height)
     IppiSize size = { width, height };
     return size;
 }
+
+static inline IppiSize ippiSize(const cv::Size & _size) // overload: build an IppiSize from the width/height of a cv::Size
+{
+    IppiSize size = { _size.width, _size.height };
+    return size;
+}
+
 #else
 #  define IPP_VERSION_X100 0
 #endif
diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
index ecc2ca0648..aa9469c04c 100644
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -460,7 +460,7 @@ static void add8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz), 0))
         return;
 #endif
     (vBinOp<uchar, OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -479,7 +479,7 @@ static void add16u( const ushort* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz), 0))
         return;
 #endif
     (vBinOp<ushort, OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, sz));
@@ -491,7 +491,7 @@ static void add16s( const short* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz), 0))
         return;
 #endif
     (vBinOp<short, OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, sz));
@@ -510,7 +510,7 @@ static void add32f( const float* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp32<float, OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, sz));
@@ -529,7 +529,7 @@ static void sub8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz), 0))
         return;
 #endif
     (vBinOp<uchar, OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -548,7 +548,7 @@ static void sub16u( const ushort* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz), 0))
         return;
 #endif
     (vBinOp<ushort, OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, sz));
@@ -560,7 +560,7 @@ static void sub16s( const short* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz), 0))
         return;
 #endif
     (vBinOp<short, OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, sz));
@@ -579,7 +579,7 @@ static void sub32f( const float* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp32<float, OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, sz));
@@ -801,7 +801,7 @@ static void absdiff8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp<uchar, OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -820,7 +820,7 @@ static void absdiff16u( const ushort* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp<ushort, OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, sz));
@@ -846,7 +846,7 @@ static void absdiff32f( const float* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp32<float, OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, sz));
@@ -866,7 +866,7 @@ static void and8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp<uchar, OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -878,7 +878,7 @@ static void or8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp<uchar, OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -890,7 +890,7 @@ static void xor8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp<uchar, OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -901,8 +901,8 @@ static void not8u( const uchar* src1, size_t step1,
                    uchar* dst, size_t step, Size sz, void* )
 {
 #if (ARITHM_USE_IPP == 1)
-    fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void *)src2;
-    if (0 <= ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, (IppiSize&)sz))
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void)src2;
+    if (0 <= ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp<uchar, OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -2386,7 +2386,7 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
     if( op  >= 0 )
     {
         fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if (0 <= ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
+        if (0 <= ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
             return;
     }
 #endif
@@ -2469,7 +2469,7 @@ static void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t
     if( op  >= 0 )
     {
         fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if (0 <= ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
+        if (0 <= ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
             return;
     }
 #endif
@@ -2484,7 +2484,7 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
     if( op  > 0 )
     {
         fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if (0 <= ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
+        if (0 <= ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
             return;
     }
 #endif
@@ -2590,7 +2590,7 @@ static void cmp32f(const float* src1, size_t step1, const float* src2, size_t st
     if( op  >= 0 )
     {
         fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if (0 <= ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
+        if (0 <= ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
             return;
     }
 #endif
diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp
index cd5cf9b733..155ca67d6f 100644
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@@ -1079,6 +1079,33 @@ dtype* dst, size_t dstep, Size size, double* scale) \
     cvtScale_(src, sstep, dst, dstep, size, (wtype)scale[0], (wtype)scale[1]); \
 }
 
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) // try ippiConvert_<ippFavor> first; cvt_ runs only if it returns a negative status
+#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
+static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
+                         dtype* dst, size_t dstep, Size size, double*) \
+{ \
+    if (ippiConvert_##ippFavor(src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height)) >= 0) \
+        return; \
+    cvt_(src, sstep, dst, dstep, size); \
+}
+
+#define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \
+static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
+                         dtype* dst, size_t dstep, Size size, double*) \
+{ \
+    if (ippiConvert_##ippFavor(src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height), ippRndFinancial, 0) >= 0) \
+        return; \
+    cvt_(src, sstep, dst, dstep, size); \
+}
+#else // otherwise the wrappers call cvt_ directly and ippFavor is unused
+#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
+static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
+                         dtype* dst, size_t dstep, Size size, double*) \
+{ \
+    cvt_(src, sstep, dst, dstep, size); \
+}
+#define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F // without the IPP call both variants are identical
+#endif // HAVE_IPP && !HAVE_IPP_ICV_ONLY
 
 #define DEF_CVT_FUNC(suffix, stype, dtype) \
 static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
@@ -1089,7 +1116,7 @@ static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
 
 #define DEF_CPY_FUNC(suffix, stype) \
 static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
-stype* dst, size_t dstep, Size size, double*) \
+                         stype* dst, size_t dstep, Size size, double*) \
 { \
     cpy_(src, sstep, dst, dstep, size); \
 }
@@ -1160,48 +1187,48 @@ DEF_CVT_SCALE_FUNC(32f64f, float, double, double)
 DEF_CVT_SCALE_FUNC(64f,    double, double, double)
 
 DEF_CPY_FUNC(8u,     uchar)
-DEF_CVT_FUNC(8s8u,   schar, uchar)
-DEF_CVT_FUNC(16u8u,  ushort, uchar)
-DEF_CVT_FUNC(16s8u,  short, uchar)
-DEF_CVT_FUNC(32s8u,  int, uchar)
-DEF_CVT_FUNC(32f8u,  float, uchar)
+DEF_CVT_FUNC_F(8s8u,   schar, uchar, 8s8u_C1Rs)
+DEF_CVT_FUNC_F(16u8u,  ushort, uchar, 16u8u_C1R)
+DEF_CVT_FUNC_F(16s8u,  short, uchar, 16s8u_C1R)
+DEF_CVT_FUNC_F(32s8u,  int, uchar, 32s8u_C1R)
+DEF_CVT_FUNC_F2(32f8u,  float, uchar, 32f8u_C1RSfs)
 DEF_CVT_FUNC(64f8u,  double, uchar)
 
-DEF_CVT_FUNC(8u8s,   uchar, schar)
-DEF_CVT_FUNC(16u8s,  ushort, schar)
-DEF_CVT_FUNC(16s8s,  short, schar)
-DEF_CVT_FUNC(32s8s,  int, schar)
-DEF_CVT_FUNC(32f8s,  float, schar)
+DEF_CVT_FUNC_F2(8u8s,   uchar, schar, 8u8s_C1RSfs)
+DEF_CVT_FUNC_F2(16u8s,  ushort, schar, 16u8s_C1RSfs)
+DEF_CVT_FUNC_F2(16s8s,  short, schar, 16s8s_C1RSfs)
+DEF_CVT_FUNC_F(32s8s,  int, schar, 32s8s_C1R)
+DEF_CVT_FUNC_F2(32f8s,  float, schar, 32f8s_C1RSfs)
 DEF_CVT_FUNC(64f8s,  double, schar)
 
-DEF_CVT_FUNC(8u16u,  uchar, ushort)
-DEF_CVT_FUNC(8s16u,  schar, ushort)
+DEF_CVT_FUNC_F(8u16u,  uchar, ushort, 8u16u_C1R)
+DEF_CVT_FUNC_F(8s16u,  schar, ushort, 8s16u_C1Rs)
 DEF_CPY_FUNC(16u,    ushort)
-DEF_CVT_FUNC(16s16u, short, ushort)
-DEF_CVT_FUNC(32s16u, int, ushort)
-DEF_CVT_FUNC(32f16u, float, ushort)
+DEF_CVT_FUNC_F(16s16u, short, ushort, 16s16u_C1Rs)
+DEF_CVT_FUNC_F2(32s16u, int, ushort, 32s16u_C1RSfs)
+DEF_CVT_FUNC_F2(32f16u, float, ushort, 32f16u_C1RSfs)
 DEF_CVT_FUNC(64f16u, double, ushort)
 
-DEF_CVT_FUNC(8u16s,  uchar, short)
-DEF_CVT_FUNC(8s16s,  schar, short)
-DEF_CVT_FUNC(16u16s, ushort, short)
-DEF_CVT_FUNC(32s16s, int, short)
-DEF_CVT_FUNC(32f16s, float, short)
+DEF_CVT_FUNC_F(8u16s,  uchar, short, 8u16s_C1R)
+DEF_CVT_FUNC_F(8s16s,  schar, short, 8s16s_C1R)
+DEF_CVT_FUNC_F2(16u16s, ushort, short, 16u16s_C1RSfs)
+DEF_CVT_FUNC_F2(32s16s, int, short, 32s16s_C1RSfs)
+DEF_CVT_FUNC_F2(32f16s, float, short, 32f16s_C1RSfs)
 DEF_CVT_FUNC(64f16s, double, short)
 
-DEF_CVT_FUNC(8u32s,  uchar, int)
-DEF_CVT_FUNC(8s32s,  schar, int)
-DEF_CVT_FUNC(16u32s, ushort, int)
-DEF_CVT_FUNC(16s32s, short, int)
+DEF_CVT_FUNC_F(8u32s,  uchar, int, 8u32s_C1R)
+DEF_CVT_FUNC_F(8s32s,  schar, int, 8s32s_C1R)
+DEF_CVT_FUNC_F(16u32s, ushort, int, 16u32s_C1R)
+DEF_CVT_FUNC_F(16s32s, short, int, 16s32s_C1R)
 DEF_CPY_FUNC(32s,    int)
-DEF_CVT_FUNC(32f32s, float, int)
+DEF_CVT_FUNC_F2(32f32s, float, int, 32f32s_C1RSfs)
 DEF_CVT_FUNC(64f32s, double, int)
 
-DEF_CVT_FUNC(8u32f,  uchar, float)
-DEF_CVT_FUNC(8s32f,  schar, float)
-DEF_CVT_FUNC(16u32f, ushort, float)
-DEF_CVT_FUNC(16s32f, short, float)
-DEF_CVT_FUNC(32s32f, int, float)
+DEF_CVT_FUNC_F(8u32f,  uchar, float, 8u32f_C1R)
+DEF_CVT_FUNC_F(8s32f,  schar, float, 8s32f_C1R)
+DEF_CVT_FUNC_F(16u32f, ushort, float, 16u32f_C1R)
+DEF_CVT_FUNC_F(16s32f, short, float, 16s32f_C1R)
+DEF_CVT_FUNC_F(32s32f, int, float, 32s32f_C1R)
 DEF_CVT_FUNC(64f32f, double, float)
 
 DEF_CVT_FUNC(8u64f,  uchar, double)
@@ -1434,7 +1461,7 @@ void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta)
         Size sz((int)(it.size*cn), 1);
 
         for( size_t i = 0; i < it.nplanes; i++, ++it )
-            func(ptrs[0], 0, 0, 0, ptrs[1], 0, sz, scale);
+            func(ptrs[0], 1, 0, 0, ptrs[1], 1, sz, scale);
     }
 }
 
diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp
index 5ac5f22c58..202e7a9225 100644
--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@@ -495,25 +495,17 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
     else
         kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH;
 
-    Size size = _src.size();
-    int cols = size.width, rows = size.height;
-    if ((cols == 1 && flipType == FLIP_COLS) ||
-            (rows == 1 && flipType == FLIP_ROWS) ||
-            (rows == 1 && cols == 1 && flipType == FLIP_BOTH))
-    {
-        _src.copyTo(_dst);
-        return true;
-    }
-
     ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
         format( "-D T=%s -D T1=%s -D cn=%d", ocl::memopTypeToStr(type),
                 ocl::memopTypeToStr(depth), cn));
     if (k.empty())
         return false;
 
+    Size size = _src.size();
     _dst.create(size, type);
     UMat src = _src.getUMat(), dst = _dst.getUMat();
 
+    int cols = size.width, rows = size.height;
     cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols;
     rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows;
 
@@ -531,13 +523,59 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
 void flip( InputArray _src, OutputArray _dst, int flip_mode )
 {
     CV_Assert( _src.dims() <= 2 );
+    Size size = _src.size();
 
-    CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src,_dst, flip_mode))
+    if (flip_mode < 0)
+    {
+        if (size.width == 1)
+            flip_mode = 0;
+        if (size.height == 1)
+            flip_mode = 1;
+    }
+
+    if ((size.width == 1 && flip_mode > 0) ||
+        (size.height == 1 && flip_mode == 0) ||
+        (size.height == 1 && size.width == 1 && flip_mode < 0))
+    {
+        return _src.copyTo(_dst);
+    }
+
+    CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode))
 
     Mat src = _src.getMat();
-    _dst.create( src.size(), src.type() );
+    int type = src.type();
+    _dst.create( size, type );
     Mat dst = _dst.getMat();
-    size_t esz = src.elemSize();
+    size_t esz = CV_ELEM_SIZE(type);
+
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    typedef IppStatus (CV_STDCALL * ippiMirror)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize, IppiAxis flip);
+    ippiMirror ippFunc =
+        type == CV_8UC1 ? (ippiMirror)ippiMirror_8u_C1R :
+        type == CV_8UC3 ? (ippiMirror)ippiMirror_8u_C3R :
+        type == CV_8UC4 ? (ippiMirror)ippiMirror_8u_C4R :
+        type == CV_16UC1 ? (ippiMirror)ippiMirror_16u_C1R :
+        type == CV_16UC3 ? (ippiMirror)ippiMirror_16u_C3R :
+        type == CV_16UC4 ? (ippiMirror)ippiMirror_16u_C4R :
+        type == CV_16SC1 ? (ippiMirror)ippiMirror_16s_C1R :
+        type == CV_16SC3 ? (ippiMirror)ippiMirror_16s_C3R :
+        type == CV_16SC4 ? (ippiMirror)ippiMirror_16s_C4R :
+        type == CV_32SC1 ? (ippiMirror)ippiMirror_32s_C1R :
+        type == CV_32SC3 ? (ippiMirror)ippiMirror_32s_C3R :
+        type == CV_32SC4 ? (ippiMirror)ippiMirror_32s_C4R :
+        type == CV_32FC1 ? (ippiMirror)ippiMirror_32f_C1R :
+        type == CV_32FC3 ? (ippiMirror)ippiMirror_32f_C3R :
+        type == CV_32FC4 ? (ippiMirror)ippiMirror_32f_C4R : 0;
+    IppiAxis axis = flip_mode == 0 ? ippAxsHorizontal :
+        flip_mode > 0 ? ippAxsVertical : ippAxsBoth;
+    // Only the out-of-place (*_R) flavours are wired up here, so skip IPP when src and dst share a buffer (in-place flip).
+    if (ippFunc != 0 && src.data != dst.data)
+    {
+        IppStatus status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, ippiSize(src.cols, src.rows), axis);
+        if (status >= 0)
+            return;
+    }
+#endif
 
     if( flip_mode <= 0 )
         flipVert( src.data, src.step, dst.data, dst.step, src.size(), esz );
diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp
index 12ba4fa5b1..65f78de085 100644
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@@ -238,6 +238,12 @@ float  cubeRoot( float value )
 
 static void Magnitude_32f(const float* x, const float* y, float* mag, int len)
 {
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    IppStatus status = ippsMagnitude_32f(x, y, mag, len);
+    if (status >= 0)
+        return;
+#endif
+
     int i = 0;
 
 #if CV_SSE
@@ -264,6 +270,12 @@ static void Magnitude_32f(const float* x, const float* y, float* mag, int len)
 
 static void Magnitude_64f(const double* x, const double* y, double* mag, int len)
 {
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    IppStatus status = ippsMagnitude_64f(x, y, mag, len);
+    if (status >= 0)
+        return;
+#endif
+
     int i = 0;
 
 #if CV_SSE2
@@ -291,6 +303,11 @@ static void Magnitude_64f(const double* x, const double* y, double* mag, int len
 
 static void InvSqrt_32f(const float* src, float* dst, int len)
 {
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    if (ippsInvSqrt_32f_A21(src, dst, len) >= 0)
+        return;
+#endif
+
     int i = 0;
 
 #if CV_SSE
@@ -334,6 +351,10 @@ static void InvSqrt_64f(const double* src, double* dst, int len)
 
 static void Sqrt_32f(const float* src, float* dst, int len)
 {
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    if (ippsSqrt_32f_A21(src, dst, len) >= 0)
+        return;
+#endif
     int i = 0;
 
 #if CV_SSE
@@ -363,6 +384,11 @@ static void Sqrt_32f(const float* src, float* dst, int len)
 
 static void Sqrt_64f(const double* src, double* dst, int len)
 {
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    if (ippsSqrt_64f_A50(src, dst, len) >= 0)
+        return;
+#endif
+
     int i = 0;
 
 #if CV_SSE2
@@ -729,6 +755,22 @@ void polarToCart( InputArray src1, InputArray src2,
     dst2.create( Angle.dims, Angle.size, type );
     Mat X = dst1.getMat(), Y = dst2.getMat();
 
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees)
+    {
+        typedef IppStatus (CV_STDCALL * ippsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase,
+                                                         void * pDstRe, void * pDstIm, int len);
+        ippsPolarToCart ippFunc =
+        depth == CV_32F ? (ippsPolarToCart)ippsPolarToCart_32f :
+        depth == CV_64F ? (ippsPolarToCart)ippsPolarToCart_64f : 0;
+        CV_Assert(ippFunc != 0);
+
+        IppStatus status = ippFunc(Mag.data, Angle.data, X.data, Y.data, static_cast<int>(cn * X.total()));
+        if (status >= 0)
+            return;
+    }
+#endif
+
     const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0};
     uchar* ptrs[4];
     NAryMatIterator it(arrays, ptrs);
@@ -2119,6 +2161,29 @@ void pow( InputArray _src, double power, OutputArray _dst )
             _src.copyTo(_dst);
             return;
         case 2:
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+            if (depth == CV_32F && !same && ( (_src.dims() <= 2 && !ocl::useOpenCL()) || (_src.dims() > 2 && _src.isContinuous() && _dst.isContinuous()) ))
+            {
+                Mat src = _src.getMat();
+                _dst.create( src.dims, src.size, type );
+                Mat dst = _dst.getMat();
+
+                Size size = src.size();
+                int srcstep = (int)src.step, dststep = (int)dst.step, esz = CV_ELEM_SIZE(type);
+                if (src.isContinuous() && dst.isContinuous())
+                {
+                    size.width = (int)src.total();
+                    size.height = 1;
+                    srcstep = dststep = (int)src.total() * esz;
+                }
+                size.width *= cn;
+
+                IppStatus status = ippiSqr_32f_C1R((const Ipp32f *)src.data, srcstep, (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height));
+
+                if (status >= 0)
+                    return;
+            }
+#endif
             if (same)
                 multiply(_dst, _dst, _dst);
             else
@@ -2168,6 +2233,18 @@ void pow( InputArray _src, double power, OutputArray _dst )
     }
     else
     {
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+        if (src.isContinuous() && dst.isContinuous())
+        {
+            IppStatus status = depth == CV_32F ?
+                        ippsPowx_32f_A21((const Ipp32f *)src.data, (Ipp32f)power, (Ipp32f*)dst.data, (Ipp32s)(src.total() * cn)) :
+                        ippsPowx_64f_A50((const Ipp64f *)src.data, power, (Ipp64f*)dst.data, (Ipp32s)(src.total() * cn));
+
+            if (status >= 0)
+                return;
+        }
+#endif
+
         int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn);
         size_t esz1 = src.elemSize1();
 
diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp
index 8891bb05f8..23735194d3 100644
--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@@ -2212,7 +2212,7 @@ void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray
     Mat src1 = _src1.getMat(), src2 = _src2.getMat();
     CV_Assert(src1.size == src2.size);
 
-    _dst.create(src1.dims, src1.size, src1.type());
+    _dst.create(src1.dims, src1.size, type);
     Mat dst = _dst.getMat();
 
     float falpha = (float)alpha;
@@ -2220,9 +2220,16 @@ void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray
 
     ScaleAddFunc func = depth == CV_32F ? (ScaleAddFunc)scaleAdd_32f : (ScaleAddFunc)scaleAdd_64f;
 
-    if( src1.isContinuous() && src2.isContinuous() && dst.isContinuous() )
+    if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous())
     {
         size_t len = src1.total()*cn;
+#if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY
+        if (depth == CV_32F &&
+                ippmSaxpy_vava_32f((const Ipp32f *)src1.data, (int)src1.step, sizeof(Ipp32f), falpha,
+                (const Ipp32f *)src2.data, (int)src2.step, sizeof(Ipp32f),
+                (Ipp32f *)dst.data, (int)dst.step, sizeof(Ipp32f), (int)len, 1) >= 0)
+            return;
+#endif
         func(src1.data, src2.data, dst.data, (int)len, palpha);
         return;
     }
diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp
index 45ae3d5124..4efba46548 100644
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@ -2967,6 +2967,30 @@ void cv::transpose( InputArray _src, OutputArray _dst )
         return;
     }
 
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    typedef IppStatus (CV_STDCALL * ippiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize);
+    ippiTranspose ippFunc =
+    type == CV_8UC1 ? (ippiTranspose)ippiTranspose_8u_C1R :
+    type == CV_8UC3 ? (ippiTranspose)ippiTranspose_8u_C3R :
+    type == CV_8UC4 ? (ippiTranspose)ippiTranspose_8u_C4R :
+    type == CV_16UC1 ? (ippiTranspose)ippiTranspose_16u_C1R :
+    type == CV_16UC3 ? (ippiTranspose)ippiTranspose_16u_C3R :
+    type == CV_16UC4 ? (ippiTranspose)ippiTranspose_16u_C4R :
+    type == CV_16SC1 ? (ippiTranspose)ippiTranspose_16s_C1R :
+    type == CV_16SC3 ? (ippiTranspose)ippiTranspose_16s_C3R :
+    type == CV_16SC4 ? (ippiTranspose)ippiTranspose_16s_C4R :
+    type == CV_32SC1 ? (ippiTranspose)ippiTranspose_32s_C1R :
+    type == CV_32SC3 ? (ippiTranspose)ippiTranspose_32s_C3R :
+    type == CV_32SC4 ? (ippiTranspose)ippiTranspose_32s_C4R :
+    type == CV_32FC1 ? (ippiTranspose)ippiTranspose_32f_C1R :
+    type == CV_32FC3 ? (ippiTranspose)ippiTranspose_32f_C3R :
+    type == CV_32FC4 ? (ippiTranspose)ippiTranspose_32f_C4R : 0;
+    // Only out-of-place (*_R) flavours are used; in-place calls (dst.data == src.data) are left to the dedicated branch below.
+    IppiSize roiSize = { src.cols, src.rows };
+    if (ippFunc != 0 && dst.data != src.data && ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, roiSize) >= 0)
+        return;
+#endif
+
     if( dst.data == src.data )
     {
         TransposeInplaceFunc func = transposeInplaceTab[esz];
diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index ffea804ed9..24190c52c9 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -1581,7 +1581,7 @@ void finish()
 
 #define IMPLEMENT_REFCOUNTABLE() \
     void addref() { CV_XADD(&refcount, 1); } \
-    void release() { if( CV_XADD(&refcount, -1) == 1 ) delete this; } \
+    void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
     int refcount
 
 /////////////////////////////////////////// Platform /////////////////////////////////////////////
diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp
index 0e3d44ed6b..ecc0f76cb8 100644
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@@ -933,10 +933,10 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
             dcn_stddev = (int)stddev.total();
             pstddev = (Ipp64f *)stddev.data;
         }
-        for( int k = cn; k < dcn_mean; k++ )
-            pmean[k] = 0;
-        for( int k = cn; k < dcn_stddev; k++ )
-            pstddev[k] = 0;
+        for( int c = cn; c < dcn_mean; c++ )
+            pmean[c] = 0;
+        for( int c = cn; c < dcn_stddev; c++ )
+            pstddev[c] = 0;
         IppiSize sz = { cols, rows };
         int type = src.type();
         if( !mask.empty() )
@@ -2016,6 +2016,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
     size_t total_size = src.total();
     int rows = src.size[0], cols = (int)(total_size/rows);
+
     if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
         && cols > 0 && (size_t)rows*cols == total_size
         && (normType == NORM_INF || normType == NORM_L1 ||
@@ -2030,7 +2031,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
                 normType == NORM_INF ?
                 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
                 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR :
-                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
+//                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
                 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
                 0) :
             normType == NORM_L1 ?
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index 4b3efce4a8..cef4db3c2c 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -918,16 +918,22 @@ public:
     #pragma warning(disable:4447) // Disable warning 'main' signature found without threading model
 #endif
 
-BOOL WINAPI DllMain(HINSTANCE, DWORD fdwReason, LPVOID);
-
+extern "C"
 BOOL WINAPI DllMain(HINSTANCE, DWORD fdwReason, LPVOID lpReserved)
 {
     if (fdwReason == DLL_THREAD_DETACH || fdwReason == DLL_PROCESS_DETACH)
     {
         if (lpReserved != NULL) // called after ExitProcess() call
+        {
             cv::__termination = true;
-        cv::deleteThreadAllocData();
-        cv::deleteThreadData();
+        }
+        else
+        {
+            // lpReserved is NULL on this path, so per-thread data may be freed; freeing is not allowed when it is non-null:
+            // http://msdn.microsoft.com/en-us/library/windows/desktop/ms682583.aspx
+            cv::deleteThreadAllocData();
+            cv::deleteThreadData();
+        }
     }
     return TRUE;
 }
diff --git a/modules/core/test/ocl/test_matrix_operation.cpp b/modules/core/test/ocl/test_matrix_operation.cpp
index 901609538e..ee591e9bd9 100644
--- a/modules/core/test/ocl/test_matrix_operation.cpp
+++ b/modules/core/test/ocl/test_matrix_operation.cpp
@@ -107,6 +107,7 @@ PARAM_TEST_CASE(CopyTo, MatDepth, Channels, bool, bool)
 {
     int depth, cn;
     bool use_roi, use_mask;
+    Scalar val;
 
     TEST_DECLARE_INPUT_PARAMETER(src);
     TEST_DECLARE_INPUT_PARAMETER(mask);
@@ -143,6 +144,8 @@ PARAM_TEST_CASE(CopyTo, MatDepth, Channels, bool, bool)
         if (use_mask)
             UMAT_UPLOAD_INPUT_PARAMETER(mask);
         UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
+
+        val = randomScalar(-MAX_VALUE, MAX_VALUE);
     }
 };
 
@@ -168,12 +171,38 @@ OCL_TEST_P(CopyTo, Accuracy)
     }
 }
 
+typedef CopyTo SetTo;
+
+OCL_TEST_P(SetTo, Accuracy)
+{
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        generateTestData();
+
+        if (use_mask)
+        {
+            OCL_OFF(dst_roi.setTo(val, mask_roi));
+            OCL_ON(udst_roi.setTo(val, umask_roi));
+        }
+        else
+        {
+            OCL_OFF(dst_roi.setTo(val));
+            OCL_ON(udst_roi.setTo(val));
+        }
+
+        OCL_EXPECT_MATS_NEAR(dst, 0);
+    }
+}
+
 OCL_INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
                                 OCL_ALL_DEPTHS, OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 
 OCL_INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
                                 OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
 
+OCL_INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
+                                OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
+
 } } // namespace cvtest::ocl
 
 #endif
diff --git a/modules/highgui/test/test_ffmpeg.cpp b/modules/highgui/test/test_ffmpeg.cpp
index f8491d1a69..61fc3d49a4 100644
--- a/modules/highgui/test/test_ffmpeg.cpp
+++ b/modules/highgui/test/test_ffmpeg.cpp
@@ -329,7 +329,7 @@ public:
                 EXPECT_EQ(reference.depth(), actual.depth());
                 EXPECT_EQ(reference.channels(), actual.channels());
 
-                double psnr = PSNR(actual, reference);
+                double psnr = cvtest::PSNR(actual, reference);
                 if (psnr < eps)
                 {
     #define SUM cvtest::TS::SUMMARY
diff --git a/modules/highgui/test/test_video_io.cpp b/modules/highgui/test/test_video_io.cpp
index cacfde0b3c..f380e0d26b 100644
--- a/modules/highgui/test/test_video_io.cpp
+++ b/modules/highgui/test/test_video_io.cpp
@@ -198,7 +198,7 @@ void CV_HighGuiTest::ImageTest(const string& dir)
         }
 
         const double thresDbell = 20;
-        double psnr = PSNR(loaded, image);
+        double psnr = cvtest::PSNR(loaded, image);
         if (psnr < thresDbell)
         {
             ts->printf(ts->LOG, "Reading image from file: too big difference (=%g) with fmt=%s\n", psnr, ext.c_str());
@@ -235,7 +235,7 @@ void CV_HighGuiTest::ImageTest(const string& dir)
             continue;
         }
 
-        psnr = PSNR(buf_loaded, image);
+        psnr = cvtest::PSNR(buf_loaded, image);
 
         if (psnr < thresDbell)
         {
@@ -316,7 +316,7 @@ void CV_HighGuiTest::VideoTest(const string& dir, const cvtest::VideoFormat& fmt
         Mat img = frames[i];
         Mat img1 = cv::cvarrToMat(ipl1);
 
-        double psnr = PSNR(img1, img);
+        double psnr = cvtest::PSNR(img1, img);
         if (psnr < thresDbell)
         {
             ts->printf(ts->LOG, "Too low frame %d psnr = %gdb\n", i, psnr);
@@ -371,7 +371,7 @@ void CV_HighGuiTest::SpecificImageTest(const string& dir)
         }
 
         const double thresDbell = 20;
-        double psnr = PSNR(loaded, image);
+        double psnr = cvtest::PSNR(loaded, image);
         if (psnr < thresDbell)
         {
             ts->printf(ts->LOG, "Reading image from file: too big difference (=%g) with fmt=bmp\n", psnr);
@@ -408,7 +408,7 @@ void CV_HighGuiTest::SpecificImageTest(const string& dir)
             continue;
         }
 
-        psnr = PSNR(buf_loaded, image);
+        psnr = cvtest::PSNR(buf_loaded, image);
 
         if (psnr < thresDbell)
         {
@@ -521,7 +521,7 @@ void CV_HighGuiTest::SpecificVideoTest(const string& dir, const cvtest::VideoFor
         Mat img = images[i];
 
         const double thresDbell = 40;
-        double psnr = PSNR(img, frame);
+        double psnr = cvtest::PSNR(img, frame);
 
         if (psnr > thresDbell)
         {
diff --git a/modules/highgui/test/test_video_pos.cpp b/modules/highgui/test/test_video_pos.cpp
index a502040efb..c8fe4050da 100644
--- a/modules/highgui/test/test_video_pos.cpp
+++ b/modules/highgui/test/test_video_pos.cpp
@@ -160,7 +160,7 @@ public:
                     return;
                 }
 
-                double err = PSNR(img, img0);
+                double err = cvtest::PSNR(img, img0);
 
                 if( err < 20 )
                 {
diff --git a/modules/imgproc/src/accum.cpp b/modules/imgproc/src/accum.cpp
index f130f34da9..74a63e916c 100644
--- a/modules/imgproc/src/accum.cpp
+++ b/modules/imgproc/src/accum.cpp
@@ -457,6 +457,56 @@ void cv::accumulateSquare( InputArray _src, InputOutputArray _dst, InputArray _m
 
     Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
 
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
+    {
+        typedef IppStatus (CV_STDCALL * ippiAddSquare)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, IppiSize roiSize);
+        typedef IppStatus (CV_STDCALL * ippiAddSquareMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst,
+                                                           int srcDstStep, IppiSize roiSize);
+        ippiAddSquare ippFunc = 0;
+        ippiAddSquareMask ippFuncMask = 0;
+
+        if (mask.empty())
+        {
+            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_8u32f_C1IR :
+                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_16u32f_C1IR :
+                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_32f_C1IR : 0;
+        }
+        else if (scn == 1)
+        {
+            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_8u32f_C1IMR :
+                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_16u32f_C1IMR :
+                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_32f_C1IMR : 0;
+        }
+
+        if (ippFunc || ippFuncMask)
+        {
+            IppStatus status = ippStsNoErr;
+            // Flatten continuous data into one row; an absent mask must not block this, since the outer check admits >2-D input without a mask.
+            Size size = src.size();
+            int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
+            if (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous()))
+            {
+                srcstep = static_cast<int>(src.total() * src.elemSize());
+                dststep = static_cast<int>(dst.total() * dst.elemSize());
+                maskstep = static_cast<int>(mask.total() * mask.elemSize());
+                size.width = static_cast<int>(src.total());
+                size.height = 1;
+            }
+            size.width *= scn;
+
+            if (mask.empty())
+                status = ippFunc(src.data, srcstep, (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height));
+            else
+                status = ippFuncMask(src.data, srcstep, (Ipp8u *)mask.data, maskstep,
+                                     (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height));
+
+            if (status >= 0)
+                return;
+        }
+    }
+#endif
+
     int fidx = getAccTabIdx(sdepth, ddepth);
     AccFunc func = fidx >= 0 ? accSqrTab[fidx] : 0;
     CV_Assert( func != 0 );
@@ -485,6 +535,59 @@ void cv::accumulateProduct( InputArray _src1, InputArray _src2,
 
     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
 
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    if (src1.dims <= 2 || (src1.isContinuous() && src2.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
+    {
+        typedef IppStatus (CV_STDCALL * ippiAddProduct)(const void * pSrc1, int src1Step, const void * pSrc2,
+                                                        int src2Step, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
+        typedef IppStatus (CV_STDCALL * ippiAddProductMask)(const void * pSrc1, int src1Step, const void * pSrc2, int src2Step,
+                                                            const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
+        ippiAddProduct ippFunc = 0;
+        ippiAddProductMask ippFuncMask = 0;
+
+        if (mask.empty())
+        {
+            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_8u32f_C1IR :
+                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_16u32f_C1IR :
+                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_32f_C1IR : 0;
+        }
+        else if (scn == 1)
+        {
+            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_8u32f_C1IMR :
+                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_16u32f_C1IMR :
+                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_32f_C1IMR : 0;
+        }
+
+        if (ippFunc || ippFuncMask)
+        {
+            IppStatus status = ippStsNoErr;
+            // Flatten continuous data into one row; an absent mask must not block this, and >2-D input is only admitted above when it can be flattened.
+            Size size = src1.size();
+            int src1step = (int)src1.step, src2step = (int)src2.step, dststep = (int)dst.step, maskstep = (int)mask.step;
+            if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous()))
+            {
+                src1step = static_cast<int>(src1.total() * src1.elemSize());
+                src2step = static_cast<int>(src2.total() * src2.elemSize());
+                dststep = static_cast<int>(dst.total() * dst.elemSize());
+                maskstep = static_cast<int>(mask.total() * mask.elemSize());
+                size.width = static_cast<int>(src1.total());
+                size.height = 1;
+            }
+            size.width *= scn;
+
+            if (mask.empty())
+                status = ippFunc(src1.data, src1step, src2.data, src2step, (Ipp32f *)dst.data,
+                                 dststep, ippiSize(size.width, size.height));
+            else
+                status = ippFuncMask(src1.data, src1step, src2.data, src2step, (Ipp8u *)mask.data, maskstep,
+                                     (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height));
+
+            if (status >= 0)
+                return;
+        }
+    }
+#endif
+
     int fidx = getAccTabIdx(sdepth, ddepth);
     AccProdFunc func = fidx >= 0 ? accProdTab[fidx] : 0;
     CV_Assert( func != 0 );
@@ -512,6 +615,58 @@ void cv::accumulateWeighted( InputArray _src, InputOutputArray _dst,
 
     Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
 
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
+    {
+        typedef IppStatus (CV_STDCALL * ippiAddWeighted)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep,
+                                                         IppiSize roiSize, Ipp32f alpha);
+        typedef IppStatus (CV_STDCALL * ippiAddWeightedMask)(const void * pSrc, int srcStep, const Ipp8u * pMask,
+                                                             int maskStep, Ipp32f * pSrcDst,
+                                                             int srcDstStep, IppiSize roiSize, Ipp32f alpha);
+        ippiAddWeighted ippFunc = 0;
+        ippiAddWeightedMask ippFuncMask = 0;
+
+        if (mask.empty())
+        {
+            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_8u32f_C1IR :
+                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_16u32f_C1IR :
+                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_32f_C1IR : 0;
+        }
+        else if (scn == 1)
+        {
+            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_8u32f_C1IMR :
+                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_16u32f_C1IMR :
+                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_32f_C1IMR : 0;
+        }
+
+        if (ippFunc || ippFuncMask)
+        {
+            IppStatus status = ippStsNoErr;
+            // Flatten continuous data into one row; an absent mask must not block this, since the outer check admits >2-D input without a mask.
+            Size size = src.size();
+            int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
+            if (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous()))
+            {
+                srcstep = static_cast<int>(src.total() * src.elemSize());
+                dststep = static_cast<int>(dst.total() * dst.elemSize());
+                maskstep = static_cast<int>(mask.total() * mask.elemSize());
+                size.width = static_cast<int>(src.total());
+                size.height = 1;
+            }
+            size.width *= scn;
+
+            if (mask.empty())
+                status = ippFunc(src.data, srcstep, (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height), (Ipp32f)alpha);
+            else
+                status = ippFuncMask(src.data, srcstep, (Ipp8u *)mask.data, maskstep,
+                                     (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height), (Ipp32f)alpha);
+
+            if (status >= 0)
+                return;
+        }
+    }
+#endif
+
     int fidx = getAccTabIdx(sdepth, ddepth);
     AccWFunc func = fidx >= 0 ? accWTab[fidx] : 0;
     CV_Assert( func != 0 );
diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp
index 8ab7e4929f..287a188807 100644
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@@ -200,12 +200,14 @@ void CvtColorLoop(const Mat& src, Mat& dst, const Cvt& cvt)
 }
 
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+
 typedef IppStatus (CV_STDCALL* ippiReorderFunc)(const void *, int, void *, int, IppiSize, const int *);
 typedef IppStatus (CV_STDCALL* ippiGeneralFunc)(const void *, int, void *, int, IppiSize);
 typedef IppStatus (CV_STDCALL* ippiColor2GrayFunc)(const void *, int, void *, int, IppiSize, const Ipp32f *);
 
 template <typename Cvt>
-class CvtColorIPPLoop_Invoker : public ParallelLoopBody
+class CvtColorIPPLoop_Invoker :
+        public ParallelLoopBody
 {
 public:
 
@@ -251,8 +253,8 @@ bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt)
         source = temp;
     }
     bool ok;
-    parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker<Cvt>(source, dst, cvt, &ok), source.total()/(double)(1<<16) );
-    //ok = cvt(src.ptr<uchar>(0), (int)src.step[0], dst.ptr<uchar>(0), (int)dst.step[0], src.cols, src.rows);
+    parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker<Cvt>(source, dst, cvt, &ok),
+                  source.total()/(double)(1<<16) );
     return ok;
 }
 
@@ -298,7 +300,7 @@ static ippiReorderFunc ippiSwapChannelsC3RTab[] =
     0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0
 };
 
-#if (IPP_VERSION_X100 >= 801)
+#if !defined(HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 801
 static ippiReorderFunc ippiSwapChannelsC4RTab[] =
 {
     (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0,
@@ -308,8 +310,8 @@ static ippiReorderFunc ippiSwapChannelsC4RTab[] =
 
 static ippiColor2GrayFunc ippiColor2GrayC3Tab[] =
 {
-    (ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R, 0,
-    0, (ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R, 0, 0
+    /*(ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R*/ 0, 0, /*(ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R*/ 0, 0,
+    0, /*(ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R*/ 0, 0, 0
 };
 
 static ippiColor2GrayFunc ippiColor2GrayC4Tab[] =
@@ -339,18 +341,18 @@ static ippiGeneralFunc ippiCopyP3C3RTab[] =
 static ippiGeneralFunc ippiRGB2XYZTab[] =
 {
     (ippiGeneralFunc)ippiRGBToXYZ_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToXYZ_16u_C3R, 0,
-    0, (ippiGeneralFunc)ippiRGBToXYZ_32f_C3R, 0, 0
+    0, /*(ippiGeneralFunc)ippiRGBToXYZ_32f_C3R*/ 0, 0, 0
 };
 
 static ippiGeneralFunc ippiXYZ2RGBTab[] =
 {
     (ippiGeneralFunc)ippiXYZToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiXYZToRGB_16u_C3R, 0,
-    0, (ippiGeneralFunc)ippiXYZToRGB_32f_C3R, 0, 0
+    0, /*(ippiGeneralFunc)ippiXYZToRGB_32f_C3R*/ 0, 0, 0
 };
 
 static ippiGeneralFunc ippiRGB2HSVTab[] =
 {
-    (ippiGeneralFunc)ippiRGBToHSV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHSV_16u_C3R, 0,
+    /*(ippiGeneralFunc)ippiRGBToHSV_8u_C3R*/ 0, 0, /*(ippiGeneralFunc)ippiRGBToHSV_16u_C3R*/ 0, 0,
     0, 0, 0, 0
 };
 
@@ -377,7 +379,7 @@ struct IPPGeneralFunctor
     IPPGeneralFunctor(ippiGeneralFunc _func) : func(_func){}
     bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
     {
-        return func(src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
+        return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0 : false;
     }
 private:
     ippiGeneralFunc func;
@@ -394,7 +396,7 @@ struct IPPReorderFunctor
     }
     bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
     {
-        return func(src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0;
+        return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0 : false;
     }
 private:
     ippiReorderFunc func;
@@ -403,7 +405,8 @@ private:
 
 struct IPPColor2GrayFunctor
 {
-    IPPColor2GrayFunctor(ippiColor2GrayFunc _func) : func(_func)
+    IPPColor2GrayFunctor(ippiColor2GrayFunc _func) :
+        func(_func)
     {
         coeffs[0] = 0.114f;
         coeffs[1] = 0.587f;
@@ -411,7 +414,7 @@ struct IPPColor2GrayFunctor
     }
     bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
     {
-        return func(src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0;
+        return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0 : false;
     }
 private:
     ippiColor2GrayFunc func;
@@ -420,9 +423,16 @@ private:
 
 struct IPPGray2BGRFunctor
 {
-    IPPGray2BGRFunctor(ippiGeneralFunc _func) : func(_func){}
+    IPPGray2BGRFunctor(ippiGeneralFunc _func) :
+        func(_func)
+    {
+    }
+
     bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
     {
+        if (func == 0)
+            return false;
+
         const void* srcarray[3] = { src, src, src };
         return func(srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
     }
@@ -432,9 +442,16 @@ private:
 
 struct IPPGray2BGRAFunctor
 {
-    IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) : func1(_func1), func2(_func2), depth(_depth){}
+    IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) :
+        func1(_func1), func2(_func2), depth(_depth)
+    {
+    }
+
     bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
     {
+        if (func1 == 0 || func2 == 0)
+            return false;
+
         const void* srcarray[3] = { src, src, src };
         Mat temp(rows, cols, CV_MAKETYPE(depth, 3));
         if(func1(srcarray, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows)) < 0)
@@ -450,7 +467,8 @@ private:
 
 struct IPPReorderGeneralFunctor
 {
-    IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) : func1(_func1), func2(_func2), depth(_depth)
+    IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) :
+        func1(_func1), func2(_func2), depth(_depth)
     {
         order[0] = _order0;
         order[1] = _order1;
@@ -459,6 +477,9 @@ struct IPPReorderGeneralFunctor
     }
     bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
     {
+        if (func1 == 0 || func2 == 0)
+            return false;
+
         Mat temp;
         temp.create(rows, cols, CV_MAKETYPE(depth, 3));
         if(func1(src, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows), order) < 0)
@@ -474,7 +495,8 @@ private:
 
 struct IPPGeneralReorderFunctor
 {
-    IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) : func1(_func1), func2(_func2), depth(_depth)
+    IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) :
+        func1(_func1), func2(_func2), depth(_depth)
     {
         order[0] = _order0;
         order[1] = _order1;
@@ -483,6 +505,9 @@ struct IPPGeneralReorderFunctor
     }
     bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
     {
+        if (func1 == 0 || func2 == 0)
+            return false;
+
         Mat temp;
         temp.create(rows, cols, CV_MAKETYPE(depth, 3));
         if(func1(src, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows)) < 0)
@@ -495,6 +520,7 @@ private:
     int order[4];
     int depth;
 };
+
 #endif
 
 ////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
@@ -3254,7 +3280,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                 if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) )
                     return;
             }
-#if (IPP_VERSION_X100 >= 801)
+#if !defined(HAVE_IPP_ICV_ONLY) && (IPP_VERSION_X100 >= 801)
             else if( code == CV_RGBA2BGRA )
             {
                 if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) )
@@ -3315,17 +3341,14 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             CV_Assert( scn == 3 || scn == 4 );
             _dst.create(sz, CV_MAKETYPE(depth, 1));
             dst = _dst.getMat();
-/**/
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-/*
             if( code == CV_BGR2GRAY )
             {
                 if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) )
                     return;
             }
-            else
-*/
-            if( code == CV_RGB2GRAY )
+            else if( code == CV_RGB2GRAY )
             {
                 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) )
                     return;
@@ -3341,7 +3364,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                     return;
             }
 #endif
-/**/
+
             bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
 
             if( depth == CV_8U )
diff --git a/modules/imgproc/src/deriv.cpp b/modules/imgproc/src/deriv.cpp
index 0b19f22be4..1b3e2c417b 100644
--- a/modules/imgproc/src/deriv.cpp
+++ b/modules/imgproc/src/deriv.cpp
@@ -233,6 +233,9 @@ static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy,
             }
         }
     case CV_32F:
+#if defined(HAVE_IPP_ICV_ONLY) // ippiMulC_32f_C1R is not available in ICV-only builds
+        return false;
+#else
         {
             switch(dst.type())
             {
@@ -277,6 +280,7 @@ static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy,
                 return false;
             }
         }
+#endif
     default:
         return false;
     }
@@ -341,6 +345,10 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k
 
         if (src.type() == CV_32F && dst.type() == CV_32F)
         {
+#if defined(HAVE_IPP_ICV_ONLY) // ippiMulC_32f_C1R is not available in ICV-only builds
+            return false;
+#else
+#if 0
             if ((dx == 1) && (dy == 0))
             {
                 if (0 > ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize))
@@ -374,6 +382,7 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k
                     ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
                 return true;
             }
+#endif
 
             if((dx == 2) && (dy == 0))
             {
@@ -409,6 +418,7 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k
                     ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
                 return true;
             }
+#endif
         }
     }
 
diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp
index 8b337f645b..c9a5ed1e75 100644
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@@ -1464,7 +1464,7 @@ private:
     int ippiOperator(const uchar* _src, uchar* _dst, int width, int cn) const
     {
         int _ksize = kernel.rows + kernel.cols - 1;
-        if ((1 != cn && 3 != cn) || width < _ksize*8)
+//        if ((1 != cn && 3 != cn) || width < _ksize*8)
             return 0;
 
         const float* src = (const float*)_src;
diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
index 0c7aafc7b9..45a66bd83e 100644
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@@ -61,9 +61,9 @@ namespace cv
     typedef IppStatus (CV_STDCALL* ippiResizeGetSrcOffset)(void*, IppiPoint, IppiPoint*);
 #endif
 
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) && 0
     typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);
-    typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
+    typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
     typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);
 
     template <int channels, typename Type>
@@ -75,7 +75,7 @@ namespace cv
         return func(values, dataPointer, step, size) >= 0;
     }
 
-    bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
+    static bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
     {
         if( channels == 1 )
         {
@@ -1912,7 +1912,7 @@ static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, Dec
     getBufferSizeFunc = (ippiResizeGetBufferSize)ippiResizeGetBufferSize_##TYPE;\
     getSrcOffsetFunc =  (ippiResizeGetSrcOffset)ippiResizeGetSrcOffset_##TYPE;
 
-#if IPP_VERSION_X100 >= 701
+#if !defined(HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 701
 class IPPresizeInvoker :
     public ParallelLoopBody
 {
@@ -2384,7 +2384,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
     double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
     int k, sx, sy, dx, dy;
 
-#if IPP_VERSION_X100 >= 701
+#if !defined(HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 701
 #define IPP_RESIZE_EPS    1.e-10
 
     double ex = fabs((double)dsize.width/src.cols  - inv_scale_x)/inv_scale_x;
@@ -3892,11 +3892,11 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
 namespace cv
 {
 
-class warpAffineInvoker :
+class WarpAffineInvoker :
     public ParallelLoopBody
 {
 public:
-    warpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType,
+    WarpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType,
                       const Scalar &_borderValue, int *_adelta, int *_bdelta, double *_M) :
         ParallelLoopBody(), src(_src), dst(_dst), interpolation(_interpolation),
         borderType(_borderType), borderValue(_borderValue), adelta(_adelta), bdelta(_bdelta),
@@ -4013,16 +4013,20 @@ private:
     double *M;
 };
 
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-class IPPwarpAffineInvoker :
+
+    /*
+#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+class IPPWarpAffineInvoker :
     public ParallelLoopBody
 {
 public:
-    IPPwarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
-      ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
-      {
-          *ok = true;
-      }
+    IPPWarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int _borderType,
+                         const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
+        ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs),
+        borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+    {
+        *ok = true;
+    }
 
     virtual void operator() (const Range& range) const
     {
@@ -4040,21 +4044,26 @@ public:
                 return;
             }
         }
-        if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
+
+        // Aug 2013: problem in IPP 7.1, 8.0: the function sometimes returns ippStsCoeffErr
+        IppStatus status = func( src.data, srcsize, (int)src.step[0], srcroi, dst.data,
+                                (int)dst.step[0], dstroi, coeffs, mode );
+        if( status < 0)
             *ok = false;
     }
 private:
     Mat &src;
     Mat &dst;
-    double (&coeffs)[2][3];
     int mode;
+    double (&coeffs)[2][3];
     int borderType;
     Scalar borderValue;
     ippiWarpAffineBackFunc func;
     bool *ok;
-    const IPPwarpAffineInvoker& operator= (const IPPwarpAffineInvoker&);
+    const IPPWarpAffineInvoker& operator= (const IPPWarpAffineInvoker&);
 };
 #endif
+    */
 
 #ifdef HAVE_OPENCL
 
@@ -4204,16 +4213,19 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
     int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
     const int AB_BITS = MAX(10, (int)INTER_BITS);
     const int AB_SCALE = 1 << AB_BITS;
-/*
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-    int depth = src.depth();
-    int channels = src.channels();
+
+    /*
+#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+    int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
     if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
-        ( channels == 1 || channels == 3 || channels == 4 ) &&
-        ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) )
+       ( cn == 1 || cn == 3 || cn == 4 ) &&
+       ( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC) &&
+       ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT) )
     {
-        int type = src.type();
-        ippiWarpAffineBackFunc ippFunc =
+        ippiWarpAffineBackFunc ippFunc = 0;
+        if ((flags & WARP_INVERSE_MAP) != 0)
+        {
+            ippFunc =
             type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
             type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
             type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
@@ -4224,31 +4236,43 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
             type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
             type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
             0;
-        int mode =
-            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
-            flags == INTER_NEAREST ? IPPI_INTER_NN :
-            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
-            0;
-        if( mode && ippFunc )
-        {
-            double coeffs[2][3];
-            for( int i = 0; i < 2; i++ )
-            {
-                for( int j = 0; j < 3; j++ )
-                {
-                    coeffs[i][j] = matM.at<double>(i, j);
-                }
-            }
-            bool ok;
-            Range range(0, dst.rows);
-            IPPwarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
-            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
-            if( ok )
-                return;
         }
+        else
+        {
+            ippFunc =
+            type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C4R :
+            0;
+        }
+        int mode =
+        interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
+        interpolation == INTER_NEAREST ? IPPI_INTER_NN :
+        interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC :
+        0;
+        CV_Assert(mode && ippFunc);
+
+        double coeffs[2][3];
+        for( int i = 0; i < 2; i++ )
+            for( int j = 0; j < 3; j++ )
+                coeffs[i][j] = matM.at<double>(i, j);
+
+        bool ok;
+        Range range(0, dst.rows);
+        IPPWarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+        parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+        if( ok )
+            return;
     }
 #endif
-*/
+     */
+
     for( x = 0; x < dst.cols; x++ )
     {
         adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
@@ -4256,7 +4280,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
     }
 
     Range range(0, dst.rows);
-    warpAffineInvoker invoker(src, dst, interpolation, borderType,
+    WarpAffineInvoker invoker(src, dst, interpolation, borderType,
                               borderValue, adelta, bdelta, M);
     parallel_for_(range, invoker, dst.total()/(double)(1<<16));
 }
@@ -4265,12 +4289,12 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
 namespace cv
 {
 
-class warpPerspectiveInvoker :
+class WarpPerspectiveInvoker :
     public ParallelLoopBody
 {
 public:
 
-    warpPerspectiveInvoker(const Mat &_src, Mat &_dst, double *_M, int _interpolation,
+    WarpPerspectiveInvoker(const Mat &_src, Mat &_dst, double *_M, int _interpolation,
                            int _borderType, const Scalar &_borderValue) :
         ParallelLoopBody(), src(_src), dst(_dst), M(_M), interpolation(_interpolation),
         borderType(_borderType), borderValue(_borderValue)
@@ -4356,16 +4380,19 @@ private:
     Scalar borderValue;
 };
 
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-class IPPwarpPerspectiveInvoker :
+    /*
+#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+class IPPWarpPerspectiveInvoker :
     public ParallelLoopBody
 {
 public:
-    IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) :
-      ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
-      {
-          *ok = true;
-      }
+    IPPWarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation,
+        int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveFunc _func, bool *_ok) :
+        ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs),
+        borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+    {
+        *ok = true;
+    }
 
     virtual void operator() (const Range& range) const
     {
@@ -4384,22 +4411,25 @@ public:
                 return;
             }
         }
-        if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0)
+
+        IppStatus status = func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode);
+        if (status != ippStsNoErr)
             *ok = false;
     }
 private:
     Mat &src;
     Mat &dst;
-    double (&coeffs)[3][3];
     int mode;
+    double (&coeffs)[3][3];
     int borderType;
     const Scalar borderValue;
-    ippiWarpPerspectiveBackFunc func;
+    ippiWarpPerspectiveFunc func;
     bool *ok;
-    const IPPwarpPerspectiveInvoker& operator= (const IPPwarpPerspectiveInvoker&);
+
+    const IPPWarpPerspectiveInvoker& operator= (const IPPWarpPerspectiveInvoker&);
 };
 #endif
-
+    */
 }
 
 void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
@@ -4432,55 +4462,65 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
         return;
 #endif
 
-    if( !(flags & WARP_INVERSE_MAP) )
-         invert(matM, matM);
-/*
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-    int depth = src.depth();
-    int channels = src.channels();
-    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
-        ( channels == 1 || channels == 3 || channels == 4 ) &&
-        ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) )
+    /*
+#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+    int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    if( (depth == CV_8U || depth == CV_16U || depth == CV_32F) &&
+       (cn == 1 || cn == 3 || cn == 4) &&
+       ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) &&
+       (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC))
     {
-        int type = src.type();
-        ippiWarpPerspectiveBackFunc ippFunc =
-            type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R :
-            type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R :
-            type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R :
-            type == CV_16UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C1R :
-            type == CV_16UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C3R :
-            type == CV_16UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C4R :
-            type == CV_32FC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C1R :
-            type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R :
-            type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R :
-            0;
-        int mode =
-            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
-            flags == INTER_NEAREST ? IPPI_INTER_NN :
-            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
-            0;
-        if( mode && ippFunc )
+        ippiWarpPerspectiveFunc ippFunc = 0;
+        if ((flags & WARP_INVERSE_MAP) != 0)
         {
-            double coeffs[3][3];
-            for( int i = 0; i < 3; i++ )
-            {
-                for( int j = 0; j < 3; j++ )
-                {
-                    coeffs[i][j] = matM.at<double>(i, j);
-                }
-            }
-            bool ok;
-            Range range(0, dst.rows);
-            IPPwarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
-            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
-            if( ok )
-                return;
+            ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C4R : 0;
         }
+        else
+        {
+            ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C4R : 0;
+        }
+        int mode =
+        interpolation == INTER_NEAREST ? IPPI_INTER_NN :
+        interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
+        interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC : 0;
+        CV_Assert(mode && ippFunc);
+
+        double coeffs[3][3];
+        for( int i = 0; i < 3; i++ )
+            for( int j = 0; j < 3; j++ )
+                coeffs[i][j] = matM.at<double>(i, j);
+
+        bool ok;
+        Range range(0, dst.rows);
+        IPPWarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+        parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+        if( ok )
+            return;
     }
 #endif
-*/
+    */
+
+    if( !(flags & WARP_INVERSE_MAP) )
+        invert(matM, matM);
+
     Range range(0, dst.rows);
-    warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
+    WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
     parallel_for_(range, invoker, dst.total()/(double)(1<<16));
 }
 
diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp
index 07aa4c5dd3..5b13ffc29e 100644
--- a/modules/imgproc/src/morph.cpp
+++ b/modules/imgproc/src/morph.cpp
@@ -1228,6 +1228,9 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne
     }
     else
     {
+#if defined(HAVE_IPP_ICV_ONLY) // ippiFilterMin*/ippiFilterMax* are not available in ICV-only builds
+        return false;
+#else
         IppiPoint point = {anchor.x, anchor.y};
 
         #define IPP_MORPH_CASE(cvtype, flavor, data_type) \
@@ -1257,6 +1260,7 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne
         }
 
         #undef IPP_MORPH_CASE
+#endif
     }
 }
 
diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp
index 84570bd2c2..4318cd1871 100644
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@@ -841,7 +841,7 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
     CV_OCL_RUN(_dst.isUMat(), ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize))
 
     Mat src = _src.getMat();
-    int sdepth = src.depth(), cn = src.channels();
+    int stype = src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
     if( ddepth < 0 )
         ddepth = sdepth;
     _dst.create( src.size(), CV_MAKETYPE(ddepth, cn) );
@@ -858,6 +858,69 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
         return;
 #endif
 
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    int ippBorderType = borderType & ~BORDER_ISOLATED;
+    Point ocvAnchor, ippAnchor;
+    ocvAnchor.x = anchor.x < 0 ? ksize.width / 2 : anchor.x;
+    ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y;
+    ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0);
+    ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0);
+
+    if (normalize && !src.isSubmatrix() && ddepth == sdepth &&
+        (ippBorderType == BORDER_REPLICATE || ippBorderType == BORDER_CONSTANT) &&
+        ocvAnchor == ippAnchor )
+    {
+        Ipp32s bufSize;
+        IppiSize roiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize.width, ksize.height);
+
+#define IPP_FILTER_BOX_BORDER(ippType, ippDataType, flavor) \
+        do \
+        { \
+            if (ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippDataType, cn, &bufSize) >= 0) \
+            { \
+                Ipp8u * buffer = ippsMalloc_8u(bufSize); \
+                ippType borderValue[4] = { 0, 0, 0, 0 }; \
+                ippBorderType = ippBorderType == BORDER_CONSTANT ? ippBorderConst : ippBorderType == BORDER_REPLICATE ? ippBorderRepl : -1; \
+                CV_Assert(ippBorderType >= 0); \
+                IppStatus status = ippiFilterBoxBorder_##flavor((ippType *)src.data, (int)src.step, (ippType *)dst.data, (int)dst.step, roiSize, maskSize, \
+                                                                (IppiBorderType)ippBorderType, borderValue, buffer); \
+                ippsFree(buffer); \
+                if (status >= 0) \
+                    return; \
+            } \
+        } while ((void)0, 0)
+
+        if (stype == CV_8UC1)
+            IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C1R);
+        else if (stype == CV_8UC3)
+            IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C3R);
+        else if (stype == CV_8UC4)
+            IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C4R);
+
+        else if (stype == CV_16UC1)
+            IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C1R);
+        else if (stype == CV_16UC3)
+            IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C3R);
+        else if (stype == CV_16UC4)
+            IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C4R);
+
+        else if (stype == CV_16SC1)
+            IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C1R);
+        else if (stype == CV_16SC3)
+            IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C3R);
+        else if (stype == CV_16SC4)
+            IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C4R);
+
+        else if (stype == CV_32FC1)
+            IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C1R);
+        else if (stype == CV_32FC3)
+            IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C3R);
+        else if (stype == CV_32FC4)
+            IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C4R);
+    }
+#undef IPP_FILTER_BOX_BORDER
+#endif
+
     Ptr<FilterEngine> f = createBoxFilter( src.type(), dst.type(),
                         ksize, anchor, normalize, borderType );
     f->apply( src, dst );
@@ -1948,13 +2011,46 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize )
         return;
     }
 
-    CV_OCL_RUN(_src0.dims() <= 2 && _dst.isUMat(),
+    CV_OCL_RUN(_dst.isUMat(),
                ocl_medianFilter(_src0,_dst, ksize))
 
     Mat src0 = _src0.getMat();
     _dst.create( src0.size(), src0.type() );
     Mat dst = _dst.getMat();
 
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 801
+#define IPP_FILTER_MEDIAN_BORDER(ippType, ippDataType, flavor) \
+    do \
+    { \
+        if (ippiFilterMedianBorderGetBufferSize(dstRoiSize, maskSize, \
+            ippDataType, CV_MAT_CN(type), &bufSize) >= 0) \
+        { \
+            Ipp8u * buffer = ippsMalloc_8u(bufSize); \
+            IppStatus status = ippiFilterMedianBorder_##flavor((const ippType *)src0.data, (int)src0.step, \
+                (ippType *)dst.data, (int)dst.step, dstRoiSize, maskSize, \
+                ippBorderRepl, (ippType)0, buffer); \
+            ippsFree(buffer); \
+            if (status >= 0) \
+                return; \
+        } \
+    } \
+    while ((void)0, 0)
+
+    Ipp32s bufSize;
+    IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize);
+
+    int type = src0.type();
+    if (type == CV_8UC1)
+        IPP_FILTER_MEDIAN_BORDER(Ipp8u, ipp8u, 8u_C1R);
+    else if (type == CV_16UC1)
+        IPP_FILTER_MEDIAN_BORDER(Ipp16u, ipp16u, 16u_C1R);
+    else if (type == CV_16SC1)
+        IPP_FILTER_MEDIAN_BORDER(Ipp16s, ipp16s, 16s_C1R);
+    else if (type == CV_32FC1)
+        IPP_FILTER_MEDIAN_BORDER(Ipp32f, ipp32f, 32f_C1R);
+#undef IPP_FILTER_MEDIAN_BORDER
+#endif
+
 #ifdef HAVE_TEGRA_OPTIMIZATION
     if (tegra::medianBlur(src0, dst, ksize))
         return;
@@ -2329,13 +2425,14 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
     Mat temp;
     copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );
 
-#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
+#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7) && 0
     if( cn == 1 )
     {
         bool ok;
         IPPBilateralFilter_8u_Invoker body(temp, dst, sigma_color * sigma_color, sigma_space * sigma_space, radius, &ok );
         parallel_for_(Range(0, dst.rows), body, dst.total()/(double)(1<<16));
-        if( ok ) return;
+        if( ok )
+            return;
     }
 #endif
 
diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp
index 7fd0b2372d..17f323a1ad 100644
--- a/modules/imgproc/src/thresh.cpp
+++ b/modules/imgproc/src/thresh.cpp
@@ -53,11 +53,14 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
     uchar tab[256];
     Size roi = _src.size();
     roi.width *= _src.channels();
+    size_t src_step = _src.step;
+    size_t dst_step = _dst.step;
 
     if( _src.isContinuous() && _dst.isContinuous() )
     {
         roi.width *= roi.height;
         roi.height = 1;
+        src_step = dst_step = roi.width;
     }
 
 #ifdef HAVE_TEGRA_OPTIMIZATION
@@ -65,6 +68,25 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
         return;
 #endif
 
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    // Offer the modes IPP implements directly to IPP first. A negative status
+    // means the call was rejected; execution then continues with the code
+    // that follows this block.
+    {
+        const IppiSize ippRoi = { roi.width, roi.height };
+        const int ippSrcStep = (int)src_step, ippDstStep = (int)dst_step;
+        bool done = false;
+        if( type == THRESH_TRUNC )
+            done = ippiThreshold_GT_8u_C1R(_src.data, ippSrcStep, _dst.data, ippDstStep, ippRoi, thresh) >= 0;
+        else if( type == THRESH_TOZERO )
+            done = ippiThreshold_LTVal_8u_C1R(_src.data, ippSrcStep, _dst.data, ippDstStep, ippRoi, thresh+1, 0) >= 0;
+        else if( type == THRESH_TOZERO_INV )
+            done = ippiThreshold_GTVal_8u_C1R(_src.data, ippSrcStep, _dst.data, ippDstStep, ippRoi, thresh, 0) >= 0;
+        if( done )
+            return;
+    }
+#endif
+
     switch( type )
     {
     case THRESH_BINARY:
@@ -112,8 +134,8 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
 
         for( i = 0; i < roi.height; i++ )
         {
-            const uchar* src = (const uchar*)(_src.data + _src.step*i);
-            uchar* dst = (uchar*)(_dst.data + _dst.step*i);
+            const uchar* src = (const uchar*)(_src.data + src_step*i);
+            uchar* dst = (uchar*)(_dst.data + dst_step*i);
 
             switch( type )
             {
@@ -231,8 +253,8 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
     {
         for( i = 0; i < roi.height; i++ )
         {
-            const uchar* src = (const uchar*)(_src.data + _src.step*i);
-            uchar* dst = (uchar*)(_dst.data + _dst.step*i);
+            const uchar* src = (const uchar*)(_src.data + src_step*i);
+            uchar* dst = (uchar*)(_dst.data + dst_step*i);
             j = j_scalar;
 #if CV_ENABLE_UNROLLED
             for( ; j <= roi.width - 4; j += 4 )
@@ -276,6 +298,7 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
     {
         roi.width *= roi.height;
         roi.height = 1;
+        src_step = dst_step = roi.width;
     }
 
 #ifdef HAVE_TEGRA_OPTIMIZATION
@@ -283,6 +306,25 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
         return;
 #endif
 
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    // The steps are scaled by the element size before being handed to IPP.
+    // Narrow the whole product to int, not just the step operand.
+    {
+        const IppiSize sz = { roi.width, roi.height };
+        const int srcStepBytes = (int)(src_step*sizeof(src[0]));
+        const int dstStepBytes = (int)(dst_step*sizeof(dst[0]));
+        bool done = false;
+        if( type == THRESH_TRUNC )
+            done = 0 <= ippiThreshold_GT_16s_C1R(src, srcStepBytes, dst, dstStepBytes, sz, thresh);
+        else if( type == THRESH_TOZERO )
+            done = 0 <= ippiThreshold_LTVal_16s_C1R(src, srcStepBytes, dst, dstStepBytes, sz, thresh+1, 0);
+        else if( type == THRESH_TOZERO_INV )
+            done = 0 <= ippiThreshold_GTVal_16s_C1R(src, srcStepBytes, dst, dstStepBytes, sz, thresh, 0);
+        if( done )
+            return;
+    }
+#endif
+
     switch( type )
     {
     case THRESH_BINARY:
@@ -455,6 +497,25 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
         return;
 #endif
 
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    // THRESH_TOZERO is deliberately not sent to IPP: ippiThreshold_LTVal only
+    // zeroes src < threshold, and thresh+FLT_EPSILON is not the next float above
+    // thresh (it rounds back to thresh for |thresh| >= 4), so pixels equal to
+    // thresh would survive. That mode falls through to the switch below.
+    {
+        const IppiSize sz = { roi.width, roi.height };
+        const int srcStepBytes = (int)(src_step*sizeof(src[0]));
+        const int dstStepBytes = (int)(dst_step*sizeof(dst[0]));
+        bool done = false;
+        if( type == THRESH_TRUNC )
+            done = 0 <= ippiThreshold_GT_32f_C1R(src, srcStepBytes, dst, dstStepBytes, sz, thresh);
+        else if( type == THRESH_TOZERO_INV )
+            done = 0 <= ippiThreshold_GTVal_32f_C1R(src, srcStepBytes, dst, dstStepBytes, sz, thresh, 0);
+        if( done )
+            return;
+    }
+#endif
+
     switch( type )
     {
         case THRESH_BINARY:
diff --git a/modules/imgproc/test/ocl/test_color.cpp b/modules/imgproc/test/ocl/test_color.cpp
index fcf270f8e7..f0cf560bb1 100644
--- a/modules/imgproc/test/ocl/test_color.cpp
+++ b/modules/imgproc/test/ocl/test_color.cpp
@@ -155,15 +155,23 @@ OCL_TEST_P(CvtColor, YCrCb2BGRA) { performTest(3, 4, CVTCODE(YCrCb2BGR)); }
 
 // RGB <-> XYZ
 
-OCL_TEST_P(CvtColor, RGB2XYZ) { performTest(3, 3, CVTCODE(RGB2XYZ)); }
-OCL_TEST_P(CvtColor, BGR2XYZ) { performTest(3, 3, CVTCODE(BGR2XYZ)); }
-OCL_TEST_P(CvtColor, RGBA2XYZ) { performTest(4, 3, CVTCODE(RGB2XYZ)); }
-OCL_TEST_P(CvtColor, BGRA2XYZ) { performTest(4, 3, CVTCODE(BGR2XYZ)); }
+#if IPP_VERSION_X100 > 0
+#define IPP_EPS (depth <= CV_32S ? 1 : 4e-5) // parenthesised so the ternary expands safely inside any expression
+#else
+#define IPP_EPS 0
+#endif // IPP_VERSION_X100 > 0
 
-OCL_TEST_P(CvtColor, XYZ2RGB) { performTest(3, 3, CVTCODE(XYZ2RGB)); }
-OCL_TEST_P(CvtColor, XYZ2BGR) { performTest(3, 3, CVTCODE(XYZ2BGR)); }
-OCL_TEST_P(CvtColor, XYZ2RGBA) { performTest(3, 4, CVTCODE(XYZ2RGB)); }
-OCL_TEST_P(CvtColor, XYZ2BGRA) { performTest(3, 4, CVTCODE(XYZ2BGR)); }
+OCL_TEST_P(CvtColor, RGB2XYZ) { performTest(3, 3, CVTCODE(RGB2XYZ), IPP_EPS); }
+OCL_TEST_P(CvtColor, BGR2XYZ) { performTest(3, 3, CVTCODE(BGR2XYZ), IPP_EPS); }
+OCL_TEST_P(CvtColor, RGBA2XYZ) { performTest(4, 3, CVTCODE(RGB2XYZ), IPP_EPS); }
+OCL_TEST_P(CvtColor, BGRA2XYZ) { performTest(4, 3, CVTCODE(BGR2XYZ), IPP_EPS); }
+
+OCL_TEST_P(CvtColor, XYZ2RGB) { performTest(3, 3, CVTCODE(XYZ2RGB), IPP_EPS); }
+OCL_TEST_P(CvtColor, XYZ2BGR) { performTest(3, 3, CVTCODE(XYZ2BGR), IPP_EPS); }
+OCL_TEST_P(CvtColor, XYZ2RGBA) { performTest(3, 4, CVTCODE(XYZ2RGB), IPP_EPS); }
+OCL_TEST_P(CvtColor, XYZ2BGRA) { performTest(3, 4, CVTCODE(XYZ2BGR), IPP_EPS); }
+
+#undef IPP_EPS
 
 // RGB <-> HSV
 
@@ -191,15 +199,21 @@ OCL_TEST_P(CvtColor8u32f, HSV2BGRA_FULL) { performTest(3, 4, CVTCODE(HSV2BGR_FUL
 
 // RGB <-> HLS
 
+#if IPP_VERSION_X100 > 0
+#define IPP_EPS (depth == CV_8U ? 2 : 1e-3) // parenthesised so the ternary expands safely inside any expression
+#else
+#define IPP_EPS (depth == CV_8U ? 1 : 1e-3)
+#endif // IPP_VERSION_X100 > 0
+
 OCL_TEST_P(CvtColor8u32f, RGB2HLS) { performTest(3, 3, CVTCODE(RGB2HLS), depth == CV_8U ? 1 : 1e-3); }
 OCL_TEST_P(CvtColor8u32f, BGR2HLS) { performTest(3, 3, CVTCODE(BGR2HLS), depth == CV_8U ? 1 : 1e-3); }
 OCL_TEST_P(CvtColor8u32f, RGBA2HLS) { performTest(4, 3, CVTCODE(RGB2HLS), depth == CV_8U ? 1 : 1e-3); }
 OCL_TEST_P(CvtColor8u32f, BGRA2HLS) { performTest(4, 3, CVTCODE(BGR2HLS), depth == CV_8U ? 1 : 1e-3); }
 
-OCL_TEST_P(CvtColor8u32f, RGB2HLS_FULL) { performTest(3, 3, CVTCODE(RGB2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
-OCL_TEST_P(CvtColor8u32f, BGR2HLS_FULL) { performTest(3, 3, CVTCODE(BGR2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
-OCL_TEST_P(CvtColor8u32f, RGBA2HLS_FULL) { performTest(4, 3, CVTCODE(RGB2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
-OCL_TEST_P(CvtColor8u32f, BGRA2HLS_FULL) { performTest(4, 3, CVTCODE(BGR2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
+OCL_TEST_P(CvtColor8u32f, RGB2HLS_FULL) { performTest(3, 3, CVTCODE(RGB2HLS_FULL), IPP_EPS); }
+OCL_TEST_P(CvtColor8u32f, BGR2HLS_FULL) { performTest(3, 3, CVTCODE(BGR2HLS_FULL), IPP_EPS); }
+OCL_TEST_P(CvtColor8u32f, RGBA2HLS_FULL) { performTest(4, 3, CVTCODE(RGB2HLS_FULL), IPP_EPS); }
+OCL_TEST_P(CvtColor8u32f, BGRA2HLS_FULL) { performTest(4, 3, CVTCODE(BGR2HLS_FULL), IPP_EPS); }
 
 OCL_TEST_P(CvtColor8u32f, HLS2RGB) { performTest(3, 3, CVTCODE(HLS2RGB), 1); }
 OCL_TEST_P(CvtColor8u32f, HLS2BGR) { performTest(3, 3, CVTCODE(HLS2BGR), 1); }
@@ -211,6 +225,8 @@ OCL_TEST_P(CvtColor8u32f, HLS2BGR_FULL) { performTest(3, 3, CVTCODE(HLS2BGR_FULL
 OCL_TEST_P(CvtColor8u32f, HLS2RGBA_FULL) { performTest(3, 4, CVTCODE(HLS2RGB_FULL), 1); }
 OCL_TEST_P(CvtColor8u32f, HLS2BGRA_FULL) { performTest(3, 4, CVTCODE(HLS2BGR_FULL), 1); }
 
+#undef IPP_EPS
+
 // RGB5x5 <-> RGB
 
 typedef CvtColor CvtColor8u;
diff --git a/modules/imgproc/test/test_precomp.hpp b/modules/imgproc/test/test_precomp.hpp
index 9650b7fcc8..53f315ee4f 100644
--- a/modules/imgproc/test/test_precomp.hpp
+++ b/modules/imgproc/test/test_precomp.hpp
@@ -11,6 +11,7 @@
 
 #include <iostream>
 #include "opencv2/ts.hpp"
+#include "opencv2/core/private.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/highgui.hpp"
 
diff --git a/modules/legacy/test/test_stereomatching.cpp b/modules/legacy/test/test_stereomatching.cpp
index 7262249844..95aa97bbe4 100644
--- a/modules/legacy/test/test_stereomatching.cpp
+++ b/modules/legacy/test/test_stereomatching.cpp
@@ -278,7 +278,7 @@ float dispRMS( const Mat& computedDisp, const Mat& groundTruthDisp, const Mat& m
         checkTypeAndSizeOfMask( mask, sz );
         pointsCount = countNonZero(mask);
     }
-    return 1.f/sqrt((float)pointsCount) * (float)norm(computedDisp, groundTruthDisp, NORM_L2, mask);
+    return 1.f/sqrt((float)pointsCount) * (float)cvtest::norm(computedDisp, groundTruthDisp, NORM_L2, mask);
 }
 
 /*
diff --git a/modules/optim/test/test_denoise_tvl1.cpp b/modules/optim/test/test_denoise_tvl1.cpp
index 9334dc5c5b..76ec2cda3e 100644
--- a/modules/optim/test/test_denoise_tvl1.cpp
+++ b/modules/optim/test/test_denoise_tvl1.cpp
@@ -41,7 +41,8 @@
 #include "test_precomp.hpp"
 #include "opencv2/highgui.hpp"
 
-void make_noisy(const cv::Mat& img, cv::Mat& noisy, double sigma, double pepper_salt_ratio,cv::RNG& rng){
+void make_noisy(const cv::Mat& img, cv::Mat& noisy, double sigma, double pepper_salt_ratio,cv::RNG& rng)
+{
     noisy.create(img.size(), img.type());
     cv::Mat noise(img.size(), img.type()), mask(img.size(), CV_8U);
     rng.fill(noise,cv::RNG::NORMAL,128.0,sigma);
@@ -54,34 +55,36 @@ void make_noisy(const cv::Mat& img, cv::Mat& noisy, double sigma, double pepper_
     noise.setTo(128, mask);
     cv::addWeighted(noisy, 1, noise, 1, -128, noisy);
 }
-void make_spotty(cv::Mat& img,cv::RNG& rng, int r=3,int n=1000){
-    for(int i=0;i<n;i++){
+
+void make_spotty(cv::Mat& img,cv::RNG& rng, int r=3,int n=1000)
+{
+    for(int i=0;i<n;i++)
+    {
         int x=rng(img.cols-r),y=rng(img.rows-r);
-        if(rng(2)==0){
+        if(rng(2)==0)
             img(cv::Range(y,y+r),cv::Range(x,x+r))=(uchar)0;
-        }else{
+        else
             img(cv::Range(y,y+r),cv::Range(x,x+r))=(uchar)255;
-        }
     }
 }
 
-bool validate_pixel(const cv::Mat& image,int x,int y,uchar val){
+bool validate_pixel(const cv::Mat& image,int x,int y,uchar val)
+{
     printf("test: image(%d,%d)=%d vs %d - %s\n",x,y,(int)image.at<uchar>(x,y),val,(val==image.at<uchar>(x,y))?"true":"false");
     return (image.at<uchar>(x,y)==val);
 }
 
-TEST(Optim_denoise_tvl1, regression_basic){
+TEST(Optim_denoise_tvl1, regression_basic)
+{
     cv::RNG rng(42);
-    cv::Mat img = cv::imread("lena.jpg", 0), noisy,res;
-    if(img.rows!=512 || img.cols!=512){
-        printf("\tplease, put lena.jpg from samples/c in the current folder\n");
-        printf("\tnow, the test will fail...\n");
-        ASSERT_TRUE(false);
-    }
+    cv::Mat img = cv::imread(cvtest::TS::ptr()->get_data_path() + "shared/lena.png", 0), noisy, res;
+
+    ASSERT_FALSE(img.empty()) << "Error: can't open 'lena.png'";
 
     const int obs_num=5;
-    std::vector<cv::Mat> images(obs_num,cv::Mat());
-    for(int i=0;i<(int)images.size();i++){
+    std::vector<cv::Mat> images(obs_num, cv::Mat());
+    for(int i=0;i<(int)images.size();i++)
+    {
         make_noisy(img,images[i], 20, 0.02,rng);
         //make_spotty(images[i],rng);
     }
diff --git a/modules/photo/test/test_denoising.cpp b/modules/photo/test/test_denoising.cpp
index ca4f63f222..9808e9cddc 100644
--- a/modules/photo/test/test_denoising.cpp
+++ b/modules/photo/test/test_denoising.cpp
@@ -73,7 +73,7 @@ TEST(Photo_DenoisingGrayscale, regression)
 
     DUMP(result, expected_path + ".res.png");
 
-    ASSERT_EQ(0, norm(result != expected));
+    ASSERT_EQ(0, cvtest::norm(result, expected, NORM_L2));
 }
 
 TEST(Photo_DenoisingColored, regression)
@@ -93,7 +93,7 @@ TEST(Photo_DenoisingColored, regression)
 
     DUMP(result, expected_path + ".res.png");
 
-    ASSERT_EQ(0, norm(result != expected));
+    ASSERT_EQ(0, cvtest::norm(result, expected, NORM_L2));
 }
 
 TEST(Photo_DenoisingGrayscaleMulti, regression)
@@ -118,7 +118,7 @@ TEST(Photo_DenoisingGrayscaleMulti, regression)
 
     DUMP(result, expected_path + ".res.png");
 
-    ASSERT_EQ(0, norm(result != expected));
+    ASSERT_EQ(0, cvtest::norm(result, expected, NORM_L2));
 }
 
 TEST(Photo_DenoisingColoredMulti, regression)
@@ -143,7 +143,7 @@ TEST(Photo_DenoisingColoredMulti, regression)
 
     DUMP(result, expected_path + ".res.png");
 
-    ASSERT_EQ(0, norm(result != expected));
+    ASSERT_EQ(0, cvtest::norm(result, expected, NORM_L2));
 }
 
 TEST(Photo_White, issue_2646)
diff --git a/modules/photo/test/test_inpaint.cpp b/modules/photo/test/test_inpaint.cpp
index 3c341b27a0..8f031e8d38 100644
--- a/modules/photo/test/test_inpaint.cpp
+++ b/modules/photo/test/test_inpaint.cpp
@@ -91,8 +91,8 @@ void CV_InpaintTest::run( int )
     absdiff( orig, res1, diff1 );
     absdiff( orig, res2, diff2 );
 
-    double n1 = norm(diff1.reshape(1), NORM_INF, inv_mask.reshape(1));
-    double n2 = norm(diff2.reshape(1), NORM_INF, inv_mask.reshape(1));
+    double n1 = cvtest::norm(diff1.reshape(1), NORM_INF, inv_mask.reshape(1));
+    double n2 = cvtest::norm(diff2.reshape(1), NORM_INF, inv_mask.reshape(1));
 
     if (n1 != 0 || n2 != 0)
     {
@@ -103,8 +103,8 @@ void CV_InpaintTest::run( int )
     absdiff( exp1, res1, diff1 );
     absdiff( exp2, res2, diff2 );
 
-    n1 = norm(diff1.reshape(1), NORM_INF, mask.reshape(1));
-    n2 = norm(diff2.reshape(1), NORM_INF, mask.reshape(1));
+    n1 = cvtest::norm(diff1.reshape(1), NORM_INF, mask.reshape(1));
+    n2 = cvtest::norm(diff2.reshape(1), NORM_INF, mask.reshape(1));
 
     const int jpeg_thres = 3;
     if (n1 > jpeg_thres || n2 > jpeg_thres)
diff --git a/modules/stitching/test/test_blenders.cpp b/modules/stitching/test/test_blenders.cpp
index 6702eabf0c..cb84482f21 100644
--- a/modules/stitching/test/test_blenders.cpp
+++ b/modules/stitching/test/test_blenders.cpp
@@ -73,6 +73,6 @@ TEST(MultiBandBlender, CanBlendTwoImages)
     Mat result; result_s.convertTo(result, CV_8U);
 
     Mat expected = imread(string(cvtest::TS::ptr()->get_data_path()) + "stitching/baboon_lena.png");
-    double rmsErr = norm(expected, result, NORM_L2) / sqrt(double(expected.size().area()));
+    double rmsErr = cvtest::norm(expected, result, NORM_L2) / sqrt(double(expected.size().area()));
     ASSERT_LT(rmsErr, 1e-3);
 }
diff --git a/modules/ts/include/opencv2/ts.hpp b/modules/ts/include/opencv2/ts.hpp
index 457f00b3e2..8aeec65712 100644
--- a/modules/ts/include/opencv2/ts.hpp
+++ b/modules/ts/include/opencv2/ts.hpp
@@ -129,6 +129,7 @@ CV_EXPORTS void minMaxLoc(const Mat& src, double* minval, double* maxval,
 CV_EXPORTS double norm(InputArray src, int normType, InputArray mask=noArray());
 CV_EXPORTS double norm(InputArray src1, InputArray src2, int normType, InputArray mask=noArray());
 CV_EXPORTS Scalar mean(const Mat& src, const Mat& mask=Mat());
+CV_EXPORTS double PSNR(InputArray src1, InputArray src2);
 
 CV_EXPORTS bool cmpUlps(const Mat& data, const Mat& refdata, int expMaxDiff, double* realMaxDiff, vector<int>* idx);
 
diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp
index e3563caa4f..2042f5cf0c 100644
--- a/modules/ts/src/ts_func.cpp
+++ b/modules/ts/src/ts_func.cpp
@@ -1399,6 +1399,12 @@ double norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask)
     return isRelative ? result / (cvtest::norm(src2, normType) + DBL_EPSILON) : result;
 }
 
+double PSNR(InputArray _src1, InputArray _src2) // PSNR in dB of _src1 (must be CV_8U) against _src2, peak value 255
+{
+    CV_Assert( _src1.depth() == CV_8U );
+    const double mse = cvtest::norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels());
+    return 20*log10(255./(std::sqrt(mse)+DBL_EPSILON)); // DBL_EPSILON avoids dividing by zero for identical inputs
+}
 
 template<typename _Tp> static double
 crossCorr_(const _Tp* src1, const _Tp* src2, size_t total)
diff --git a/modules/video/src/motempl.cpp b/modules/video/src/motempl.cpp
index 3fc87e657e..152706b9fe 100644
--- a/modules/video/src/motempl.cpp
+++ b/modules/video/src/motempl.cpp
@@ -80,13 +80,27 @@ void cv::updateMotionHistory( InputArray _silhouette, InputOutputArray _mhi,
 
     Mat silh = _silhouette.getMat(), mhi = _mhi.getMat();
     Size size = silh.size();
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    int silhstep = (int)silh.step, mhistep = (int)mhi.step;
+#endif
 
     if( silh.isContinuous() && mhi.isContinuous() )
     {
         size.width *= size.height;
         size.height = 1;
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+        silhstep = (int)silh.total(); // single-row view: the step covers the whole buffer
+        mhistep = (int)(mhi.total() * sizeof(Ipp32f)); // narrow the byte count, not just total()
+#endif
     }
 
+#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY)
+    IppStatus status = ippiUpdateMotionHistory_8u32f_C1IR((const Ipp8u *)silh.data, silhstep, (Ipp32f *)mhi.data, mhistep,
+                                                          ippiSize(size.width, size.height), (Ipp32f)timestamp, (Ipp32f)duration);
+    if (status >= 0)
+        return;
+#endif
+
 #if CV_SSE2
     volatile bool useSIMD = cv::checkHardwareSupport(CV_CPU_SSE2);
 #endif
diff --git a/modules/video/test/test_estimaterigid.cpp b/modules/video/test/test_estimaterigid.cpp
index 5259ce7ee2..50508b4abb 100644
--- a/modules/video/test/test_estimaterigid.cpp
+++ b/modules/video/test/test_estimaterigid.cpp
@@ -109,8 +109,8 @@ bool CV_RigidTransform_Test::testNPoints(int from)
 
         Mat aff_est = estimateRigidTransform(fpts, tpts, true);
 
-        double thres = 0.1*norm(aff);
-        double d = norm(aff_est, aff, NORM_L2);
+        double thres = 0.1*cvtest::norm(aff, NORM_L2);
+        double d = cvtest::norm(aff_est, aff, NORM_L2);
         if (d > thres)
         {
             double dB=0, nB=0;
@@ -120,7 +120,7 @@ bool CV_RigidTransform_Test::testNPoints(int from)
                 Mat B = A - repeat(A.row(0), 3, 1), Bt = B.t();
                 B = Bt*B;
                 dB = cv::determinant(B);
-                nB = norm(B);
+                nB = cvtest::norm(B, NORM_L2);
                 if( fabs(dB) < 0.01*nB )
                     continue;
             }
@@ -154,11 +154,11 @@ bool CV_RigidTransform_Test::testImage()
     Mat aff_est = estimateRigidTransform(img, rotated, true);
 
     const double thres = 0.033;
-    if (norm(aff_est, aff, NORM_INF) > thres)
+    if (cvtest::norm(aff_est, aff, NORM_INF) > thres)
     {
         ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY);
         ts->printf( cvtest::TS::LOG, "Threshold = %f, norm of difference = %f", thres,
-            norm(aff_est, aff, NORM_INF) );
+            cvtest::norm(aff_est, aff, NORM_INF) );
         return false;
     }