diff --git a/CMakeLists.txt b/CMakeLists.txt index eb173592a6..c1e4e7c1af 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,8 +127,7 @@ OCV_OPTION(WITH_FFMPEG "Include FFMPEG support" ON OCV_OPTION(WITH_GSTREAMER "Include Gstreamer support" ON IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_GSTREAMER_0_10 "Enable Gstreamer 0.10 support (instead of 1.x)" OFF ) OCV_OPTION(WITH_GTK "Include GTK support" ON IF (UNIX AND NOT APPLE AND NOT ANDROID) ) -OCV_OPTION(WITH_ICV "Include Intel IPP ICV support" ON IF (NOT IOS) ) -OCV_OPTION(WITH_IPP "Include Intel IPP support" OFF IF (NOT IOS) ) +OCV_OPTION(WITH_IPP "Include Intel IPP support" ON IF (NOT IOS) ) OCV_OPTION(WITH_JASPER "Include JPEG2K support" ON IF (NOT IOS) ) OCV_OPTION(WITH_JPEG "Include JPEG support" ON) OCV_OPTION(WITH_WEBP "Include WebP support" ON IF (NOT IOS) ) @@ -158,7 +157,7 @@ OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF WIN32 ) OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF WIN32 ) - +OCV_OPTION(WITH_IPP_A "Include Intel IPP_A support" OFF IF (MSVC OR X86 OR X86_64) ) # OpenCV build components # =================================================== @@ -917,13 +916,17 @@ endif(DEFINED WITH_INTELPERC) status("") status(" Other third-party libraries:") -if(WITH_IPP AND IPP_FOUND) - status(" Use IPP:" "${IPP_LATEST_VERSION_STR} [${IPP_LATEST_VERSION_MAJOR}.${IPP_LATEST_VERSION_MINOR}.${IPP_LATEST_VERSION_BUILD}]") +if(WITH_IPP AND HAVE_IPP) + status(" Use IPP:" "${IPP_VERSION_STR} [${IPP_VERSION_MAJOR}.${IPP_VERSION_MINOR}.${IPP_VERSION_BUILD}]") status(" at:" "${IPP_ROOT_DIR}") else() - status(" Use IPP:" WITH_IPP AND NOT IPP_FOUND THEN "IPP not found" ELSE NO) + status(" Use IPP:" WITH_IPP AND NOT HAVE_IPP THEN "IPP not found" ELSE NO) endif() +if(DEFINED WITH_IPP_A) +status(" Use IPP Async:" HAVE_IPP_A THEN "YES" ELSE NO) +endif(DEFINED WITH_IPP_A) + status(" Use Eigen:" HAVE_EIGEN THEN "YES (ver ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})" ELSE NO) status(" Use TBB:" HAVE_TBB THEN "YES (ver ${TBB_VERSION_MAJOR}.${TBB_VERSION_MINOR} interface ${TBB_INTERFACE_VERSION})" ELSE NO) status(" Use OpenMP:" HAVE_OPENMP THEN YES ELSE NO) diff --git a/cmake/OpenCVFindIPP.cmake b/cmake/OpenCVFindIPP.cmake index 780ee51b87..9cb6ed0183 100644 --- a/cmake/OpenCVFindIPP.cmake +++ b/cmake/OpenCVFindIPP.cmake @@ -2,15 +2,12 @@ # The script to detect Intel(R) Integrated Performance Primitives (IPP) # installation/package # -# Windows host: -# Run script like this before cmake: -# call "\bin\ippvars.bat" intel64 -# for example: -# call "C:\Program Files (x86)\Intel\Composer XE\ipp\bin\ippvars.bat" intel64 +# By default, ICV version will be used. +# To use standalone IPP update cmake command line: +# cmake ... -DIPPROOT= ... +# +# Note: Backward compatibility is broken, IPPROOT environment path is ignored # -# Linux host: -# Run script like this before cmake: -# source /opt/intel/ipp/bin/ippvars.sh [ia32|intel64] # # On return this will define: # @@ -39,14 +36,6 @@ unset(IPP_VERSION_BUILD) set(IPP_LIB_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX}) set(IPP_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) -set(IPP_PREFIX "ipp") -set(IPP_SUFFIX "_l") -set(IPPCORE "core") # core functionality -set(IPPS "s") # signal processing -set(IPPI "i") # image processing -set(IPPCC "cc") # color conversion -set(IPPCV "cv") # computer vision -set(IPPVM "vm") # vector math set(IPP_X64 0) if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8) @@ -56,21 +45,21 @@ if(CMAKE_CL_64) set(IPP_X64 1) endif() -# This function detects IPP version by analyzing ippversion.h file -macro(ipp_get_version _ROOT_DIR) +# This function detects IPP version by analyzing .h file +macro(ipp_get_version VERSION_FILE) unset(_VERSION_STR) unset(_MAJOR) unset(_MINOR) unset(_BUILD) # read IPP version info from file - file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR1 REGEX "IPP_VERSION_MAJOR") - file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR2 REGEX "IPP_VERSION_MINOR") - file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR3 REGEX "IPP_VERSION_BUILD") + file(STRINGS ${VERSION_FILE} STR1 REGEX "IPP_VERSION_MAJOR") + file(STRINGS ${VERSION_FILE} STR2 REGEX "IPP_VERSION_MINOR") + file(STRINGS ${VERSION_FILE} STR3 REGEX "IPP_VERSION_BUILD") if("${STR3}" STREQUAL "") - file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR3 REGEX "IPP_VERSION_UPDATE") + file(STRINGS ${VERSION_FILE} STR3 REGEX "IPP_VERSION_UPDATE") endif() - file(STRINGS ${_ROOT_DIR}/include/ippversion.h STR4 REGEX "IPP_VERSION_STR") + file(STRINGS ${VERSION_FILE} STR4 REGEX "IPP_VERSION_STR") # extract info and assign to variables string(REGEX MATCHALL "[0-9]+" _MAJOR ${STR1}) @@ -83,66 +72,92 @@ macro(ipp_get_version _ROOT_DIR) set(IPP_VERSION_MAJOR ${_MAJOR}) set(IPP_VERSION_MINOR ${_MINOR}) set(IPP_VERSION_BUILD ${_BUILD}) - - set(__msg) - if(EXISTS ${_ROOT_DIR}/include/ippicv.h) - ocv_assert(WITH_ICV AND NOT WITH_IPP) - set(__msg " ICV version") - set(HAVE_IPP_ICV_ONLY 1) - endif() - - message(STATUS "found IPP: ${_MAJOR}.${_MINOR}.${_BUILD} [${_VERSION_STR}]${__msg}") - message(STATUS "at: ${_ROOT_DIR}") endmacro() +macro(_ipp_not_supported) + message(STATUS ${ARGN}) + unset(HAVE_IPP) + unset(HAVE_IPP_ICV_ONLY) + unset(IPP_VERSION_STR) + return() +endmacro() -# This function sets IPP_INCLUDE_DIRS and IPP_LIBRARIES variables -macro(ipp_set_variables _LATEST_VERSION) - if(${_LATEST_VERSION} VERSION_LESS "7.0") - message(SEND_ERROR "IPP ${_LATEST_VERSION} is not supported") - unset(HAVE_IPP) - return() +# This macro uses IPP_ROOT_DIR variable +# TODO Cleanup code after ICV package stabilization +macro(ipp_detect_version) + set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include) + + set(__msg) + if(EXISTS ${IPP_ROOT_DIR}/ippicv.h) + set(__msg " (ICV version)") + set(HAVE_IPP_ICV_ONLY 1) + if(EXISTS ${IPP_ROOT_DIR}/ippversion.h) + _ipp_not_supported("Can't resolve IPP directory: ${IPP_ROOT_DIR}") + else() + ipp_get_version(${IPP_ROOT_DIR}/ippicv.h) + endif() + ocv_assert(IPP_VERSION_STR VERSION_GREATER "8.0") + set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/) + elseif(EXISTS ${IPP_ROOT_DIR}/include/ipp.h) + ipp_get_version(${IPP_ROOT_DIR}/include/ippversion.h) + ocv_assert(IPP_VERSION_STR VERSION_GREATER "1.0") + else() + _ipp_not_supported("Can't resolve IPP directory: ${IPP_ROOT_DIR}") endif() - # set INCLUDE and LIB folders - set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include) + message(STATUS "found IPP${__msg}: ${_MAJOR}.${_MINOR}.${_BUILD} [${IPP_VERSION_STR}]") + message(STATUS "at: ${IPP_ROOT_DIR}") + + if(${IPP_VERSION_STR} VERSION_LESS "7.0") + _ipp_not_supported("IPP ${IPP_VERSION_STR} is not supported") + endif() + + set(HAVE_IPP 1) + if(EXISTS ${IPP_INCLUDE_DIRS}/ipp_redefine.h) + set(HAVE_IPP_REDEFINE 1) + else() + unset(HAVE_IPP_REDEFINE) + endif() + + macro(_ipp_set_library_dir DIR) + if(NOT EXISTS ${DIR}) + _ipp_not_supported("IPP library directory not found") + endif() + set(IPP_LIBRARY_DIR ${DIR}) + endmacro() if(NOT HAVE_IPP_ICV_ONLY) if(APPLE) - set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/lib) + _ipp_set_library_dir(${IPP_ROOT_DIR}/lib) elseif(IPP_X64) - if(NOT EXISTS ${IPP_ROOT_DIR}/lib/intel64) - message(SEND_ERROR "IPP EM64T libraries not found") - endif() - set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/lib/intel64) + _ipp_set_library_dir(${IPP_ROOT_DIR}/lib/intel64) else() - if(NOT EXISTS ${IPP_ROOT_DIR}/lib/ia32) - message(SEND_ERROR "IPP IA32 libraries not found") - endif() - set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/lib/ia32) + _ipp_set_library_dir(${IPP_ROOT_DIR}/lib/ia32) endif() else() - if(APPLE) - set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/libs/macosx) - elseif(WIN32 AND NOT ARM) - set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/libs/windows) - elseif(UNIX) - set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/libs/linux) + if(EXISTS ${IPP_ROOT_DIR}/lib) + set(IPP_LIBRARY_DIR ${IPP_ROOT_DIR}/lib) else() - message(MESSAGE "IPP ${_LATEST_VERSION} at ${IPP_ROOT_DIR} is not supported") - unset(HAVE_IPP) - return() + _ipp_not_supported("IPP ${IPP_VERSION_STR} at ${IPP_ROOT_DIR} is not supported") endif() if(X86_64) - set(IPP_LIBRARY_DIR ${IPP_LIBRARY_DIR}/intel64) + _ipp_set_library_dir(${IPP_LIBRARY_DIR}/intel64) else() - set(IPP_LIBRARY_DIR ${IPP_LIBRARY_DIR}/ia32) + _ipp_set_library_dir(${IPP_LIBRARY_DIR}/ia32) endif() endif() + macro(_ipp_add_library name) + if (EXISTS ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) + list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) + else() + message(STATUS "Can't find IPP library: ${name}") + endif() + endmacro() + set(IPP_PREFIX "ipp") - if(${_LATEST_VERSION} VERSION_LESS "8.0") - set(IPP_SUFFIX "_l") # static not threaded libs suffix IPP 7.x + if(${IPP_VERSION_STR} VERSION_LESS "8.0") + set(IPP_SUFFIX "_l") # static not threaded libs suffix IPP 7.x else() if(WIN32) set(IPP_SUFFIX "mt") # static not threaded libs suffix IPP 8.x for Windows @@ -150,78 +165,92 @@ macro(ipp_set_variables _LATEST_VERSION) set(IPP_SUFFIX "") # static not threaded libs suffix IPP 8.x for Linux/OS X endif() endif() - set(IPPCORE "core") # core functionality - set(IPPSP "s") # signal processing - set(IPPIP "i") # image processing - set(IPPCC "cc") # color conversion - set(IPPCV "cv") # computer vision - set(IPPVM "vm") # vector math - list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPVM}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) - list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCC}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) - list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCV}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) - list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPI}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) - list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPS}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) - list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCORE}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) + if(HAVE_IPP_ICV_ONLY) + _ipp_add_library(icv) + else() + _ipp_add_library(core) + _ipp_add_library(s) + _ipp_add_library(i) + _ipp_add_library(cc) + _ipp_add_library(cv) + _ipp_add_library(vm) + _ipp_add_library(m) -# FIXIT -# if(UNIX AND NOT HAVE_IPP_ICV_ONLY) -# get_filename_component(INTEL_COMPILER_LIBRARY_DIR ${IPP_ROOT_DIR}/../lib REALPATH) - if(UNIX) - if(NOT HAVE_IPP_ICV_ONLY) + if(UNIX) get_filename_component(INTEL_COMPILER_LIBRARY_DIR ${IPP_ROOT_DIR}/../lib REALPATH) - else() - set(INTEL_COMPILER_LIBRARY_DIR "/opt/intel/lib") - endif() - if(IPP_X64) - if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/intel64) - message(SEND_ERROR "Intel compiler EM64T libraries not found") + if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}) + get_filename_component(INTEL_COMPILER_LIBRARY_DIR ${IPP_ROOT_DIR}/../compiler/lib REALPATH) endif() - set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/intel64) - else() - if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/ia32) - message(SEND_ERROR "Intel compiler IA32 libraries not found") + if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}) + _ipp_not_supported("IPP configuration error: can't find Intel compiler library dir ${INTEL_COMPILER_LIBRARY_DIR}") endif() - set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/ia32) - endif() - list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}irc${CMAKE_SHARED_LIBRARY_SUFFIX}) - list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}imf${CMAKE_SHARED_LIBRARY_SUFFIX}) - list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}svml${CMAKE_SHARED_LIBRARY_SUFFIX}) + if(NOT APPLE) + if(IPP_X64) + if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/intel64) + message(SEND_ERROR "Intel compiler EM64T libraries not found") + endif() + set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/intel64) + else() + if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/ia32) + message(SEND_ERROR "Intel compiler IA32 libraries not found") + endif() + set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/ia32) + endif() + endif() + + macro(_ipp_add_compiler_library name) + if (EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}${name}${CMAKE_SHARED_LIBRARY_SUFFIX}) + list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}${name}${CMAKE_SHARED_LIBRARY_SUFFIX}) + else() + message(STATUS "Can't find compiler library: ${name}") + endif() + endmacro() + + _ipp_add_compiler_library(irc) + _ipp_add_compiler_library(imf) + _ipp_add_compiler_library(svml) + endif(UNIX) endif() #message(STATUS "IPP libs: ${IPP_LIBRARIES}") endmacro() -if(WITH_IPP) - set(IPPPATH $ENV{IPPROOT}) - if(UNIX) - list(APPEND IPPPATH /opt/intel/ipp) - endif() -elseif(WITH_ICV) - if(DEFINED ENV{IPPICVROOT}) - set(IPPPATH $ENV{IPPICVROOT}) - else() - set(IPPPATH ${OpenCV_SOURCE_DIR}/3rdparty/ippicv) +# OPENCV_IPP_PATH is an environment variable for internal usage only, do not use it +if(DEFINED ENV{OPENCV_IPP_PATH} AND NOT DEFINED IPPROOT) + set(IPPROOT "$ENV{OPENCV_IPP_PATH}") +endif() +if(NOT DEFINED IPPROOT) + set(IPPROOT "${OpenCV_SOURCE_DIR}/3rdparty/ippicv") +endif() + +# Try ICV +find_path( + IPP_ICV_H_PATH + NAMES ippicv.h + PATHS ${IPPROOT} + DOC "The path to Intel(R) IPP ICV header files" + NO_DEFAULT_PATH + NO_CMAKE_PATH) +set(IPP_ROOT_DIR ${IPP_ICV_H_PATH}) + +if(NOT IPP_ICV_H_PATH) + # Try standalone IPP + find_path( + IPP_H_PATH + NAMES ippversion.h + PATHS ${IPPROOT} + PATH_SUFFIXES include + DOC "The path to Intel(R) IPP header files" + NO_DEFAULT_PATH + NO_CMAKE_PATH) + if(IPP_H_PATH) + get_filename_component(IPP_ROOT_DIR ${IPP_H_PATH} PATH) endif() endif() - -find_path( - IPP_H_PATH - NAMES ippversion.h - PATHS ${IPPPATH} - PATH_SUFFIXES include - DOC "The path to Intel(R) IPP header files" - NO_DEFAULT_PATH - NO_CMAKE_PATH) - -if(IPP_H_PATH) - set(HAVE_IPP 1) - - get_filename_component(IPP_ROOT_DIR ${IPP_H_PATH} PATH) - - ipp_get_version(${IPP_ROOT_DIR}) - ipp_set_variables(${IPP_VERSION_STR}) +if(IPP_ROOT_DIR) + ipp_detect_version() endif() diff --git a/cmake/OpenCVFindLibsPerf.cmake b/cmake/OpenCVFindLibsPerf.cmake index 7198326351..a046b8fc34 100644 --- a/cmake/OpenCVFindLibsPerf.cmake +++ b/cmake/OpenCVFindLibsPerf.cmake @@ -8,7 +8,7 @@ if(WITH_TBB) endif(WITH_TBB) # --- IPP --- -if(WITH_IPP OR WITH_ICV) +if(WITH_IPP) include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindIPP.cmake") if(HAVE_IPP) ocv_include_directories(${IPP_INCLUDE_DIRS}) diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 45351d1a86..593ee9fd55 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -211,8 +211,8 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un #ifdef HAVE_IPP # ifdef HAVE_IPP_ICV_ONLY +# include "ipp_redefine.h" # include "ippicv.h" -# include "ippicv_fn_map.h" # else # include "ipp.h" # endif @@ -223,6 +223,13 @@ static inline IppiSize ippiSize(int width, int height) IppiSize size = { width, height }; return size; } + +static inline IppiSize ippiSize(const cv::Size & _size) +{ + IppiSize size = { _size.width, _size.height }; + return size; +} + #else # define IPP_VERSION_X100 0 #endif diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index ecc2ca0648..aa9469c04c 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -460,7 +460,7 @@ static void add8u( const uchar* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0)) + if (0 <= ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz), 0)) return; #endif (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, sz)); @@ -479,7 +479,7 @@ static void add16u( const ushort* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0)) + if (0 <= ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz), 0)) return; #endif (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, sz)); @@ -491,7 +491,7 @@ static void add16s( const short* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0)) + if (0 <= ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz), 0)) return; #endif (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, sz)); @@ -510,7 +510,7 @@ static void add32f( const float* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + if (0 <= ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz))) return; #endif (vBinOp32, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, sz)); @@ -529,7 +529,7 @@ static void sub8u( const uchar* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0)) + if (0 <= ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz), 0)) return; #endif (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, sz)); @@ -548,7 +548,7 @@ static void sub16u( const ushort* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0)) + if (0 <= ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz), 0)) return; #endif (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, sz)); @@ -560,7 +560,7 @@ static void sub16s( const short* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0)) + if (0 <= ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz), 0)) return; #endif (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, sz)); @@ -579,7 +579,7 @@ static void sub32f( const float* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz)) + if (0 <= ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz))) return; #endif (vBinOp32, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, sz)); @@ -801,7 +801,7 @@ static void absdiff8u( const uchar* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + if (0 <= ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz))) return; #endif (vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, sz)); @@ -820,7 +820,7 @@ static void absdiff16u( const ushort* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + if (0 <= ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz))) return; #endif (vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, sz)); @@ -846,7 +846,7 @@ static void absdiff32f( const float* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + if (0 <= ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz))) return; #endif (vBinOp32, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, sz)); @@ -866,7 +866,7 @@ static void and8u( const uchar* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + if (0 <= ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz))) return; #endif (vBinOp, IF_SIMD(VAnd)>(src1, step1, src2, step2, dst, step, sz)); @@ -878,7 +878,7 @@ static void or8u( const uchar* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + if (0 <= ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz))) return; #endif (vBinOp, IF_SIMD(VOr)>(src1, step1, src2, step2, dst, step, sz)); @@ -890,7 +890,7 @@ static void xor8u( const uchar* src1, size_t step1, { #if (ARITHM_USE_IPP == 1) fixSteps(sz, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz)) + if (0 <= ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz))) return; #endif (vBinOp, IF_SIMD(VXor)>(src1, step1, src2, step2, dst, step, sz)); @@ -901,8 +901,8 @@ static void not8u( const uchar* src1, size_t step1, uchar* dst, size_t step, Size sz, void* ) { #if (ARITHM_USE_IPP == 1) - fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void *)src2; - if (0 <= ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, (IppiSize&)sz)) + fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void)src2; + if (0 <= ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, ippiSize(sz))) return; #endif (vBinOp, IF_SIMD(VNot)>(src1, step1, src2, step2, dst, step, sz)); @@ -2386,7 +2386,7 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste if( op >= 0 ) { fixSteps(size, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op)) + if (0 <= ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op)) return; } #endif @@ -2469,7 +2469,7 @@ static void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t if( op >= 0 ) { fixSteps(size, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op)) + if (0 <= ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op)) return; } #endif @@ -2484,7 +2484,7 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st if( op > 0 ) { fixSteps(size, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op)) + if (0 <= ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op)) return; } #endif @@ -2590,7 +2590,7 @@ static void cmp32f(const float* src1, size_t step1, const float* src2, size_t st if( op >= 0 ) { fixSteps(size, sizeof(dst[0]), step1, step2, step); - if (0 <= ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op)) + if (0 <= ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op)) return; } #endif diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index cd5cf9b733..155ca67d6f 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -1079,6 +1079,33 @@ dtype* dst, size_t dstep, Size size, double* scale) \ cvtScale_(src, sstep, dst, dstep, size, (wtype)scale[0], (wtype)scale[1]); \ } +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) +#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \ +static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ + dtype* dst, size_t dstep, Size size, double*) \ +{ \ + if (ippiConvert_##ippFavor(src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height)) >= 0) \ + return; \ + cvt_(src, sstep, dst, dstep, size); \ +} + +#define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \ +static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ + dtype* dst, size_t dstep, Size size, double*) \ +{ \ + if (ippiConvert_##ippFavor(src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height), ippRndFinancial, 0) >= 0) \ + return; \ + cvt_(src, sstep, dst, dstep, size); \ +} +#else +#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \ +static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ + dtype* dst, size_t dstep, Size size, double*) \ +{ \ + cvt_(src, sstep, dst, dstep, size); \ +} +#define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F +#endif #define DEF_CVT_FUNC(suffix, stype, dtype) \ static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ @@ -1089,7 +1116,7 @@ static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ #define DEF_CPY_FUNC(suffix, stype) \ static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ -stype* dst, size_t dstep, Size size, double*) \ + stype* dst, size_t dstep, Size size, double*) \ { \ cpy_(src, sstep, dst, dstep, size); \ } @@ -1160,48 +1187,48 @@ DEF_CVT_SCALE_FUNC(32f64f, float, double, double) DEF_CVT_SCALE_FUNC(64f, double, double, double) DEF_CPY_FUNC(8u, uchar) -DEF_CVT_FUNC(8s8u, schar, uchar) -DEF_CVT_FUNC(16u8u, ushort, uchar) -DEF_CVT_FUNC(16s8u, short, uchar) -DEF_CVT_FUNC(32s8u, int, uchar) -DEF_CVT_FUNC(32f8u, float, uchar) +DEF_CVT_FUNC_F(8s8u, schar, uchar, 8s8u_C1Rs) +DEF_CVT_FUNC_F(16u8u, ushort, uchar, 16u8u_C1R) +DEF_CVT_FUNC_F(16s8u, short, uchar, 16s8u_C1R) +DEF_CVT_FUNC_F(32s8u, int, uchar, 32s8u_C1R) +DEF_CVT_FUNC_F2(32f8u, float, uchar, 32f8u_C1RSfs) DEF_CVT_FUNC(64f8u, double, uchar) -DEF_CVT_FUNC(8u8s, uchar, schar) -DEF_CVT_FUNC(16u8s, ushort, schar) -DEF_CVT_FUNC(16s8s, short, schar) -DEF_CVT_FUNC(32s8s, int, schar) -DEF_CVT_FUNC(32f8s, float, schar) +DEF_CVT_FUNC_F2(8u8s, uchar, schar, 8u8s_C1RSfs) +DEF_CVT_FUNC_F2(16u8s, ushort, schar, 16u8s_C1RSfs) +DEF_CVT_FUNC_F2(16s8s, short, schar, 16s8s_C1RSfs) +DEF_CVT_FUNC_F(32s8s, int, schar, 32s8s_C1R) +DEF_CVT_FUNC_F2(32f8s, float, schar, 32f8s_C1RSfs) DEF_CVT_FUNC(64f8s, double, schar) -DEF_CVT_FUNC(8u16u, uchar, ushort) -DEF_CVT_FUNC(8s16u, schar, ushort) +DEF_CVT_FUNC_F(8u16u, uchar, ushort, 8u16u_C1R) +DEF_CVT_FUNC_F(8s16u, schar, ushort, 8s16u_C1Rs) DEF_CPY_FUNC(16u, ushort) -DEF_CVT_FUNC(16s16u, short, ushort) -DEF_CVT_FUNC(32s16u, int, ushort) -DEF_CVT_FUNC(32f16u, float, ushort) +DEF_CVT_FUNC_F(16s16u, short, ushort, 16s16u_C1Rs) +DEF_CVT_FUNC_F2(32s16u, int, ushort, 32s16u_C1RSfs) +DEF_CVT_FUNC_F2(32f16u, float, ushort, 32f16u_C1RSfs) DEF_CVT_FUNC(64f16u, double, ushort) -DEF_CVT_FUNC(8u16s, uchar, short) -DEF_CVT_FUNC(8s16s, schar, short) -DEF_CVT_FUNC(16u16s, ushort, short) -DEF_CVT_FUNC(32s16s, int, short) -DEF_CVT_FUNC(32f16s, float, short) +DEF_CVT_FUNC_F(8u16s, uchar, short, 8u16s_C1R) +DEF_CVT_FUNC_F(8s16s, schar, short, 8s16s_C1R) +DEF_CVT_FUNC_F2(16u16s, ushort, short, 16u16s_C1RSfs) +DEF_CVT_FUNC_F2(32s16s, int, short, 32s16s_C1RSfs) +DEF_CVT_FUNC_F2(32f16s, float, short, 32f16s_C1RSfs) DEF_CVT_FUNC(64f16s, double, short) -DEF_CVT_FUNC(8u32s, uchar, int) -DEF_CVT_FUNC(8s32s, schar, int) -DEF_CVT_FUNC(16u32s, ushort, int) -DEF_CVT_FUNC(16s32s, short, int) +DEF_CVT_FUNC_F(8u32s, uchar, int, 8u32s_C1R) +DEF_CVT_FUNC_F(8s32s, schar, int, 8s32s_C1R) +DEF_CVT_FUNC_F(16u32s, ushort, int, 16u32s_C1R) +DEF_CVT_FUNC_F(16s32s, short, int, 16s32s_C1R) DEF_CPY_FUNC(32s, int) -DEF_CVT_FUNC(32f32s, float, int) +DEF_CVT_FUNC_F2(32f32s, float, int, 32f32s_C1RSfs) DEF_CVT_FUNC(64f32s, double, int) -DEF_CVT_FUNC(8u32f, uchar, float) -DEF_CVT_FUNC(8s32f, schar, float) -DEF_CVT_FUNC(16u32f, ushort, float) -DEF_CVT_FUNC(16s32f, short, float) -DEF_CVT_FUNC(32s32f, int, float) +DEF_CVT_FUNC_F(8u32f, uchar, float, 8u32f_C1R) +DEF_CVT_FUNC_F(8s32f, schar, float, 8s32f_C1R) +DEF_CVT_FUNC_F(16u32f, ushort, float, 16u32f_C1R) +DEF_CVT_FUNC_F(16s32f, short, float, 16s32f_C1R) +DEF_CVT_FUNC_F(32s32f, int, float, 32s32f_C1R) DEF_CVT_FUNC(64f32f, double, float) DEF_CVT_FUNC(8u64f, uchar, double) @@ -1434,7 +1461,7 @@ void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta) Size sz((int)(it.size*cn), 1); for( size_t i = 0; i < it.nplanes; i++, ++it ) - func(ptrs[0], 0, 0, 0, ptrs[1], 0, sz, scale); + func(ptrs[0], 1, 0, 0, ptrs[1], 1, sz, scale); } } diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 5ac5f22c58..202e7a9225 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -495,25 +495,17 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode ) else kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH; - Size size = _src.size(); - int cols = size.width, rows = size.height; - if ((cols == 1 && flipType == FLIP_COLS) || - (rows == 1 && flipType == FLIP_ROWS) || - (rows == 1 && cols == 1 && flipType == FLIP_BOTH)) - { - _src.copyTo(_dst); - return true; - } - ocl::Kernel k(kernelName, ocl::core::flip_oclsrc, format( "-D T=%s -D T1=%s -D cn=%d", ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth), cn)); if (k.empty()) return false; + Size size = _src.size(); _dst.create(size, type); UMat src = _src.getUMat(), dst = _dst.getUMat(); + int cols = size.width, rows = size.height; cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols; rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows; @@ -531,13 +523,59 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode ) void flip( InputArray _src, OutputArray _dst, int flip_mode ) { CV_Assert( _src.dims() <= 2 ); + Size size = _src.size(); - CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src,_dst, flip_mode)) + if (flip_mode < 0) + { + if (size.width == 1) + flip_mode = 0; + if (size.height == 1) + flip_mode = 1; + } + + if ((size.width == 1 && flip_mode > 0) || + (size.height == 1 && flip_mode == 0) || + (size.height == 1 && size.width == 1 && flip_mode < 0)) + { + return _src.copyTo(_dst); + } + + CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode)) Mat src = _src.getMat(); - _dst.create( src.size(), src.type() ); + int type = src.type(); + _dst.create( size, type ); Mat dst = _dst.getMat(); - size_t esz = src.elemSize(); + size_t esz = CV_ELEM_SIZE(type); + +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + typedef IppStatus (CV_STDCALL * ippiMirror)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize, IppiAxis flip); + ippiMirror ippFunc = + type == CV_8UC1 ? (ippiMirror)ippiMirror_8u_C1R : + type == CV_8UC3 ? (ippiMirror)ippiMirror_8u_C3R : + type == CV_8UC4 ? (ippiMirror)ippiMirror_8u_C4R : + type == CV_16UC1 ? (ippiMirror)ippiMirror_16u_C1R : + type == CV_16UC3 ? (ippiMirror)ippiMirror_16u_C3R : + type == CV_16UC4 ? (ippiMirror)ippiMirror_16u_C4R : + type == CV_16SC1 ? (ippiMirror)ippiMirror_16s_C1R : + type == CV_16SC3 ? (ippiMirror)ippiMirror_16s_C3R : + type == CV_16SC4 ? (ippiMirror)ippiMirror_16s_C4R : + type == CV_32SC1 ? (ippiMirror)ippiMirror_32s_C1R : + type == CV_32SC3 ? (ippiMirror)ippiMirror_32s_C3R : + type == CV_32SC4 ? (ippiMirror)ippiMirror_32s_C4R : + type == CV_32FC1 ? (ippiMirror)ippiMirror_32f_C1R : + type == CV_32FC3 ? (ippiMirror)ippiMirror_32f_C3R : + type == CV_32FC4 ? (ippiMirror)ippiMirror_32f_C4R : 0; + IppiAxis axis = flip_mode == 0 ? ippAxsHorizontal : + flip_mode > 0 ? ippAxsVertical : ippAxsBoth; + + if (ippFunc != 0) + { + IppStatus status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, ippiSize(src.cols, src.rows), axis); + if (status >= 0) + return; + } +#endif if( flip_mode <= 0 ) flipVert( src.data, src.step, dst.data, dst.step, src.size(), esz ); diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 12ba4fa5b1..65f78de085 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -238,6 +238,12 @@ float cubeRoot( float value ) static void Magnitude_32f(const float* x, const float* y, float* mag, int len) { +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + IppStatus status = ippsMagnitude_32f(x, y, mag, len); + if (status >= 0) + return; +#endif + int i = 0; #if CV_SSE @@ -264,6 +270,12 @@ static void Magnitude_32f(const float* x, const float* y, float* mag, int len) static void Magnitude_64f(const double* x, const double* y, double* mag, int len) { +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + IppStatus status = ippsMagnitude_64f(x, y, mag, len); + if (status >= 0) + return; +#endif + int i = 0; #if CV_SSE2 @@ -291,6 +303,11 @@ static void Magnitude_64f(const double* x, const double* y, double* mag, int len static void InvSqrt_32f(const float* src, float* dst, int len) { +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + if (ippsInvSqrt_32f_A21(src, dst, len) >= 0) + return; +#endif + int i = 0; #if CV_SSE @@ -334,6 +351,10 @@ static void InvSqrt_64f(const double* src, double* dst, int len) static void Sqrt_32f(const float* src, float* dst, int len) { +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + if (ippsSqrt_32f_A21(src, dst, len) >= 0) + return; +#endif int i = 0; #if CV_SSE @@ -363,6 +384,11 @@ static void Sqrt_32f(const float* src, float* dst, int len) static void Sqrt_64f(const double* src, double* dst, int len) { +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + if (ippsSqrt_64f_A50(src, dst, len) >= 0) + return; +#endif + int i = 0; #if CV_SSE2 @@ -729,6 +755,22 @@ void polarToCart( InputArray src1, InputArray src2, dst2.create( Angle.dims, Angle.size, type ); Mat X = dst1.getMat(), Y = dst2.getMat(); +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees) + { + typedef IppStatus (CV_STDCALL * ippsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase, + void * pDstRe, void * pDstIm, int len); + ippsPolarToCart ippFunc = + depth == CV_32F ? (ippsPolarToCart)ippsPolarToCart_32f : + depth == CV_64F ? (ippsPolarToCart)ippsPolarToCart_64f : 0; + CV_Assert(ippFunc != 0); + + IppStatus status = ippFunc(Mag.data, Angle.data, X.data, Y.data, static_cast(cn * X.total())); + if (status >= 0) + return; + } +#endif + const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0}; uchar* ptrs[4]; NAryMatIterator it(arrays, ptrs); @@ -2119,6 +2161,29 @@ void pow( InputArray _src, double power, OutputArray _dst ) _src.copyTo(_dst); return; case 2: +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + if (depth == CV_32F && !same && ( (_src.dims() <= 2 && !ocl::useOpenCL()) || (_src.dims() > 2 && _src.isContinuous() && _dst.isContinuous()) )) + { + Mat src = _src.getMat(); + _dst.create( src.dims, src.size, type ); + Mat dst = _dst.getMat(); + + Size size = src.size(); + int srcstep = (int)src.step, dststep = (int)dst.step, esz = CV_ELEM_SIZE(type); + if (src.isContinuous() && dst.isContinuous()) + { + size.width = (int)src.total(); + size.height = 1; + srcstep = dststep = (int)src.total() * esz; + } + size.width *= cn; + + IppStatus status = ippiSqr_32f_C1R((const Ipp32f *)src.data, srcstep, (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height)); + + if (status >= 0) + return; + } +#endif if (same) multiply(_dst, _dst, _dst); else @@ -2168,6 +2233,18 @@ void pow( InputArray _src, double power, OutputArray _dst ) } else { +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + if (src.isContinuous() && dst.isContinuous()) + { + IppStatus status = depth == CV_32F ? + ippsPowx_32f_A21((const Ipp32f *)src.data, (Ipp32f)power, (Ipp32f*)dst.data, (Ipp32s)(src.total() * cn)) : + ippsPowx_64f_A50((const Ipp64f *)src.data, power, (Ipp64f*)dst.data, (Ipp32s)(src.total() * cn)); + + if (status >= 0) + return; + } +#endif + int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn); size_t esz1 = src.elemSize1(); diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index 8891bb05f8..23735194d3 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -2212,7 +2212,7 @@ void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray Mat src1 = _src1.getMat(), src2 = _src2.getMat(); CV_Assert(src1.size == src2.size); - _dst.create(src1.dims, src1.size, src1.type()); + _dst.create(src1.dims, src1.size, type); Mat dst = _dst.getMat(); float falpha = (float)alpha; @@ -2220,9 +2220,16 @@ void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray ScaleAddFunc func = depth == CV_32F ? (ScaleAddFunc)scaleAdd_32f : (ScaleAddFunc)scaleAdd_64f; - if( src1.isContinuous() && src2.isContinuous() && dst.isContinuous() ) + if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous()) { size_t len = src1.total()*cn; +#if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY + if (depth == CV_32F && + ippmSaxpy_vava_32f((const Ipp32f *)src1.data, (int)src1.step, sizeof(Ipp32f), falpha, + (const Ipp32f *)src2.data, (int)src2.step, sizeof(Ipp32f), + (Ipp32f *)dst.data, (int)dst.step, sizeof(Ipp32f), (int)len, 1) >= 0) + return; +#endif func(src1.data, src2.data, dst.data, (int)len, palpha); return; } diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 45ae3d5124..4efba46548 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -2967,6 +2967,30 @@ void cv::transpose( InputArray _src, OutputArray _dst ) return; } +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + typedef IppStatus (CV_STDCALL * ippiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize); + ippiTranspose ippFunc = + type == CV_8UC1 ? (ippiTranspose)ippiTranspose_8u_C1R : + type == CV_8UC3 ? (ippiTranspose)ippiTranspose_8u_C3R : + type == CV_8UC4 ? (ippiTranspose)ippiTranspose_8u_C4R : + type == CV_16UC1 ? (ippiTranspose)ippiTranspose_16u_C1R : + type == CV_16UC3 ? (ippiTranspose)ippiTranspose_16u_C3R : + type == CV_16UC4 ? (ippiTranspose)ippiTranspose_16u_C4R : + type == CV_16SC1 ? (ippiTranspose)ippiTranspose_16s_C1R : + type == CV_16SC3 ? (ippiTranspose)ippiTranspose_16s_C3R : + type == CV_16SC4 ? (ippiTranspose)ippiTranspose_16s_C4R : + type == CV_32SC1 ? (ippiTranspose)ippiTranspose_32s_C1R : + type == CV_32SC3 ? (ippiTranspose)ippiTranspose_32s_C3R : + type == CV_32SC4 ? (ippiTranspose)ippiTranspose_32s_C4R : + type == CV_32FC1 ? (ippiTranspose)ippiTranspose_32f_C1R : + type == CV_32FC3 ? (ippiTranspose)ippiTranspose_32f_C3R : + type == CV_32FC4 ? (ippiTranspose)ippiTranspose_32f_C4R : 0; + + IppiSize roiSize = { src.cols, src.rows }; + if (ippFunc != 0 && ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, roiSize) >= 0) + return; +#endif + if( dst.data == src.data ) { TransposeInplaceFunc func = transposeInplaceTab[esz]; diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index ffea804ed9..24190c52c9 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -1581,7 +1581,7 @@ void finish() #define IMPLEMENT_REFCOUNTABLE() \ void addref() { CV_XADD(&refcount, 1); } \ - void release() { if( CV_XADD(&refcount, -1) == 1 ) delete this; } \ + void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \ int refcount /////////////////////////////////////////// Platform ///////////////////////////////////////////// diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 0e3d44ed6b..ecc0f76cb8 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -933,10 +933,10 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input dcn_stddev = (int)stddev.total(); pstddev = (Ipp64f *)stddev.data; } - for( int k = cn; k < dcn_mean; k++ ) - pmean[k] = 0; - for( int k = cn; k < dcn_stddev; k++ ) - pstddev[k] = 0; + for( int c = cn; c < dcn_mean; c++ ) + pmean[c] = 0; + for( int c = cn; c < dcn_stddev; c++ ) + pstddev[c] = 0; IppiSize sz = { cols, rows }; int type = src.type(); if( !mask.empty() ) @@ -2016,6 +2016,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask ) #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) size_t total_size = src.total(); int rows = src.size[0], cols = (int)(total_size/rows); + if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous())) && cols > 0 && (size_t)rows*cols == total_size && (normType == NORM_INF || normType == NORM_L1 || @@ -2030,7 +2031,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask ) normType == NORM_INF ? (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR : type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR : - type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR : +// type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR : type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR : 0) : normType == NORM_L1 ? diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 4b3efce4a8..cef4db3c2c 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -918,16 +918,22 @@ public: #pragma warning(disable:4447) // Disable warning 'main' signature found without threading model #endif -BOOL WINAPI DllMain(HINSTANCE, DWORD fdwReason, LPVOID); - +extern "C" BOOL WINAPI DllMain(HINSTANCE, DWORD fdwReason, LPVOID lpReserved) { if (fdwReason == DLL_THREAD_DETACH || fdwReason == DLL_PROCESS_DETACH) { if (lpReserved != NULL) // called after ExitProcess() call + { cv::__termination = true; - cv::deleteThreadAllocData(); - cv::deleteThreadData(); + } + else + { + // Not allowed to free resources if lpReserved is non-null + // http://msdn.microsoft.com/en-us/library/windows/desktop/ms682583.aspx + cv::deleteThreadAllocData(); + cv::deleteThreadData(); + } } return TRUE; } diff --git a/modules/core/test/ocl/test_matrix_operation.cpp b/modules/core/test/ocl/test_matrix_operation.cpp index 901609538e..ee591e9bd9 100644 --- a/modules/core/test/ocl/test_matrix_operation.cpp +++ b/modules/core/test/ocl/test_matrix_operation.cpp @@ -107,6 +107,7 @@ PARAM_TEST_CASE(CopyTo, MatDepth, Channels, bool, bool) { int depth, cn; bool use_roi, use_mask; + Scalar val; TEST_DECLARE_INPUT_PARAMETER(src); TEST_DECLARE_INPUT_PARAMETER(mask); @@ -143,6 +144,8 @@ PARAM_TEST_CASE(CopyTo, MatDepth, Channels, bool, bool) if (use_mask) UMAT_UPLOAD_INPUT_PARAMETER(mask); UMAT_UPLOAD_OUTPUT_PARAMETER(dst); + + val = randomScalar(-MAX_VALUE, MAX_VALUE); } }; @@ -168,12 +171,38 @@ OCL_TEST_P(CopyTo, Accuracy) } } +typedef CopyTo SetTo; + +OCL_TEST_P(SetTo, Accuracy) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + if (use_mask) + { + OCL_OFF(dst_roi.setTo(val, mask_roi)); + OCL_ON(udst_roi.setTo(val, umask_roi)); + } + else + { + OCL_OFF(dst_roi.setTo(val)); + OCL_ON(udst_roi.setTo(val)); + } + + OCL_EXPECT_MATS_NEAR(dst, 0); + } +} + OCL_INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine( OCL_ALL_DEPTHS, OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); OCL_INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine( OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool())); +OCL_INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine( + OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool())); + } } // namespace cvtest::ocl #endif diff --git a/modules/highgui/test/test_ffmpeg.cpp b/modules/highgui/test/test_ffmpeg.cpp index f8491d1a69..61fc3d49a4 100644 --- a/modules/highgui/test/test_ffmpeg.cpp +++ b/modules/highgui/test/test_ffmpeg.cpp @@ -329,7 +329,7 @@ public: EXPECT_EQ(reference.depth(), actual.depth()); EXPECT_EQ(reference.channels(), actual.channels()); - double psnr = PSNR(actual, reference); + double psnr = cvtest::PSNR(actual, reference); if (psnr < eps) { #define SUM cvtest::TS::SUMMARY diff --git a/modules/highgui/test/test_video_io.cpp b/modules/highgui/test/test_video_io.cpp index cacfde0b3c..f380e0d26b 100644 --- a/modules/highgui/test/test_video_io.cpp +++ b/modules/highgui/test/test_video_io.cpp @@ -198,7 +198,7 @@ void CV_HighGuiTest::ImageTest(const string& dir) } const double thresDbell = 20; - double psnr = PSNR(loaded, image); + double psnr = cvtest::PSNR(loaded, image); if (psnr < thresDbell) { ts->printf(ts->LOG, "Reading image from file: too big difference (=%g) with fmt=%s\n", psnr, ext.c_str()); @@ -235,7 +235,7 @@ void CV_HighGuiTest::ImageTest(const string& dir) continue; } - psnr = PSNR(buf_loaded, image); + psnr = cvtest::PSNR(buf_loaded, image); if (psnr < thresDbell) { @@ -316,7 +316,7 @@ void CV_HighGuiTest::VideoTest(const string& dir, const cvtest::VideoFormat& fmt Mat img = frames[i]; Mat img1 = cv::cvarrToMat(ipl1); - double psnr = PSNR(img1, img); + double psnr = cvtest::PSNR(img1, img); if (psnr < thresDbell) { ts->printf(ts->LOG, "Too low frame %d psnr = %gdb\n", i, psnr); @@ -371,7 +371,7 @@ void CV_HighGuiTest::SpecificImageTest(const string& dir) } const double thresDbell = 20; - double psnr = PSNR(loaded, image); + double psnr = cvtest::PSNR(loaded, image); if (psnr < thresDbell) { ts->printf(ts->LOG, "Reading image from file: too big difference (=%g) with fmt=bmp\n", psnr); @@ -408,7 +408,7 @@ void CV_HighGuiTest::SpecificImageTest(const string& dir) continue; } - psnr = PSNR(buf_loaded, image); + psnr = cvtest::PSNR(buf_loaded, image); if (psnr < thresDbell) { @@ -521,7 +521,7 @@ void CV_HighGuiTest::SpecificVideoTest(const string& dir, const cvtest::VideoFor Mat img = images[i]; const double thresDbell = 40; - double psnr = PSNR(img, frame); + double psnr = cvtest::PSNR(img, frame); if (psnr > thresDbell) { diff --git a/modules/highgui/test/test_video_pos.cpp b/modules/highgui/test/test_video_pos.cpp index a502040efb..c8fe4050da 100644 --- a/modules/highgui/test/test_video_pos.cpp +++ b/modules/highgui/test/test_video_pos.cpp @@ -160,7 +160,7 @@ public: return; } - double err = PSNR(img, img0); + double err = cvtest::PSNR(img, img0); if( err < 20 ) { diff --git a/modules/imgproc/src/accum.cpp b/modules/imgproc/src/accum.cpp index f130f34da9..74a63e916c 100644 --- a/modules/imgproc/src/accum.cpp +++ b/modules/imgproc/src/accum.cpp @@ -457,6 +457,56 @@ void cv::accumulateSquare( InputArray _src, InputOutputArray _dst, InputArray _m Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat(); +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous()))) + { + typedef IppStatus (CV_STDCALL * ippiAddSquare)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, IppiSize roiSize); + typedef IppStatus (CV_STDCALL * ippiAddSquareMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst, + int srcDstStep, IppiSize roiSize); + ippiAddSquare ippFunc = 0; + ippiAddSquareMask ippFuncMask = 0; + + if (mask.empty()) + { + ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_8u32f_C1IR : + sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_16u32f_C1IR : + sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_32f_C1IR : 0; + } + else if (scn == 1) + { + ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_8u32f_C1IMR : + sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_16u32f_C1IMR : + sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_32f_C1IMR : 0; + } + + if (ippFunc || ippFuncMask) + { + IppStatus status = ippStsNoErr; + + Size size = src.size(); + int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step; + if (src.isContinuous() && dst.isContinuous() && mask.isContinuous()) + { + srcstep = static_cast(src.total() * src.elemSize()); + dststep = static_cast(dst.total() * dst.elemSize()); + maskstep = static_cast(mask.total() * mask.elemSize()); + size.width = static_cast(src.total()); + size.height = 1; + } + size.width *= scn; + + if (mask.empty()) + status = ippFunc(src.data, srcstep, (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height)); + else + status = ippFuncMask(src.data, srcstep, (Ipp8u *)mask.data, maskstep, + (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height)); + + if (status >= 0) + return; + } + } +#endif + int fidx = getAccTabIdx(sdepth, ddepth); AccFunc func = fidx >= 0 ? accSqrTab[fidx] : 0; CV_Assert( func != 0 ); @@ -485,6 +535,59 @@ void cv::accumulateProduct( InputArray _src1, InputArray _src2, Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat(); +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + if (src1.dims <= 2 || (src1.isContinuous() && src2.isContinuous() && dst.isContinuous())) + { + typedef IppStatus (CV_STDCALL * ippiAddProduct)(const void * pSrc1, int src1Step, const void * pSrc2, + int src2Step, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize); + typedef IppStatus (CV_STDCALL * ippiAddProductMask)(const void * pSrc1, int src1Step, const void * pSrc2, int src2Step, + const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize); + ippiAddProduct ippFunc = 0; + ippiAddProductMask ippFuncMask = 0; + + if (mask.empty()) + { + ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_8u32f_C1IR : + sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_16u32f_C1IR : + sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_32f_C1IR : 0; + } + else if (scn == 1) + { + ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_8u32f_C1IMR : + sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_16u32f_C1IMR : + sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_32f_C1IMR : 0; + } + + if (ippFunc || ippFuncMask) + { + IppStatus status = ippStsNoErr; + + Size size = src1.size(); + int src1step = (int)src1.step, src2step = (int)src2.step, dststep = (int)dst.step, maskstep = (int)mask.step; + if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous() && mask.isContinuous()) + { + src1step = static_cast(src1.total() * src1.elemSize()); + src2step = static_cast(src2.total() * src2.elemSize()); + dststep = static_cast(dst.total() * dst.elemSize()); + maskstep = static_cast(mask.total() * mask.elemSize()); + size.width = static_cast(src1.total()); + size.height = 1; + } + size.width *= scn; + + if (mask.empty()) + status = ippFunc(src1.data, src1step, src2.data, src2step, (Ipp32f *)dst.data, + dststep, ippiSize(size.width, size.height)); + else + status = ippFuncMask(src1.data, src1step, src2.data, src2step, (Ipp8u *)mask.data, maskstep, + (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height)); + + if (status >= 0) + return; + } + } +#endif + int fidx = getAccTabIdx(sdepth, ddepth); AccProdFunc func = fidx >= 0 ? accProdTab[fidx] : 0; CV_Assert( func != 0 ); @@ -512,6 +615,58 @@ void cv::accumulateWeighted( InputArray _src, InputOutputArray _dst, Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat(); +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && mask.isContinuous())) + { + typedef IppStatus (CV_STDCALL * ippiAddWeighted)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, + IppiSize roiSize, Ipp32f alpha); + typedef IppStatus (CV_STDCALL * ippiAddWeightedMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, + int maskStep, Ipp32f * pSrcDst, + int srcDstStep, IppiSize roiSize, Ipp32f alpha); + ippiAddWeighted ippFunc = 0; + ippiAddWeightedMask ippFuncMask = 0; + + if (mask.empty()) + { + ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_8u32f_C1IR : + sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_16u32f_C1IR : + sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_32f_C1IR : 0; + } + else if (scn == 1) + { + ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_8u32f_C1IMR : + sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_16u32f_C1IMR : + sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_32f_C1IMR : 0; + } + + if (ippFunc || ippFuncMask) + { + IppStatus status = ippStsNoErr; + + Size size = src.size(); + int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step; + if (src.isContinuous() && dst.isContinuous() && mask.isContinuous()) + { + srcstep = static_cast(src.total() * src.elemSize()); + dststep = static_cast(dst.total() * dst.elemSize()); + maskstep = static_cast(mask.total() * mask.elemSize()); + size.width = static_cast((int)src.total()); + size.height = 1; + } + size.width *= scn; + + if (mask.empty()) + status = ippFunc(src.data, srcstep, (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height), (Ipp32f)alpha); + else + status = ippFuncMask(src.data, srcstep, (Ipp8u *)mask.data, maskstep, + (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height), (Ipp32f)alpha); + + if (status >= 0) + return; + } + } +#endif + int fidx = getAccTabIdx(sdepth, ddepth); AccWFunc func = fidx >= 0 ? accWTab[fidx] : 0; CV_Assert( func != 0 ); diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 8ab7e4929f..287a188807 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -200,12 +200,14 @@ void CvtColorLoop(const Mat& src, Mat& dst, const Cvt& cvt) } #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + typedef IppStatus (CV_STDCALL* ippiReorderFunc)(const void *, int, void *, int, IppiSize, const int *); typedef IppStatus (CV_STDCALL* ippiGeneralFunc)(const void *, int, void *, int, IppiSize); typedef IppStatus (CV_STDCALL* ippiColor2GrayFunc)(const void *, int, void *, int, IppiSize, const Ipp32f *); template -class CvtColorIPPLoop_Invoker : public ParallelLoopBody +class CvtColorIPPLoop_Invoker : + public ParallelLoopBody { public: @@ -251,8 +253,8 @@ bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt) source = temp; } bool ok; - parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker(source, dst, cvt, &ok), source.total()/(double)(1<<16) ); - //ok = cvt(src.ptr(0), (int)src.step[0], dst.ptr(0), (int)dst.step[0], src.cols, src.rows); + parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker(source, dst, cvt, &ok), + source.total()/(double)(1<<16) ); return ok; } @@ -298,7 +300,7 @@ static ippiReorderFunc ippiSwapChannelsC3RTab[] = 0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0 }; -#if (IPP_VERSION_X100 >= 801) +#if !defined(HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 801 static ippiReorderFunc ippiSwapChannelsC4RTab[] = { (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0, @@ -308,8 +310,8 @@ static ippiReorderFunc ippiSwapChannelsC4RTab[] = static ippiColor2GrayFunc ippiColor2GrayC3Tab[] = { - (ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R, 0, - 0, (ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R, 0, 0 + /*(ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R*/ 0, 0, /*(ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R*/ 0, 0, + 0, /*(ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R*/ 0, 0, 0 }; static ippiColor2GrayFunc ippiColor2GrayC4Tab[] = @@ -339,18 +341,18 @@ static ippiGeneralFunc ippiCopyP3C3RTab[] = static ippiGeneralFunc ippiRGB2XYZTab[] = { (ippiGeneralFunc)ippiRGBToXYZ_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToXYZ_16u_C3R, 0, - 0, (ippiGeneralFunc)ippiRGBToXYZ_32f_C3R, 0, 0 + 0, /*(ippiGeneralFunc)ippiRGBToXYZ_32f_C3R*/ 0, 0, 0 }; static ippiGeneralFunc ippiXYZ2RGBTab[] = { (ippiGeneralFunc)ippiXYZToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiXYZToRGB_16u_C3R, 0, - 0, (ippiGeneralFunc)ippiXYZToRGB_32f_C3R, 0, 0 + 0, /*(ippiGeneralFunc)ippiXYZToRGB_32f_C3R*/ 0, 0, 0 }; static ippiGeneralFunc ippiRGB2HSVTab[] = { - (ippiGeneralFunc)ippiRGBToHSV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHSV_16u_C3R, 0, + /*(ippiGeneralFunc)ippiRGBToHSV_8u_C3R*/ 0, 0, /*(ippiGeneralFunc)ippiRGBToHSV_16u_C3R*/ 0, 0, 0, 0, 0, 0 }; @@ -377,7 +379,7 @@ struct IPPGeneralFunctor IPPGeneralFunctor(ippiGeneralFunc _func) : func(_func){} bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const { - return func(src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0; + return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0 : false; } private: ippiGeneralFunc func; @@ -394,7 +396,7 @@ struct IPPReorderFunctor } bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const { - return func(src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0; + return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0 : false; } private: ippiReorderFunc func; @@ -403,7 +405,8 @@ private: struct IPPColor2GrayFunctor { - IPPColor2GrayFunctor(ippiColor2GrayFunc _func) : func(_func) + IPPColor2GrayFunctor(ippiColor2GrayFunc _func) : + func(_func) { coeffs[0] = 0.114f; coeffs[1] = 0.587f; @@ -411,7 +414,7 @@ struct IPPColor2GrayFunctor } bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const { - return func(src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0; + return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0 : false; } private: ippiColor2GrayFunc func; @@ -420,9 +423,16 @@ private: struct IPPGray2BGRFunctor { - IPPGray2BGRFunctor(ippiGeneralFunc _func) : func(_func){} + IPPGray2BGRFunctor(ippiGeneralFunc _func) : + func(_func) + { + } + bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const { + if (func == 0) + return false; + const void* srcarray[3] = { src, src, src }; return func(srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0; } @@ -432,9 +442,16 @@ private: struct IPPGray2BGRAFunctor { - IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) : func1(_func1), func2(_func2), depth(_depth){} + IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) : + func1(_func1), func2(_func2), depth(_depth) + { + } + bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const { + if (func1 == 0 || func2 == 0) + return false; + const void* srcarray[3] = { src, src, src }; Mat temp(rows, cols, CV_MAKETYPE(depth, 3)); if(func1(srcarray, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows)) < 0) @@ -450,7 +467,8 @@ private: struct IPPReorderGeneralFunctor { - IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) : func1(_func1), func2(_func2), depth(_depth) + IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) : + func1(_func1), func2(_func2), depth(_depth) { order[0] = _order0; order[1] = _order1; @@ -459,6 +477,9 @@ struct IPPReorderGeneralFunctor } bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const { + if (func1 == 0 || func2 == 0) + return false; + Mat temp; temp.create(rows, cols, CV_MAKETYPE(depth, 3)); if(func1(src, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows), order) < 0) @@ -474,7 +495,8 @@ private: struct IPPGeneralReorderFunctor { - IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) : func1(_func1), func2(_func2), depth(_depth) + IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) : + func1(_func1), func2(_func2), depth(_depth) { order[0] = _order0; order[1] = _order1; @@ -483,6 +505,9 @@ struct IPPGeneralReorderFunctor } bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const { + if (func1 == 0 || func2 == 0) + return false; + Mat temp; temp.create(rows, cols, CV_MAKETYPE(depth, 3)); if(func1(src, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows)) < 0) @@ -495,6 +520,7 @@ private: int order[4]; int depth; }; + #endif ////////////////// Various 3/4-channel to 3/4-channel RGB transformations ///////////////// @@ -3254,7 +3280,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) ) return; } -#if (IPP_VERSION_X100 >= 801) +#if !defined(HAVE_IPP_ICV_ONLY) && (IPP_VERSION_X100 >= 801) else if( code == CV_RGBA2BGRA ) { if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) ) @@ -3315,17 +3341,14 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) CV_Assert( scn == 3 || scn == 4 ); _dst.create(sz, CV_MAKETYPE(depth, 1)); dst = _dst.getMat(); -/**/ + #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) -/* if( code == CV_BGR2GRAY ) { if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) ) return; } - else -*/ - if( code == CV_RGB2GRAY ) + else if( code == CV_RGB2GRAY ) { if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) ) return; @@ -3341,7 +3364,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) return; } #endif -/**/ + bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2; if( depth == CV_8U ) diff --git a/modules/imgproc/src/deriv.cpp b/modules/imgproc/src/deriv.cpp index 0b19f22be4..1b3e2c417b 100644 --- a/modules/imgproc/src/deriv.cpp +++ b/modules/imgproc/src/deriv.cpp @@ -233,6 +233,9 @@ static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy, } } case CV_32F: +#if defined(HAVE_IPP_ICV_ONLY) // N/A: ippiMulC_32f_C1R + return false; +#else { switch(dst.type()) { @@ -277,6 +280,7 @@ static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy, return false; } } +#endif default: return false; } @@ -341,6 +345,10 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k if (src.type() == CV_32F && dst.type() == CV_32F) { +#if defined(HAVE_IPP_ICV_ONLY) // N/A: ippiMulC_32f_C1R + return false; +#else +#if 0 if ((dx == 1) && (dy == 0)) { if (0 > ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize)) @@ -374,6 +382,7 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } +#endif if((dx == 2) && (dy == 0)) { @@ -409,6 +418,7 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k ippiMulC_32f_C1R((Ipp32f *)dst.data, (int)dst.step, (Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } +#endif } } diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index 8b337f645b..c9a5ed1e75 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -1464,7 +1464,7 @@ private: int ippiOperator(const uchar* _src, uchar* _dst, int width, int cn) const { int _ksize = kernel.rows + kernel.cols - 1; - if ((1 != cn && 3 != cn) || width < _ksize*8) +// if ((1 != cn && 3 != cn) || width < _ksize*8) return 0; const float* src = (const float*)_src; diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index 0c7aafc7b9..45a66bd83e 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -61,9 +61,9 @@ namespace cv typedef IppStatus (CV_STDCALL* ippiResizeGetSrcOffset)(void*, IppiPoint, IppiPoint*); #endif -#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) && 0 typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize); - typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int); + typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int); typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int); template @@ -75,7 +75,7 @@ namespace cv return func(values, dataPointer, step, size) >= 0; } - bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth) + static bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth) { if( channels == 1 ) { @@ -1912,7 +1912,7 @@ static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, Dec getBufferSizeFunc = (ippiResizeGetBufferSize)ippiResizeGetBufferSize_##TYPE;\ getSrcOffsetFunc = (ippiResizeGetSrcOffset)ippiResizeGetSrcOffset_##TYPE; -#if IPP_VERSION_X100 >= 701 +#if !defined(HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 701 class IPPresizeInvoker : public ParallelLoopBody { @@ -2384,7 +2384,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y; int k, sx, sy, dx, dy; -#if IPP_VERSION_X100 >= 701 +#if !defined(HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 701 #define IPP_RESIZE_EPS 1.e-10 double ex = fabs((double)dsize.width/src.cols - inv_scale_x)/inv_scale_x; @@ -3892,11 +3892,11 @@ void cv::convertMaps( InputArray _map1, InputArray _map2, namespace cv { -class warpAffineInvoker : +class WarpAffineInvoker : public ParallelLoopBody { public: - warpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType, + WarpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType, const Scalar &_borderValue, int *_adelta, int *_bdelta, double *_M) : ParallelLoopBody(), src(_src), dst(_dst), interpolation(_interpolation), borderType(_borderType), borderValue(_borderValue), adelta(_adelta), bdelta(_bdelta), @@ -4013,16 +4013,20 @@ private: double *M; }; -#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) -class IPPwarpAffineInvoker : + + /* +#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801 +class IPPWarpAffineInvoker : public ParallelLoopBody { public: - IPPwarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) : - ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok) - { - *ok = true; - } + IPPWarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int _borderType, + const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) : + ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), + borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok) + { + *ok = true; + } virtual void operator() (const Range& range) const { @@ -4040,21 +4044,26 @@ public: return; } } - if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr + + // Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr + IppStatus status = func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, + (int)dst.step[0], dstroi, coeffs, mode ); + if( status < 0) *ok = false; } private: Mat &src; Mat &dst; - double (&coeffs)[2][3]; int mode; + double (&coeffs)[2][3]; int borderType; Scalar borderValue; ippiWarpAffineBackFunc func; bool *ok; - const IPPwarpAffineInvoker& operator= (const IPPwarpAffineInvoker&); + const IPPWarpAffineInvoker& operator= (const IPPWarpAffineInvoker&); }; #endif + */ #ifdef HAVE_OPENCL @@ -4204,16 +4213,19 @@ void cv::warpAffine( InputArray _src, OutputArray _dst, int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols; const int AB_BITS = MAX(10, (int)INTER_BITS); const int AB_SCALE = 1 << AB_BITS; -/* -#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) - int depth = src.depth(); - int channels = src.channels(); + + /* +#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801 + int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) && - ( channels == 1 || channels == 3 || channels == 4 ) && - ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) ) + ( cn == 1 || cn == 3 || cn == 4 ) && + ( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC) && + ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT) ) { - int type = src.type(); - ippiWarpAffineBackFunc ippFunc = + ippiWarpAffineBackFunc ippFunc = 0; + if ((flags & WARP_INVERSE_MAP) != 0) + { + ippFunc = type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R : type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R : type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R : @@ -4224,31 +4236,43 @@ void cv::warpAffine( InputArray _src, OutputArray _dst, type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R : type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R : 0; - int mode = - flags == INTER_LINEAR ? IPPI_INTER_LINEAR : - flags == INTER_NEAREST ? IPPI_INTER_NN : - flags == INTER_CUBIC ? IPPI_INTER_CUBIC : - 0; - if( mode && ippFunc ) - { - double coeffs[2][3]; - for( int i = 0; i < 2; i++ ) - { - for( int j = 0; j < 3; j++ ) - { - coeffs[i][j] = matM.at(i, j); - } - } - bool ok; - Range range(0, dst.rows); - IPPwarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok); - parallel_for_(range, invoker, dst.total()/(double)(1<<16)); - if( ok ) - return; } + else + { + ippFunc = + type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C1R : + type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C3R : + type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C4R : + type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C1R : + type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C3R : + type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C4R : + type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C1R : + type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C3R : + type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C4R : + 0; + } + int mode = + interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : + interpolation == INTER_NEAREST ? IPPI_INTER_NN : + interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC : + 0; + CV_Assert(mode && ippFunc); + + double coeffs[2][3]; + for( int i = 0; i < 2; i++ ) + for( int j = 0; j < 3; j++ ) + coeffs[i][j] = matM.at(i, j); + + bool ok; + Range range(0, dst.rows); + IPPWarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok); + parallel_for_(range, invoker, dst.total()/(double)(1<<16)); + if( ok ) + return; } #endif -*/ + */ + for( x = 0; x < dst.cols; x++ ) { adelta[x] = saturate_cast(M[0]*x*AB_SCALE); @@ -4256,7 +4280,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst, } Range range(0, dst.rows); - warpAffineInvoker invoker(src, dst, interpolation, borderType, + WarpAffineInvoker invoker(src, dst, interpolation, borderType, borderValue, adelta, bdelta, M); parallel_for_(range, invoker, dst.total()/(double)(1<<16)); } @@ -4265,12 +4289,12 @@ void cv::warpAffine( InputArray _src, OutputArray _dst, namespace cv { -class warpPerspectiveInvoker : +class WarpPerspectiveInvoker : public ParallelLoopBody { public: - warpPerspectiveInvoker(const Mat &_src, Mat &_dst, double *_M, int _interpolation, + WarpPerspectiveInvoker(const Mat &_src, Mat &_dst, double *_M, int _interpolation, int _borderType, const Scalar &_borderValue) : ParallelLoopBody(), src(_src), dst(_dst), M(_M), interpolation(_interpolation), borderType(_borderType), borderValue(_borderValue) @@ -4356,16 +4380,19 @@ private: Scalar borderValue; }; -#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) -class IPPwarpPerspectiveInvoker : + /* +#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801 +class IPPWarpPerspectiveInvoker : public ParallelLoopBody { public: - IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) : - ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok) - { - *ok = true; - } + IPPWarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, + int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveFunc _func, bool *_ok) : + ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), + borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok) + { + *ok = true; + } virtual void operator() (const Range& range) const { @@ -4384,22 +4411,25 @@ public: return; } } - if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0) + + IppStatus status = func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode); + if (status != ippStsNoErr) *ok = false; } private: Mat &src; Mat &dst; - double (&coeffs)[3][3]; int mode; + double (&coeffs)[3][3]; int borderType; const Scalar borderValue; - ippiWarpPerspectiveBackFunc func; + ippiWarpPerspectiveFunc func; bool *ok; - const IPPwarpPerspectiveInvoker& operator= (const IPPwarpPerspectiveInvoker&); + + const IPPWarpPerspectiveInvoker& operator= (const IPPWarpPerspectiveInvoker&); }; #endif - + */ } void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0, @@ -4432,55 +4462,65 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0, return; #endif - if( !(flags & WARP_INVERSE_MAP) ) - invert(matM, matM); -/* -#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) - int depth = src.depth(); - int channels = src.channels(); - if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) && - ( channels == 1 || channels == 3 || channels == 4 ) && - ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) ) + /* +#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801 + int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + if( (depth == CV_8U || depth == CV_16U || depth == CV_32F) && + (cn == 1 || cn == 3 || cn == 4) && + ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) && + (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC)) { - int type = src.type(); - ippiWarpPerspectiveBackFunc ippFunc = - type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R : - type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R : - type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R : - type == CV_16UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C1R : - type == CV_16UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C3R : - type == CV_16UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C4R : - type == CV_32FC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C1R : - type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R : - type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R : - 0; - int mode = - flags == INTER_LINEAR ? IPPI_INTER_LINEAR : - flags == INTER_NEAREST ? IPPI_INTER_NN : - flags == INTER_CUBIC ? IPPI_INTER_CUBIC : - 0; - if( mode && ippFunc ) + ippiWarpPerspectiveFunc ippFunc = 0; + if ((flags & WARP_INVERSE_MAP) != 0) { - double coeffs[3][3]; - for( int i = 0; i < 3; i++ ) - { - for( int j = 0; j < 3; j++ ) - { - coeffs[i][j] = matM.at(i, j); - } - } - bool ok; - Range range(0, dst.rows); - IPPwarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok); - parallel_for_(range, invoker, dst.total()/(double)(1<<16)); - if( ok ) - return; + ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C1R : + type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C3R : + type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C4R : + type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C1R : + type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C3R : + type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C4R : + type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C1R : + type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C3R : + type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C4R : 0; } + else + { + ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C1R : + type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C3R : + type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C4R : + type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C1R : + type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C3R : + type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C4R : + type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C1R : + type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C3R : + type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C4R : 0; + } + int mode = + interpolation == INTER_NEAREST ? IPPI_INTER_NN : + interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : + interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC : 0; + CV_Assert(mode && ippFunc); + + double coeffs[3][3]; + for( int i = 0; i < 3; i++ ) + for( int j = 0; j < 3; j++ ) + coeffs[i][j] = matM.at(i, j); + + bool ok; + Range range(0, dst.rows); + IPPWarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok); + parallel_for_(range, invoker, dst.total()/(double)(1<<16)); + if( ok ) + return; } #endif -*/ + */ + + if( !(flags & WARP_INVERSE_MAP) ) + invert(matM, matM); + Range range(0, dst.rows); - warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue); + WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue); parallel_for_(range, invoker, dst.total()/(double)(1<<16)); } diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index 07aa4c5dd3..5b13ffc29e 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1228,6 +1228,9 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne } else { +#if defined(HAVE_IPP_ICV_ONLY) // N/A: ippiFilterMin*/ippiFilterMax* + return false; +#else IppiPoint point = {anchor.x, anchor.y}; #define IPP_MORPH_CASE(cvtype, flavor, data_type) \ @@ -1257,6 +1260,7 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne } #undef IPP_MORPH_CASE +#endif } } diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index 84570bd2c2..4318cd1871 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -841,7 +841,7 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth, CV_OCL_RUN(_dst.isUMat(), ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize)) Mat src = _src.getMat(); - int sdepth = src.depth(), cn = src.channels(); + int stype = src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype); if( ddepth < 0 ) ddepth = sdepth; _dst.create( src.size(), CV_MAKETYPE(ddepth, cn) ); @@ -858,6 +858,69 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth, return; #endif +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + int ippBorderType = borderType & ~BORDER_ISOLATED; + Point ocvAnchor, ippAnchor; + ocvAnchor.x = anchor.x < 0 ? ksize.width / 2 : anchor.x; + ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y; + ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0); + ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0); + + if (normalize && !src.isSubmatrix() && ddepth == sdepth && + (ippBorderType == BORDER_REPLICATE || ippBorderType == BORDER_CONSTANT) && + ocvAnchor == ippAnchor ) + { + Ipp32s bufSize; + IppiSize roiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize.width, ksize.height); + +#define IPP_FILTER_BOX_BORDER(ippType, ippDataType, flavor) \ + do \ + { \ + if (ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippDataType, cn, &bufSize) >= 0) \ + { \ + Ipp8u * buffer = ippsMalloc_8u(bufSize); \ + ippType borderValue[4] = { 0, 0, 0, 0 }; \ + ippBorderType = ippBorderType == BORDER_CONSTANT ? ippBorderConst : ippBorderType == BORDER_REPLICATE ? ippBorderRepl : -1; \ + CV_Assert(ippBorderType >= 0); \ + IppStatus status = ippiFilterBoxBorder_##flavor((ippType *)src.data, (int)src.step, (ippType *)dst.data, (int)dst.step, roiSize, maskSize, \ + (IppiBorderType)ippBorderType, borderValue, buffer); \ + ippsFree(buffer); \ + if (status >= 0) \ + return; \ + } \ + } while ((void)0, 0) + + if (stype == CV_8UC1) + IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C1R); + else if (stype == CV_8UC3) + IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C3R); + else if (stype == CV_8UC4) + IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C4R); + + else if (stype == CV_16UC1) + IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C1R); + else if (stype == CV_16UC3) + IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C3R); + else if (stype == CV_16UC4) + IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C4R); + + else if (stype == CV_16SC1) + IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C1R); + else if (stype == CV_16SC3) + IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C3R); + else if (stype == CV_16SC4) + IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C4R); + + else if (stype == CV_32FC1) + IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C1R); + else if (stype == CV_32FC3) + IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C3R); + else if (stype == CV_32FC4) + IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C4R); + } +#undef IPP_FILTER_BOX_BORDER +#endif + Ptr f = createBoxFilter( src.type(), dst.type(), ksize, anchor, normalize, borderType ); f->apply( src, dst ); @@ -1948,13 +2011,46 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize ) return; } - CV_OCL_RUN(_src0.dims() <= 2 && _dst.isUMat(), + CV_OCL_RUN(_dst.isUMat(), ocl_medianFilter(_src0,_dst, ksize)) Mat src0 = _src0.getMat(); _dst.create( src0.size(), src0.type() ); Mat dst = _dst.getMat(); +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 801 +#define IPP_FILTER_MEDIAN_BORDER(ippType, ippDataType, flavor) \ + do \ + { \ + if (ippiFilterMedianBorderGetBufferSize(dstRoiSize, maskSize, \ + ippDataType, CV_MAT_CN(type), &bufSize) >= 0) \ + { \ + Ipp8u * buffer = ippsMalloc_8u(bufSize); \ + IppStatus status = ippiFilterMedianBorder_##flavor((const ippType *)src0.data, (int)src0.step, \ + (ippType *)dst.data, (int)dst.step, dstRoiSize, maskSize, \ + ippBorderRepl, (ippType)0, buffer); \ + ippsFree(buffer); \ + if (status >= 0) \ + return; \ + } \ + } \ + while ((void)0, 0) + + Ipp32s bufSize; + IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize); + + int type = src0.type(); + if (type == CV_8UC1) + IPP_FILTER_MEDIAN_BORDER(Ipp8u, ipp8u, 8u_C1R); + else if (type == CV_16UC1) + IPP_FILTER_MEDIAN_BORDER(Ipp16u, ipp16u, 16u_C1R); + else if (type == CV_16SC1) + IPP_FILTER_MEDIAN_BORDER(Ipp16s, ipp16s, 16s_C1R); + else if (type == CV_32FC1) + IPP_FILTER_MEDIAN_BORDER(Ipp32f, ipp32f, 32f_C1R); +#undef IPP_FILTER_MEDIAN_BORDER +#endif + #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::medianBlur(src0, dst, ksize)) return; @@ -2329,13 +2425,14 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d, Mat temp; copyMakeBorder( src, temp, radius, radius, radius, radius, borderType ); -#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7) +#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7) && 0 if( cn == 1 ) { bool ok; IPPBilateralFilter_8u_Invoker body(temp, dst, sigma_color * sigma_color, sigma_space * sigma_space, radius, &ok ); parallel_for_(Range(0, dst.rows), body, dst.total()/(double)(1<<16)); - if( ok ) return; + if( ok ) + return; } #endif diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp index 7fd0b2372d..17f323a1ad 100644 --- a/modules/imgproc/src/thresh.cpp +++ b/modules/imgproc/src/thresh.cpp @@ -53,11 +53,14 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type ) uchar tab[256]; Size roi = _src.size(); roi.width *= _src.channels(); + size_t src_step = _src.step; + size_t dst_step = _dst.step; if( _src.isContinuous() && _dst.isContinuous() ) { roi.width *= roi.height; roi.height = 1; + src_step = dst_step = roi.width; } #ifdef HAVE_TEGRA_OPTIMIZATION @@ -65,6 +68,25 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type ) return; #endif +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + IppiSize sz = { roi.width, roi.height }; + switch( type ) + { + case THRESH_TRUNC: + if (0 <= ippiThreshold_GT_8u_C1R(_src.data, (int)src_step, _dst.data, (int)dst_step, sz, thresh)) + return; + break; + case THRESH_TOZERO: + if (0 <= ippiThreshold_LTVal_8u_C1R(_src.data, (int)src_step, _dst.data, (int)dst_step, sz, thresh+1, 0)) + return; + break; + case THRESH_TOZERO_INV: + if (0 <= ippiThreshold_GTVal_8u_C1R(_src.data, (int)src_step, _dst.data, (int)dst_step, sz, thresh, 0)) + return; + break; + } +#endif + switch( type ) { case THRESH_BINARY: @@ -112,8 +134,8 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type ) for( i = 0; i < roi.height; i++ ) { - const uchar* src = (const uchar*)(_src.data + _src.step*i); - uchar* dst = (uchar*)(_dst.data + _dst.step*i); + const uchar* src = (const uchar*)(_src.data + src_step*i); + uchar* dst = (uchar*)(_dst.data + dst_step*i); switch( type ) { @@ -231,8 +253,8 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type ) { for( i = 0; i < roi.height; i++ ) { - const uchar* src = (const uchar*)(_src.data + _src.step*i); - uchar* dst = (uchar*)(_dst.data + _dst.step*i); + const uchar* src = (const uchar*)(_src.data + src_step*i); + uchar* dst = (uchar*)(_dst.data + dst_step*i); j = j_scalar; #if CV_ENABLE_UNROLLED for( ; j <= roi.width - 4; j += 4 ) @@ -276,6 +298,7 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type ) { roi.width *= roi.height; roi.height = 1; + src_step = dst_step = roi.width; } #ifdef HAVE_TEGRA_OPTIMIZATION @@ -283,6 +306,25 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type ) return; #endif +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + IppiSize sz = { roi.width, roi.height }; + switch( type ) + { + case THRESH_TRUNC: + if (0 <= ippiThreshold_GT_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh)) + return; + break; + case THRESH_TOZERO: + if (0 <= ippiThreshold_LTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+1, 0)) + return; + break; + case THRESH_TOZERO_INV: + if (0 <= ippiThreshold_GTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0)) + return; + break; + } +#endif + switch( type ) { case THRESH_BINARY: @@ -455,6 +497,25 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) return; #endif +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + IppiSize sz = { roi.width, roi.height }; + switch( type ) + { + case THRESH_TRUNC: + if (0 <= ippiThreshold_GT_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh)) + return; + break; + case THRESH_TOZERO: + if (0 <= ippiThreshold_LTVal_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+FLT_EPSILON, 0)) + return; + break; + case THRESH_TOZERO_INV: + if (0 <= ippiThreshold_GTVal_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0)) + return; + break; + } +#endif + switch( type ) { case THRESH_BINARY: diff --git a/modules/imgproc/test/ocl/test_color.cpp b/modules/imgproc/test/ocl/test_color.cpp index fcf270f8e7..f0cf560bb1 100644 --- a/modules/imgproc/test/ocl/test_color.cpp +++ b/modules/imgproc/test/ocl/test_color.cpp @@ -155,15 +155,23 @@ OCL_TEST_P(CvtColor, YCrCb2BGRA) { performTest(3, 4, CVTCODE(YCrCb2BGR)); } // RGB <-> XYZ -OCL_TEST_P(CvtColor, RGB2XYZ) { performTest(3, 3, CVTCODE(RGB2XYZ)); } -OCL_TEST_P(CvtColor, BGR2XYZ) { performTest(3, 3, CVTCODE(BGR2XYZ)); } -OCL_TEST_P(CvtColor, RGBA2XYZ) { performTest(4, 3, CVTCODE(RGB2XYZ)); } -OCL_TEST_P(CvtColor, BGRA2XYZ) { performTest(4, 3, CVTCODE(BGR2XYZ)); } +#if IPP_VERSION_X100 > 0 +#define IPP_EPS depth <= CV_32S ? 1 : 4e-5 +#else +#define IPP_EPS 0 +#endif -OCL_TEST_P(CvtColor, XYZ2RGB) { performTest(3, 3, CVTCODE(XYZ2RGB)); } -OCL_TEST_P(CvtColor, XYZ2BGR) { performTest(3, 3, CVTCODE(XYZ2BGR)); } -OCL_TEST_P(CvtColor, XYZ2RGBA) { performTest(3, 4, CVTCODE(XYZ2RGB)); } -OCL_TEST_P(CvtColor, XYZ2BGRA) { performTest(3, 4, CVTCODE(XYZ2BGR)); } +OCL_TEST_P(CvtColor, RGB2XYZ) { performTest(3, 3, CVTCODE(RGB2XYZ), IPP_EPS); } +OCL_TEST_P(CvtColor, BGR2XYZ) { performTest(3, 3, CVTCODE(BGR2XYZ), IPP_EPS); } +OCL_TEST_P(CvtColor, RGBA2XYZ) { performTest(4, 3, CVTCODE(RGB2XYZ), IPP_EPS); } +OCL_TEST_P(CvtColor, BGRA2XYZ) { performTest(4, 3, CVTCODE(BGR2XYZ), IPP_EPS); } + +OCL_TEST_P(CvtColor, XYZ2RGB) { performTest(3, 3, CVTCODE(XYZ2RGB), IPP_EPS); } +OCL_TEST_P(CvtColor, XYZ2BGR) { performTest(3, 3, CVTCODE(XYZ2BGR), IPP_EPS); } +OCL_TEST_P(CvtColor, XYZ2RGBA) { performTest(3, 4, CVTCODE(XYZ2RGB), IPP_EPS); } +OCL_TEST_P(CvtColor, XYZ2BGRA) { performTest(3, 4, CVTCODE(XYZ2BGR), IPP_EPS); } + +#undef IPP_EPS // RGB <-> HSV @@ -191,15 +199,21 @@ OCL_TEST_P(CvtColor8u32f, HSV2BGRA_FULL) { performTest(3, 4, CVTCODE(HSV2BGR_FUL // RGB <-> HLS +#if IPP_VERSION_X100 > 0 +#define IPP_EPS depth == CV_8U ? 2 : 1e-3 +#else +#define IPP_EPS depth == CV_8U ? 1 : 1e-3 +#endif + OCL_TEST_P(CvtColor8u32f, RGB2HLS) { performTest(3, 3, CVTCODE(RGB2HLS), depth == CV_8U ? 1 : 1e-3); } OCL_TEST_P(CvtColor8u32f, BGR2HLS) { performTest(3, 3, CVTCODE(BGR2HLS), depth == CV_8U ? 1 : 1e-3); } OCL_TEST_P(CvtColor8u32f, RGBA2HLS) { performTest(4, 3, CVTCODE(RGB2HLS), depth == CV_8U ? 1 : 1e-3); } OCL_TEST_P(CvtColor8u32f, BGRA2HLS) { performTest(4, 3, CVTCODE(BGR2HLS), depth == CV_8U ? 1 : 1e-3); } -OCL_TEST_P(CvtColor8u32f, RGB2HLS_FULL) { performTest(3, 3, CVTCODE(RGB2HLS_FULL), depth == CV_8U ? 1 : 1e-3); } -OCL_TEST_P(CvtColor8u32f, BGR2HLS_FULL) { performTest(3, 3, CVTCODE(BGR2HLS_FULL), depth == CV_8U ? 1 : 1e-3); } -OCL_TEST_P(CvtColor8u32f, RGBA2HLS_FULL) { performTest(4, 3, CVTCODE(RGB2HLS_FULL), depth == CV_8U ? 1 : 1e-3); } -OCL_TEST_P(CvtColor8u32f, BGRA2HLS_FULL) { performTest(4, 3, CVTCODE(BGR2HLS_FULL), depth == CV_8U ? 1 : 1e-3); } +OCL_TEST_P(CvtColor8u32f, RGB2HLS_FULL) { performTest(3, 3, CVTCODE(RGB2HLS_FULL), IPP_EPS); } +OCL_TEST_P(CvtColor8u32f, BGR2HLS_FULL) { performTest(3, 3, CVTCODE(BGR2HLS_FULL), IPP_EPS); } +OCL_TEST_P(CvtColor8u32f, RGBA2HLS_FULL) { performTest(4, 3, CVTCODE(RGB2HLS_FULL), IPP_EPS); } +OCL_TEST_P(CvtColor8u32f, BGRA2HLS_FULL) { performTest(4, 3, CVTCODE(BGR2HLS_FULL), IPP_EPS); } OCL_TEST_P(CvtColor8u32f, HLS2RGB) { performTest(3, 3, CVTCODE(HLS2RGB), 1); } OCL_TEST_P(CvtColor8u32f, HLS2BGR) { performTest(3, 3, CVTCODE(HLS2BGR), 1); } @@ -211,6 +225,8 @@ OCL_TEST_P(CvtColor8u32f, HLS2BGR_FULL) { performTest(3, 3, CVTCODE(HLS2BGR_FULL OCL_TEST_P(CvtColor8u32f, HLS2RGBA_FULL) { performTest(3, 4, CVTCODE(HLS2RGB_FULL), 1); } OCL_TEST_P(CvtColor8u32f, HLS2BGRA_FULL) { performTest(3, 4, CVTCODE(HLS2BGR_FULL), 1); } +#undef IPP_EPS + // RGB5x5 <-> RGB typedef CvtColor CvtColor8u; diff --git a/modules/imgproc/test/test_precomp.hpp b/modules/imgproc/test/test_precomp.hpp index 9650b7fcc8..53f315ee4f 100644 --- a/modules/imgproc/test/test_precomp.hpp +++ b/modules/imgproc/test/test_precomp.hpp @@ -11,6 +11,7 @@ #include #include "opencv2/ts.hpp" +#include "opencv2/core/private.hpp" #include "opencv2/imgproc.hpp" #include "opencv2/highgui.hpp" diff --git a/modules/legacy/test/test_stereomatching.cpp b/modules/legacy/test/test_stereomatching.cpp index 7262249844..95aa97bbe4 100644 --- a/modules/legacy/test/test_stereomatching.cpp +++ b/modules/legacy/test/test_stereomatching.cpp @@ -278,7 +278,7 @@ float dispRMS( const Mat& computedDisp, const Mat& groundTruthDisp, const Mat& m checkTypeAndSizeOfMask( mask, sz ); pointsCount = countNonZero(mask); } - return 1.f/sqrt((float)pointsCount) * (float)norm(computedDisp, groundTruthDisp, NORM_L2, mask); + return 1.f/sqrt((float)pointsCount) * (float)cvtest::norm(computedDisp, groundTruthDisp, NORM_L2, mask); } /* diff --git a/modules/optim/test/test_denoise_tvl1.cpp b/modules/optim/test/test_denoise_tvl1.cpp index 9334dc5c5b..76ec2cda3e 100644 --- a/modules/optim/test/test_denoise_tvl1.cpp +++ b/modules/optim/test/test_denoise_tvl1.cpp @@ -41,7 +41,8 @@ #include "test_precomp.hpp" #include "opencv2/highgui.hpp" -void make_noisy(const cv::Mat& img, cv::Mat& noisy, double sigma, double pepper_salt_ratio,cv::RNG& rng){ +void make_noisy(const cv::Mat& img, cv::Mat& noisy, double sigma, double pepper_salt_ratio,cv::RNG& rng) +{ noisy.create(img.size(), img.type()); cv::Mat noise(img.size(), img.type()), mask(img.size(), CV_8U); rng.fill(noise,cv::RNG::NORMAL,128.0,sigma); @@ -54,34 +55,36 @@ void make_noisy(const cv::Mat& img, cv::Mat& noisy, double sigma, double pepper_ noise.setTo(128, mask); cv::addWeighted(noisy, 1, noise, 1, -128, noisy); } -void make_spotty(cv::Mat& img,cv::RNG& rng, int r=3,int n=1000){ - for(int i=0;i(x,y),val,(val==image.at(x,y))?"true":"false"); return (image.at(x,y)==val); } -TEST(Optim_denoise_tvl1, regression_basic){ +TEST(Optim_denoise_tvl1, regression_basic) +{ cv::RNG rng(42); - cv::Mat img = cv::imread("lena.jpg", 0), noisy,res; - if(img.rows!=512 || img.cols!=512){ - printf("\tplease, put lena.jpg from samples/c in the current folder\n"); - printf("\tnow, the test will fail...\n"); - ASSERT_TRUE(false); - } + cv::Mat img = cv::imread(cvtest::TS::ptr()->get_data_path() + "shared/lena.png", 0), noisy, res; + + ASSERT_FALSE(img.empty()) << "Error: can't open 'lena.png'"; const int obs_num=5; - std::vector images(obs_num,cv::Mat()); - for(int i=0;i<(int)images.size();i++){ + std::vector images(obs_num, cv::Mat()); + for(int i=0;i<(int)images.size();i++) + { make_noisy(img,images[i], 20, 0.02,rng); //make_spotty(images[i],rng); } diff --git a/modules/photo/test/test_denoising.cpp b/modules/photo/test/test_denoising.cpp index ca4f63f222..9808e9cddc 100644 --- a/modules/photo/test/test_denoising.cpp +++ b/modules/photo/test/test_denoising.cpp @@ -73,7 +73,7 @@ TEST(Photo_DenoisingGrayscale, regression) DUMP(result, expected_path + ".res.png"); - ASSERT_EQ(0, norm(result != expected)); + ASSERT_EQ(0, cvtest::norm(result, expected, NORM_L2)); } TEST(Photo_DenoisingColored, regression) @@ -93,7 +93,7 @@ TEST(Photo_DenoisingColored, regression) DUMP(result, expected_path + ".res.png"); - ASSERT_EQ(0, norm(result != expected)); + ASSERT_EQ(0, cvtest::norm(result, expected, NORM_L2)); } TEST(Photo_DenoisingGrayscaleMulti, regression) @@ -118,7 +118,7 @@ TEST(Photo_DenoisingGrayscaleMulti, regression) DUMP(result, expected_path + ".res.png"); - ASSERT_EQ(0, norm(result != expected)); + ASSERT_EQ(0, cvtest::norm(result, expected, NORM_L2)); } TEST(Photo_DenoisingColoredMulti, regression) @@ -143,7 +143,7 @@ TEST(Photo_DenoisingColoredMulti, regression) DUMP(result, expected_path + ".res.png"); - ASSERT_EQ(0, norm(result != expected)); + ASSERT_EQ(0, cvtest::norm(result, expected, NORM_L2)); } TEST(Photo_White, issue_2646) diff --git a/modules/photo/test/test_inpaint.cpp b/modules/photo/test/test_inpaint.cpp index 3c341b27a0..8f031e8d38 100644 --- a/modules/photo/test/test_inpaint.cpp +++ b/modules/photo/test/test_inpaint.cpp @@ -91,8 +91,8 @@ void CV_InpaintTest::run( int ) absdiff( orig, res1, diff1 ); absdiff( orig, res2, diff2 ); - double n1 = norm(diff1.reshape(1), NORM_INF, inv_mask.reshape(1)); - double n2 = norm(diff2.reshape(1), NORM_INF, inv_mask.reshape(1)); + double n1 = cvtest::norm(diff1.reshape(1), NORM_INF, inv_mask.reshape(1)); + double n2 = cvtest::norm(diff2.reshape(1), NORM_INF, inv_mask.reshape(1)); if (n1 != 0 || n2 != 0) { @@ -103,8 +103,8 @@ void CV_InpaintTest::run( int ) absdiff( exp1, res1, diff1 ); absdiff( exp2, res2, diff2 ); - n1 = norm(diff1.reshape(1), NORM_INF, mask.reshape(1)); - n2 = norm(diff2.reshape(1), NORM_INF, mask.reshape(1)); + n1 = cvtest::norm(diff1.reshape(1), NORM_INF, mask.reshape(1)); + n2 = cvtest::norm(diff2.reshape(1), NORM_INF, mask.reshape(1)); const int jpeg_thres = 3; if (n1 > jpeg_thres || n2 > jpeg_thres) diff --git a/modules/stitching/test/test_blenders.cpp b/modules/stitching/test/test_blenders.cpp index 6702eabf0c..cb84482f21 100644 --- a/modules/stitching/test/test_blenders.cpp +++ b/modules/stitching/test/test_blenders.cpp @@ -73,6 +73,6 @@ TEST(MultiBandBlender, CanBlendTwoImages) Mat result; result_s.convertTo(result, CV_8U); Mat expected = imread(string(cvtest::TS::ptr()->get_data_path()) + "stitching/baboon_lena.png"); - double rmsErr = norm(expected, result, NORM_L2) / sqrt(double(expected.size().area())); + double rmsErr = cvtest::norm(expected, result, NORM_L2) / sqrt(double(expected.size().area())); ASSERT_LT(rmsErr, 1e-3); } diff --git a/modules/ts/include/opencv2/ts.hpp b/modules/ts/include/opencv2/ts.hpp index 457f00b3e2..8aeec65712 100644 --- a/modules/ts/include/opencv2/ts.hpp +++ b/modules/ts/include/opencv2/ts.hpp @@ -129,6 +129,7 @@ CV_EXPORTS void minMaxLoc(const Mat& src, double* minval, double* maxval, CV_EXPORTS double norm(InputArray src, int normType, InputArray mask=noArray()); CV_EXPORTS double norm(InputArray src1, InputArray src2, int normType, InputArray mask=noArray()); CV_EXPORTS Scalar mean(const Mat& src, const Mat& mask=Mat()); +CV_EXPORTS double PSNR(InputArray src1, InputArray src2); CV_EXPORTS bool cmpUlps(const Mat& data, const Mat& refdata, int expMaxDiff, double* realMaxDiff, vector* idx); diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp index e3563caa4f..2042f5cf0c 100644 --- a/modules/ts/src/ts_func.cpp +++ b/modules/ts/src/ts_func.cpp @@ -1399,6 +1399,12 @@ double norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask) return isRelative ? result / (cvtest::norm(src2, normType) + DBL_EPSILON) : result; } +double PSNR(InputArray _src1, InputArray _src2) +{ + CV_Assert( _src1.depth() == CV_8U ); + double diff = std::sqrt(cvtest::norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels())); + return 20*log10(255./(diff+DBL_EPSILON)); +} template static double crossCorr_(const _Tp* src1, const _Tp* src2, size_t total) diff --git a/modules/video/src/motempl.cpp b/modules/video/src/motempl.cpp index 3fc87e657e..152706b9fe 100644 --- a/modules/video/src/motempl.cpp +++ b/modules/video/src/motempl.cpp @@ -80,13 +80,27 @@ void cv::updateMotionHistory( InputArray _silhouette, InputOutputArray _mhi, Mat silh = _silhouette.getMat(), mhi = _mhi.getMat(); Size size = silh.size(); +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + int silhstep = (int)silh.step, mhistep = (int)mhi.step; +#endif if( silh.isContinuous() && mhi.isContinuous() ) { size.width *= size.height; size.height = 1; +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + silhstep = (int)silh.total(); + mhistep = (int)mhi.total() * sizeof(Ipp32f); +#endif } +#if defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) + IppStatus status = ippiUpdateMotionHistory_8u32f_C1IR((const Ipp8u *)silh.data, silhstep, (Ipp32f *)mhi.data, mhistep, + ippiSize(size.width, size.height), (Ipp32f)timestamp, (Ipp32f)duration); + if (status >= 0) + return; +#endif + #if CV_SSE2 volatile bool useSIMD = cv::checkHardwareSupport(CV_CPU_SSE2); #endif diff --git a/modules/video/test/test_estimaterigid.cpp b/modules/video/test/test_estimaterigid.cpp index 5259ce7ee2..50508b4abb 100644 --- a/modules/video/test/test_estimaterigid.cpp +++ b/modules/video/test/test_estimaterigid.cpp @@ -109,8 +109,8 @@ bool CV_RigidTransform_Test::testNPoints(int from) Mat aff_est = estimateRigidTransform(fpts, tpts, true); - double thres = 0.1*norm(aff); - double d = norm(aff_est, aff, NORM_L2); + double thres = 0.1*cvtest::norm(aff, NORM_L2); + double d = cvtest::norm(aff_est, aff, NORM_L2); if (d > thres) { double dB=0, nB=0; @@ -120,7 +120,7 @@ bool CV_RigidTransform_Test::testNPoints(int from) Mat B = A - repeat(A.row(0), 3, 1), Bt = B.t(); B = Bt*B; dB = cv::determinant(B); - nB = norm(B); + nB = cvtest::norm(B, NORM_L2); if( fabs(dB) < 0.01*nB ) continue; } @@ -154,11 +154,11 @@ bool CV_RigidTransform_Test::testImage() Mat aff_est = estimateRigidTransform(img, rotated, true); const double thres = 0.033; - if (norm(aff_est, aff, NORM_INF) > thres) + if (cvtest::norm(aff_est, aff, NORM_INF) > thres) { ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY); ts->printf( cvtest::TS::LOG, "Threshold = %f, norm of difference = %f", thres, - norm(aff_est, aff, NORM_INF) ); + cvtest::norm(aff_est, aff, NORM_INF) ); return false; }