mirror of
https://github.com/opencv/opencv.git
synced 2024-11-27 12:40:05 +08:00
ICV2017u3 package update;
- Optimization set change: IPP integrations now provide code for SSE42, AVX2 and AVX512 (SKX) CPUs only. For hardware below SSE42, IPP code is disabled. - Performance regression fixes for IPP code paths; - cv::boxFilter integration improvement; - cv::filter2D integration improvement;
This commit is contained in:
parent
87c27a074d
commit
a57718e1ac
7
3rdparty/ippicv/CMakeLists.txt
vendored
7
3rdparty/ippicv/CMakeLists.txt
vendored
@ -6,7 +6,7 @@ project(${IPP_IW_LIBRARY})
|
||||
|
||||
ocv_include_directories(${IPP_INCLUDE_DIRS} ${IPP_IW_PATH}/include)
|
||||
add_definitions(-DIW_BUILD)
|
||||
if(HAVE_IPP_ICV_ONLY)
|
||||
if(HAVE_IPP_ICV)
|
||||
add_definitions(-DICV_BASE)
|
||||
endif()
|
||||
|
||||
@ -21,7 +21,10 @@ add_library(${IPP_IW_LIBRARY} STATIC ${lib_srcs} ${lib_hdrs})
|
||||
|
||||
if(UNIX)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -Wno-unused-function")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -Wno-unused-function -Wno-missing-braces -Wno-missing-field-initializers")
|
||||
endif()
|
||||
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-self-assign")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
26
3rdparty/ippicv/ippicv.cmake
vendored
26
3rdparty/ippicv/ippicv.cmake
vendored
@ -2,37 +2,37 @@ function(download_ippicv root_var)
|
||||
set(${root_var} "" PARENT_SCOPE)
|
||||
|
||||
# Commit SHA in the opencv_3rdparty repo
|
||||
set(IPPICV_COMMIT "a62e20676a60ee0ad6581e217fe7e4bada3b95db")
|
||||
set(IPPICV_COMMIT "dfe3162c237af211e98b8960018b564bc209261d")
|
||||
# Define actual ICV versions
|
||||
if(APPLE)
|
||||
set(OPENCV_ICV_PLATFORM "macosx")
|
||||
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_mac")
|
||||
if(X86_64)
|
||||
set(OPENCV_ICV_NAME "ippicv_2017u2_mac_intel64_20170418.tgz")
|
||||
set(OPENCV_ICV_HASH "0c25953c99dbb499ff502485a9356d8d")
|
||||
set(OPENCV_ICV_NAME "ippicv_2017u3_mac_intel64_general_20170822.tgz")
|
||||
set(OPENCV_ICV_HASH "c1ebb5dfa5b7f54b0c44e1917805a463")
|
||||
else()
|
||||
set(OPENCV_ICV_NAME "ippicv_2017u2_mac_ia32_20170418.tgz")
|
||||
set(OPENCV_ICV_HASH "5f225948f3f64067c681293c098d50d8")
|
||||
set(OPENCV_ICV_NAME "ippicv_2017u3_mac_ia32_general_20170822.tgz")
|
||||
set(OPENCV_ICV_HASH "49b05a669042753ae75895a445ebd612")
|
||||
endif()
|
||||
elseif((UNIX AND NOT ANDROID) OR (UNIX AND ANDROID_ABI MATCHES "x86"))
|
||||
set(OPENCV_ICV_PLATFORM "linux")
|
||||
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_lnx")
|
||||
if(X86_64)
|
||||
set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_intel64_20170418.tgz")
|
||||
set(OPENCV_ICV_HASH "87cbdeb627415d8e4bc811156289fa3a")
|
||||
set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_intel64_general_20170822.tgz")
|
||||
set(OPENCV_ICV_HASH "4e0352ce96473837b1d671ce87f17359")
|
||||
else()
|
||||
set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_ia32_20170418.tgz")
|
||||
set(OPENCV_ICV_HASH "f2cece00d802d4dea86df52ed095257e")
|
||||
set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_ia32_general_20170822.tgz")
|
||||
set(OPENCV_ICV_HASH "dcdb0ba4b123f240596db1840cd59a76")
|
||||
endif()
|
||||
elseif(WIN32 AND NOT ARM)
|
||||
set(OPENCV_ICV_PLATFORM "windows")
|
||||
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_win")
|
||||
if(X86_64)
|
||||
set(OPENCV_ICV_NAME "ippicv_2017u2_win_intel64_20170418.zip")
|
||||
set(OPENCV_ICV_HASH "75060a0c662c0800f48995b7e9b085f6")
|
||||
set(OPENCV_ICV_NAME "ippicv_2017u3_win_intel64_general_20170822.zip")
|
||||
set(OPENCV_ICV_HASH "0421e642bc7ad741a2236d3ec4190bdd")
|
||||
else()
|
||||
set(OPENCV_ICV_NAME "ippicv_2017u2_win_ia32_20170418.zip")
|
||||
set(OPENCV_ICV_HASH "60fcf3ccd9a2ebc9e432ffb5cb91638b")
|
||||
set(OPENCV_ICV_NAME "ippicv_2017u3_win_ia32_general_20170822.zip")
|
||||
set(OPENCV_ICV_HASH "8a7680ae352c192de2e2e34936164bd0")
|
||||
endif()
|
||||
else()
|
||||
return()
|
||||
|
@ -255,7 +255,6 @@ OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON
|
||||
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )
|
||||
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF (WIN32 AND NOT WINRT) )
|
||||
OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF (WIN32 AND NOT WINRT) )
|
||||
OCV_OPTION(WITH_IPP_A "Include Intel IPP_A support" OFF IF (MSVC OR X86 OR X86_64) )
|
||||
OCV_OPTION(WITH_MATLAB "Include Matlab support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT))
|
||||
OCV_OPTION(WITH_VA "Include VA support" OFF IF (UNIX AND NOT ANDROID) )
|
||||
OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) )
|
||||
@ -1311,15 +1310,16 @@ status(" Other third-party libraries:")
|
||||
if(WITH_IPP AND HAVE_IPP)
|
||||
status(" Use Intel IPP:" "${IPP_VERSION_STR} [${IPP_VERSION_MAJOR}.${IPP_VERSION_MINOR}.${IPP_VERSION_BUILD}]")
|
||||
status(" at:" "${IPP_ROOT_DIR}")
|
||||
if(NOT HAVE_IPP_ICV_ONLY)
|
||||
if(NOT HAVE_IPP_ICV)
|
||||
status(" linked:" BUILD_WITH_DYNAMIC_IPP THEN "dynamic" ELSE "static")
|
||||
endif()
|
||||
if(HAVE_IPP_IW)
|
||||
if(BUILD_IPP_IW)
|
||||
status(" Use Intel IPP IW:" "build (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
|
||||
status(" Use Intel IPP IW:" "sources (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
|
||||
else()
|
||||
status(" Use Intel IPP IW:" "prebuilt binaries (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
|
||||
status(" Use Intel IPP IW:" "binaries (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
|
||||
endif()
|
||||
status(" at:" "${IPP_IW_PATH}")
|
||||
else()
|
||||
status(" Use Intel IPP IW:" NO)
|
||||
endif()
|
||||
@ -1328,10 +1328,6 @@ else()
|
||||
status(" Use Intel IPP IW:" WITH_IPP AND NOT HAVE_IPP AND HAVE_IPP_IW THEN "IPP not found or implicitly disabled" ELSE NO)
|
||||
endif()
|
||||
|
||||
if(DEFINED WITH_IPP_A)
|
||||
status(" Use Intel IPP Async:" HAVE_IPP_A THEN "YES" ELSE NO)
|
||||
endif(DEFINED WITH_IPP_A)
|
||||
|
||||
if(DEFINED WITH_VA)
|
||||
status(" Use VA:" HAVE_VA THEN "YES" ELSE NO)
|
||||
endif(DEFINED WITH_VA)
|
||||
|
@ -11,13 +11,13 @@
|
||||
#
|
||||
# On return this will define:
|
||||
#
|
||||
# HAVE_IPP - True if Intel IPP found
|
||||
# HAVE_IPP_ICV_ONLY - True if Intel IPP ICV version is available
|
||||
# IPP_ROOT_DIR - root of IPP installation
|
||||
# IPP_INCLUDE_DIRS - IPP include folder
|
||||
# IPP_LIBRARIES - IPP libraries that are used by OpenCV
|
||||
# IPP_VERSION_STR - string with the newest detected IPP version
|
||||
# IPP_VERSION_MAJOR - numbers of IPP version (MAJOR.MINOR.BUILD)
|
||||
# HAVE_IPP - True if Intel IPP found
|
||||
# HAVE_IPP_ICV - True if Intel IPP ICV version is available
|
||||
# IPP_ROOT_DIR - root of IPP installation
|
||||
# IPP_INCLUDE_DIRS - IPP include folder
|
||||
# IPP_LIBRARIES - IPP libraries that are used by OpenCV
|
||||
# IPP_VERSION_STR - string with the newest detected IPP version
|
||||
# IPP_VERSION_MAJOR - numbers of IPP version (MAJOR.MINOR.BUILD)
|
||||
# IPP_VERSION_MINOR
|
||||
# IPP_VERSION_BUILD
|
||||
#
|
||||
@ -25,7 +25,7 @@
|
||||
#
|
||||
|
||||
unset(HAVE_IPP CACHE)
|
||||
unset(HAVE_IPP_ICV_ONLY)
|
||||
unset(HAVE_IPP_ICV)
|
||||
unset(IPP_ROOT_DIR)
|
||||
unset(IPP_INCLUDE_DIRS)
|
||||
unset(IPP_LIBRARIES)
|
||||
@ -79,7 +79,7 @@ endmacro()
|
||||
macro(_ipp_not_supported)
|
||||
message(STATUS ${ARGN})
|
||||
unset(HAVE_IPP)
|
||||
unset(HAVE_IPP_ICV_ONLY)
|
||||
unset(HAVE_IPP_ICV)
|
||||
unset(IPP_VERSION_STR)
|
||||
return()
|
||||
endmacro()
|
||||
@ -92,7 +92,7 @@ macro(ipp_detect_version)
|
||||
set(__msg)
|
||||
if(EXISTS ${IPP_ROOT_DIR}/include/ippicv_redefs.h)
|
||||
set(__msg " (ICV version)")
|
||||
set(HAVE_IPP_ICV_ONLY 1)
|
||||
set(HAVE_IPP_ICV 1)
|
||||
elseif(EXISTS ${IPP_ROOT_DIR}/include/ipp.h)
|
||||
# nothing
|
||||
else()
|
||||
@ -118,7 +118,7 @@ macro(ipp_detect_version)
|
||||
set(IPP_LIBRARY_DIR ${DIR})
|
||||
endmacro()
|
||||
|
||||
if(APPLE AND NOT HAVE_IPP_ICV_ONLY)
|
||||
if(APPLE AND NOT HAVE_IPP_ICV)
|
||||
_ipp_set_library_dir(${IPP_ROOT_DIR}/lib)
|
||||
elseif(IPP_X64)
|
||||
_ipp_set_library_dir(${IPP_ROOT_DIR}/lib/intel64)
|
||||
@ -128,7 +128,7 @@ macro(ipp_detect_version)
|
||||
|
||||
macro(_ipp_add_library name)
|
||||
# dynamic linking is only supported for standalone version of Intel IPP
|
||||
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY)
|
||||
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
|
||||
if (WIN32)
|
||||
set(IPP_LIB_PREFIX ${CMAKE_IMPORT_LIBRARY_PREFIX})
|
||||
set(IPP_LIB_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX})
|
||||
@ -141,7 +141,7 @@ macro(ipp_detect_version)
|
||||
set(IPP_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||
endif ()
|
||||
if (EXISTS ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
|
||||
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY)
|
||||
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
|
||||
# When using dynamic libraries from standalone Intel IPP it is your responsibility to install those on the target system
|
||||
list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
|
||||
else ()
|
||||
@ -167,14 +167,14 @@ macro(ipp_detect_version)
|
||||
|
||||
set(IPP_PREFIX "ipp")
|
||||
if(${IPP_VERSION_STR} VERSION_LESS "8.0")
|
||||
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY)
|
||||
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
|
||||
set(IPP_SUFFIX "") # dynamic not threaded libs suffix Intel IPP 7.x
|
||||
else ()
|
||||
set(IPP_SUFFIX "_l") # static not threaded libs suffix Intel IPP 7.x
|
||||
endif ()
|
||||
else ()
|
||||
if(WIN32)
|
||||
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY)
|
||||
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
|
||||
set(IPP_SUFFIX "") # dynamic not threaded libs suffix Intel IPP 8.x for Windows
|
||||
else ()
|
||||
set(IPP_SUFFIX "mt") # static not threaded libs suffix Intel IPP 8.x for Windows
|
||||
@ -184,7 +184,7 @@ macro(ipp_detect_version)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(HAVE_IPP_ICV_ONLY)
|
||||
if(HAVE_IPP_ICV)
|
||||
_ipp_add_library(icv)
|
||||
else()
|
||||
_ipp_add_library(cv)
|
||||
|
@ -27,6 +27,7 @@ macro(ippiw_debugmsg MESSAGE)
|
||||
message(STATUS "${MESSAGE}")
|
||||
endif()
|
||||
endmacro()
|
||||
file(TO_CMAKE_PATH "${IPPROOT}" IPPROOT)
|
||||
|
||||
# This macro detects the Intel IPP IW version by analyzing a .h file
|
||||
macro(ippiw_setup PATH BUILD)
|
||||
@ -153,7 +154,7 @@ ippiw_setup("${IPPROOT}/../${IW_PACKAGE_SUBDIR}/" 0)
|
||||
|
||||
|
||||
# take Intel IPP IW from ICV package
|
||||
if(NOT HAVE_IPP_ICV_ONLY AND BUILD_IPP_IW)
|
||||
if(NOT HAVE_IPP_ICV AND BUILD_IPP_IW)
|
||||
message(STATUS "Cannot find Intel IPP IW. Checking \"Intel IPP for OpenCV\" package")
|
||||
set(TEMP_ROOT 0)
|
||||
include("${OpenCV_SOURCE_DIR}/3rdparty/ippicv/ippicv.cmake")
|
||||
|
@ -103,7 +103,7 @@
|
||||
|
||||
/* Intel Integrated Performance Primitives */
|
||||
#cmakedefine HAVE_IPP
|
||||
#cmakedefine HAVE_IPP_ICV_ONLY
|
||||
#cmakedefine HAVE_IPP_ICV
|
||||
#cmakedefine HAVE_IPP_IW
|
||||
|
||||
/* Intel IPP Async */
|
||||
|
@ -693,8 +693,14 @@ CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, c
|
||||
int line = 0);
|
||||
CV_EXPORTS int getIppStatus();
|
||||
CV_EXPORTS String getIppErrorLocation();
|
||||
CV_EXPORTS_W bool useIPP();
|
||||
CV_EXPORTS_W void setUseIPP(bool flag);
|
||||
CV_EXPORTS_W bool useIPP();
|
||||
CV_EXPORTS_W void setUseIPP(bool flag);
|
||||
CV_EXPORTS_W String getIppVersion();
|
||||
|
||||
// IPP Not-Exact mode. This function may force use of IPP when both IPP and OpenCV provide proper results
|
||||
// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests.
|
||||
CV_EXPORTS_W bool useIPP_NE();
|
||||
CV_EXPORTS_W void setUseIPP_NE(bool flag);
|
||||
|
||||
} // ipp
|
||||
|
||||
|
@ -194,8 +194,6 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
|
||||
#define IPP_DISABLE_WARPAFFINE 1 // Different results
|
||||
#define IPP_DISABLE_WARPPERSPECTIVE 1 // Different results
|
||||
#define IPP_DISABLE_REMAP 1 // Different results
|
||||
#define IPP_DISABLE_MORPH_ADV 1 // mask flipping in IPP
|
||||
#define IPP_DISABLE_SORT_IDX 0 // different order in index tables
|
||||
#define IPP_DISABLE_YUV_RGB 1 // accuracy difference
|
||||
#define IPP_DISABLE_RGB_YUV 1 // breaks OCL accuracy tests
|
||||
#define IPP_DISABLE_RGB_HSV 1 // breaks OCL accuracy tests
|
||||
@ -205,21 +203,12 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
|
||||
#define IPP_DISABLE_XYZ_RGB 1 // big accuracy difference
|
||||
#define IPP_DISABLE_HAAR 1 // improper integration/results
|
||||
#define IPP_DISABLE_HOUGH 1 // improper integration/results
|
||||
#define IPP_DISABLE_RESIZE_8U 1 // Incompatible accuracy
|
||||
#define IPP_DISABLE_RESIZE_NEAREST 1 // Accuracy mismatch (max diff 1)
|
||||
#define IPP_DISABLE_RESIZE_AREA 1 // Accuracy mismatch (max diff 1)
|
||||
|
||||
#define IPP_DISABLE_MINMAX_NAN_SSE42 1 // cv::minMaxIdx problem with NaN input
|
||||
|
||||
// Temporarily disabled named IPP regions. Performance
|
||||
#define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations
|
||||
#define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653)
|
||||
#define IPP_DISABLE_PERF_TRUE_DIST_MT 1 // cv::distanceTransform OpenCV MT performance is better
|
||||
#define IPP_DISABLE_PERF_CANNY_MT 1 // cv::Canny OpenCV MT performance is better
|
||||
#define IPP_DISABLE_PERF_HISTU32F_SSE42 1 // cv::calcHist optimizations problem
|
||||
#define IPP_DISABLE_PERF_MORPH_SSE42 1 // cv::erode, cv::dilate optimizations problem
|
||||
#define IPP_DISABLE_PERF_MAG_SSE42 1 // cv::magnitude optimizations problem
|
||||
#define IPP_DISABLE_PERF_BOX16S_SSE42 1 // cv::boxFilter optimizations problem
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
#include "ippversion.h"
|
||||
@ -229,7 +218,7 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
|
||||
|
||||
#define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR*10 + IPP_VERSION_UPDATE)
|
||||
|
||||
#ifdef HAVE_IPP_ICV_ONLY
|
||||
#ifdef HAVE_IPP_ICV
|
||||
#define ICV_BASE
|
||||
#if IPP_VERSION_X100 >= 201700
|
||||
#include "ippicv.h"
|
||||
@ -241,6 +230,7 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
|
||||
#endif
|
||||
#ifdef HAVE_IPP_IW
|
||||
#include "iw++/iw.hpp"
|
||||
#include "iw/iw_ll.h"
|
||||
#endif
|
||||
|
||||
#if IPP_VERSION_X100 >= 201700
|
||||
@ -251,6 +241,17 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
|
||||
|
||||
#define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__)
|
||||
|
||||
#define ippCPUID_AVX512_SKX (ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512VL|ippCPUID_AVX512BW|ippCPUID_AVX512DQ)
|
||||
#define ippCPUID_AVX512_KNL (ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512PF|ippCPUID_AVX512ER)
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ipp
|
||||
{
|
||||
CV_EXPORTS unsigned long long getIppTopFeatures(); // Returns top major enabled IPP feature flag
|
||||
}
|
||||
}
|
||||
|
||||
static inline IppiSize ippiSize(size_t width, size_t height)
|
||||
{
|
||||
IppiSize size = { (int)width, (int)height };
|
||||
@ -322,7 +323,43 @@ static inline IppDataType ippiGetDataType(int depth)
|
||||
(IppDataType)-1;
|
||||
}
|
||||
|
||||
static inline int ippiSuggestThreadsNum(size_t width, size_t height, size_t elemSize, double multiplier)
|
||||
{
|
||||
int threads = cv::getNumThreads();
|
||||
if(threads > 1 && height >= 64)
|
||||
{
|
||||
size_t opMemory = (int)(width*height*elemSize*multiplier);
|
||||
int l2cache = 0;
|
||||
#if IPP_VERSION_X100 >= 201700
|
||||
ippGetL2CacheSize(&l2cache);
|
||||
#endif
|
||||
if(!l2cache)
|
||||
l2cache = 1 << 18;
|
||||
|
||||
return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads)));
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int ippiSuggestThreadsNum(const cv::Mat &image, double multiplier)
|
||||
{
|
||||
return ippiSuggestThreadsNum(image.cols, image.rows, image.elemSize(), multiplier);
|
||||
}
|
||||
|
||||
#ifdef HAVE_IPP_IW
|
||||
static inline bool ippiCheckAnchor(int x, int y, int kernelWidth, int kernelHeight)
|
||||
{
|
||||
if(x != ((kernelWidth-1)/2) || y != ((kernelHeight-1)/2))
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline ::ipp::IwiSize ippiGetSize(const cv::Size & size)
|
||||
{
|
||||
return ::ipp::IwiSize((IwSize)size.width, (IwSize)size.height);
|
||||
}
|
||||
|
||||
static inline IwiDerivativeType ippiGetDerivType(int dx, int dy, bool nvert)
|
||||
{
|
||||
return (dx == 1 && dy == 0) ? ((nvert)?iwiDerivNVerFirst:iwiDerivVerFirst) :
|
||||
@ -341,10 +378,10 @@ static inline void ippiGetImage(const cv::Mat &src, ::ipp::IwiImage &dst)
|
||||
cv::Point offset;
|
||||
src.locateROI(origSize, offset);
|
||||
|
||||
inMemBorder.borderLeft = (Ipp32u)offset.x;
|
||||
inMemBorder.borderTop = (Ipp32u)offset.y;
|
||||
inMemBorder.borderRight = (Ipp32u)(origSize.width - src.cols - offset.x);
|
||||
inMemBorder.borderBottom = (Ipp32u)(origSize.height - src.rows - offset.y);
|
||||
inMemBorder.left = (IwSize)offset.x;
|
||||
inMemBorder.top = (IwSize)offset.y;
|
||||
inMemBorder.right = (IwSize)(origSize.width - src.cols - offset.x);
|
||||
inMemBorder.bottom = (IwSize)(origSize.height - src.rows - offset.y);
|
||||
}
|
||||
|
||||
dst.Init(ippiSize(src.size()), ippiGetDataType(src.depth()), src.channels(), inMemBorder, (void*)src.ptr(), src.step);
|
||||
@ -357,7 +394,7 @@ static inline ::ipp::IwiImage ippiGetImage(const cv::Mat &src)
|
||||
return image;
|
||||
}
|
||||
|
||||
static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorderType, IppiBorderSize &borderSize)
|
||||
static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorderType, ipp::IwiBorderSize &borderSize)
|
||||
{
|
||||
int inMemFlags = 0;
|
||||
IppiBorderType border = ippiGetBorderType(ocvBorderType & ~cv::BORDER_ISOLATED);
|
||||
@ -366,91 +403,60 @@ static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorder
|
||||
|
||||
if(!(ocvBorderType & cv::BORDER_ISOLATED))
|
||||
{
|
||||
if(image.m_inMemSize.borderLeft)
|
||||
if(image.m_inMemSize.left)
|
||||
{
|
||||
if(image.m_inMemSize.borderLeft >= borderSize.borderLeft)
|
||||
if(image.m_inMemSize.left >= borderSize.left)
|
||||
inMemFlags |= ippBorderInMemLeft;
|
||||
else
|
||||
return (IppiBorderType)0;
|
||||
}
|
||||
else
|
||||
borderSize.borderLeft = 0;
|
||||
if(image.m_inMemSize.borderTop)
|
||||
borderSize.left = 0;
|
||||
if(image.m_inMemSize.top)
|
||||
{
|
||||
if(image.m_inMemSize.borderTop >= borderSize.borderTop)
|
||||
if(image.m_inMemSize.top >= borderSize.top)
|
||||
inMemFlags |= ippBorderInMemTop;
|
||||
else
|
||||
return (IppiBorderType)0;
|
||||
}
|
||||
else
|
||||
borderSize.borderTop = 0;
|
||||
if(image.m_inMemSize.borderRight)
|
||||
borderSize.top = 0;
|
||||
if(image.m_inMemSize.right)
|
||||
{
|
||||
if(image.m_inMemSize.borderRight >= borderSize.borderRight)
|
||||
if(image.m_inMemSize.right >= borderSize.right)
|
||||
inMemFlags |= ippBorderInMemRight;
|
||||
else
|
||||
return (IppiBorderType)0;
|
||||
}
|
||||
else
|
||||
borderSize.borderRight = 0;
|
||||
if(image.m_inMemSize.borderBottom)
|
||||
borderSize.right = 0;
|
||||
if(image.m_inMemSize.bottom)
|
||||
{
|
||||
if(image.m_inMemSize.borderBottom >= borderSize.borderBottom)
|
||||
if(image.m_inMemSize.bottom >= borderSize.bottom)
|
||||
inMemFlags |= ippBorderInMemBottom;
|
||||
else
|
||||
return (IppiBorderType)0;
|
||||
}
|
||||
else
|
||||
borderSize.borderBottom = 0;
|
||||
borderSize.bottom = 0;
|
||||
}
|
||||
else
|
||||
borderSize.borderLeft = borderSize.borderRight = borderSize.borderTop = borderSize.borderBottom = 0;
|
||||
borderSize.left = borderSize.right = borderSize.top = borderSize.bottom = 0;
|
||||
|
||||
return (IppiBorderType)(border|inMemFlags);
|
||||
}
|
||||
|
||||
static inline ::ipp::IwValue ippiGetValue(const cv::Scalar &scalar)
|
||||
static inline ::ipp::IwValueFloat ippiGetValue(const cv::Scalar &scalar)
|
||||
{
|
||||
return ::ipp::IwValue(scalar[0], scalar[1], scalar[2], scalar[3]);
|
||||
return ::ipp::IwValueFloat(scalar[0], scalar[1], scalar[2], scalar[3]);
|
||||
}
|
||||
|
||||
static inline int ippiSuggestThreadsNum(const ::ipp::IwiImage &image, double multiplier)
|
||||
{
|
||||
int threads = cv::getNumThreads();
|
||||
if(image.m_size.height > threads)
|
||||
{
|
||||
size_t opMemory = (int)(image.m_step*image.m_size.height*multiplier);
|
||||
int l2cache = 0;
|
||||
#if IPP_VERSION_X100 >= 201700
|
||||
ippGetL2CacheSize(&l2cache);
|
||||
#endif
|
||||
if(!l2cache)
|
||||
l2cache = 1 << 18;
|
||||
|
||||
return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads)));
|
||||
}
|
||||
return 1;
|
||||
return ippiSuggestThreadsNum(image.m_size.width, image.m_size.height, image.m_typeSize*image.m_channels, multiplier);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int ippiSuggestThreadsNum(const cv::Mat &image, double multiplier)
|
||||
{
|
||||
int threads = cv::getNumThreads();
|
||||
if(image.rows > threads)
|
||||
{
|
||||
size_t opMemory = (int)(image.total()*multiplier);
|
||||
int l2cache = 0;
|
||||
#if IPP_VERSION_X100 >= 201700
|
||||
ippGetL2CacheSize(&l2cache);
|
||||
#endif
|
||||
if(!l2cache)
|
||||
l2cache = 1 << 18;
|
||||
|
||||
return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads)));
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
// IPP temporary buffer helper
|
||||
template<typename T>
|
||||
class IppAutoBuffer
|
||||
|
@ -86,12 +86,6 @@ static MergeFunc getMergeFunc(int depth)
|
||||
}
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
#ifdef HAVE_IPP_IW
|
||||
extern "C" {
|
||||
IW_DECL(IppStatus) llwiCopySplit(const void *pSrc, int srcStep, void* const pDstOrig[], int dstStep,
|
||||
IppiSize size, int typeSize, int channels);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace cv {
|
||||
static bool ipp_split(const Mat& src, Mat* mv, int channels)
|
||||
@ -114,7 +108,7 @@ static bool ipp_split(const Mat& src, Mat* mv, int channels)
|
||||
return false;
|
||||
}
|
||||
|
||||
return CV_INSTRUMENT_FUN_IPP(llwiCopySplit, src.ptr(), (int)src.step, dstPtrs, (int)dstStep, size, (int)src.elemSize1(), channels) >= 0;
|
||||
return CV_INSTRUMENT_FUN_IPP(llwiCopySplit, src.ptr(), (int)src.step, dstPtrs, (int)dstStep, size, (int)src.elemSize1(), channels, 0) >= 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -132,7 +126,7 @@ static bool ipp_split(const Mat& src, Mat* mv, int channels)
|
||||
|
||||
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
||||
{
|
||||
if(CV_INSTRUMENT_FUN_IPP(llwiCopySplit, ptrs[0], 0, (void**)&ptrs[1], 0, size, (int)src.elemSize1(), channels) < 0)
|
||||
if(CV_INSTRUMENT_FUN_IPP(llwiCopySplit, ptrs[0], 0, (void**)&ptrs[1], 0, size, (int)src.elemSize1(), channels, 0) < 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -273,12 +267,6 @@ void cv::split(InputArray _m, OutputArrayOfArrays _mv)
|
||||
}
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
#ifdef HAVE_IPP_IW
|
||||
extern "C" {
|
||||
IW_DECL(IppStatus) llwiCopyMerge(const void* const pSrc[], int srcStep, void *pDst, int dstStep,
|
||||
IppiSize size, int typeSize, int channels);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace cv {
|
||||
static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
|
||||
@ -301,7 +289,7 @@ static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
|
||||
return false;
|
||||
}
|
||||
|
||||
return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, dst.ptr(), (int)dst.step, size, (int)mv[0].elemSize1(), channels) >= 0;
|
||||
return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, dst.ptr(), (int)dst.step, size, (int)mv[0].elemSize1(), channels, 0) >= 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -319,7 +307,7 @@ static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
|
||||
|
||||
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
||||
{
|
||||
if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size, (int)mv[0].elemSize1(), channels) < 0)
|
||||
if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size, (int)mv[0].elemSize1(), channels, 0) < 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -820,16 +808,10 @@ void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
|
||||
}
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
#ifdef HAVE_IPP_IW
|
||||
extern "C" {
|
||||
IW_DECL(IppStatus) llwiCopyMixed(const void *pSrc, int srcStep, int srcChannels, void *pDst, int dstStep, int dstChannels,
|
||||
IppiSize size, int typeSize, int channelsShift);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel)
|
||||
static bool ipp_extractChannel(const Mat &src, Mat &dst, int channel)
|
||||
{
|
||||
#ifdef HAVE_IPP_IW
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
@ -840,14 +822,11 @@ static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel)
|
||||
if(src.dims != dst.dims)
|
||||
return false;
|
||||
|
||||
if(srcChannels == dstChannels || (srcChannels != 1 && dstChannels != 1))
|
||||
return false;
|
||||
|
||||
if(src.dims <= 2)
|
||||
{
|
||||
IppiSize size = ippiSize(src.size());
|
||||
|
||||
return CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, src.ptr(), (int)src.step, srcChannels, dst.ptr(), (int)dst.step, dstChannels, size, (int)src.elemSize1(), channel) >= 0;
|
||||
return CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, src.ptr(), (int)src.step, srcChannels, channel, dst.ptr(), (int)dst.step, dstChannels, 0, size, (int)src.elemSize1()) >= 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -859,7 +838,45 @@ static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel)
|
||||
|
||||
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
||||
{
|
||||
if(CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, ptrs[0], 0, srcChannels, ptrs[1], 0, dstChannels, size, (int)src.elemSize1(), channel) < 0)
|
||||
if(CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, ptrs[0], 0, srcChannels, channel, ptrs[1], 0, dstChannels, 0, size, (int)src.elemSize1()) < 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(channel);
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool ipp_insertChannel(const Mat &src, Mat &dst, int channel)
|
||||
{
|
||||
#ifdef HAVE_IPP_IW
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
int srcChannels = src.channels();
|
||||
int dstChannels = dst.channels();
|
||||
|
||||
if(src.dims != dst.dims)
|
||||
return false;
|
||||
|
||||
if(src.dims <= 2)
|
||||
{
|
||||
IppiSize size = ippiSize(src.size());
|
||||
|
||||
return CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, src.ptr(), (int)src.step, srcChannels, 0, dst.ptr(), (int)dst.step, dstChannels, channel, size, (int)src.elemSize1()) >= 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
const Mat *arrays[] = {&dst, NULL};
|
||||
uchar *ptrs[2] = {NULL};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
|
||||
IppiSize size = {(int)it.size, 1};
|
||||
|
||||
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
||||
{
|
||||
if(CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, ptrs[0], 0, srcChannels, 0, ptrs[1], 0, dstChannels, channel, size, (int)src.elemSize1()) < 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -893,7 +910,7 @@ void cv::extractChannel(InputArray _src, OutputArray _dst, int coi)
|
||||
_dst.create(src.dims, &src.size[0], depth);
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi))
|
||||
CV_IPP_RUN_FAST(ipp_extractChannel(src, dst, coi))
|
||||
|
||||
mixChannels(&src, 1, &dst, 1, ch, 1);
|
||||
}
|
||||
@ -917,7 +934,7 @@ void cv::insertChannel(InputArray _src, InputOutputArray _dst, int coi)
|
||||
|
||||
Mat src = _src.getMat(), dst = _dst.getMat();
|
||||
|
||||
CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi))
|
||||
CV_IPP_RUN_FAST(ipp_insertChannel(src, dst, coi))
|
||||
|
||||
mixChannels(&src, 1, &dst, 1, ch, 1);
|
||||
}
|
||||
@ -5152,7 +5169,7 @@ static bool ipp_convertTo(Mat &src, Mat &dst, double alpha, double beta)
|
||||
iwSrc.Init(ippiSize(sz), srcDepth, 1, NULL, (void*)src.ptr(), src.step);
|
||||
iwDst.Init(ippiSize(sz), dstDepth, 1, NULL, (void*)dst.ptr(), dst.step);
|
||||
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwDst, alpha, beta, ::ipp::IwiScaleParams(mode));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -5168,7 +5185,7 @@ static bool ipp_convertTo(Mat &src, Mat &dst, double alpha, double beta)
|
||||
iwSrc.m_ptr = ptrs[0];
|
||||
iwDst.m_ptr = ptrs[1];
|
||||
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwDst, alpha, beta, ::ipp::IwiScaleParams(mode));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -49,18 +49,6 @@
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels_core.hpp"
|
||||
|
||||
#ifdef HAVE_IPP_IW
|
||||
extern "C" {
|
||||
IW_DECL(IppStatus) llwiCopyMask(const void *pSrc, int srcStep, void *pDst, int dstStep,
|
||||
IppiSize size, int typeSize, int channels, const Ipp8u *pMask, int maskStep);
|
||||
IW_DECL(IppStatus) llwiSet(const double *pValue, void *pDst, int dstStep,
|
||||
IppiSize size, IppDataType dataType, int channels);
|
||||
IW_DECL(IppStatus) llwiSetMask(const double *pValue, void *pDst, int dstStep,
|
||||
IppiSize size, IppDataType dataType, int channels, const Ipp8u *pMask, int maskStep);
|
||||
IW_DECL(IppStatus) llwiCopyMakeBorder(const void *pSrc, IppSizeL srcStep, void *pDst, IppSizeL dstStep,
|
||||
IppiSizeL size, IppDataType dataType, int channels, IppiBorderSize *pBorderSize, IppiBorderType border, const Ipp64f *pBorderVal);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -480,9 +468,9 @@ static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask)
|
||||
|
||||
if(dst.dims <= 2)
|
||||
{
|
||||
IppiSize size = ippiSize(dst.size());
|
||||
IppDataType dataType = ippiGetDataType(dst.depth());
|
||||
::ipp::IwValue s;
|
||||
IppiSize size = ippiSize(dst.size());
|
||||
IppDataType dataType = ippiGetDataType(dst.depth());
|
||||
::ipp::IwValueFloat s;
|
||||
convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1);
|
||||
|
||||
return CV_INSTRUMENT_FUN_IPP(llwiSetMask, s, dst.ptr(), (int)dst.step, size, dataType, dst.channels(), mask.ptr(), (int)mask.step) >= 0;
|
||||
@ -493,9 +481,9 @@ static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask)
|
||||
uchar *ptrs[2] = {NULL};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
|
||||
IppiSize size = {(int)it.size, 1};
|
||||
IppDataType dataType = ippiGetDataType(dst.depth());
|
||||
::ipp::IwValue s;
|
||||
IppiSize size = {(int)it.size, 1};
|
||||
IppDataType dataType = ippiGetDataType(dst.depth());
|
||||
::ipp::IwValueFloat s;
|
||||
convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1);
|
||||
|
||||
for( size_t i = 0; i < it.nplanes; i++, ++it)
|
||||
@ -717,7 +705,7 @@ static bool ipp_flip(Mat &src, Mat &dst, int flip_mode)
|
||||
::ipp::IwiImage iwSrc = ippiGetImage(src);
|
||||
::ipp::IwiImage iwDst = ippiGetImage(dst);
|
||||
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, &iwSrc, &iwDst, ippMode);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode);
|
||||
}
|
||||
catch(::ipp::IwException)
|
||||
{
|
||||
@ -1155,13 +1143,13 @@ static bool ipp_copyMakeBorder( Mat &_src, Mat &_dst, int top, int bottom,
|
||||
if(_src.dims > 2)
|
||||
return false;
|
||||
|
||||
Rect dstRect(borderSize.borderLeft, borderSize.borderTop,
|
||||
_dst.cols - borderSize.borderRight - borderSize.borderLeft,
|
||||
_dst.rows - borderSize.borderBottom - borderSize.borderTop);
|
||||
Rect dstRect(borderSize.left, borderSize.top,
|
||||
_dst.cols - borderSize.right - borderSize.left,
|
||||
_dst.rows - borderSize.bottom - borderSize.top);
|
||||
Mat subDst = Mat(_dst, dstRect);
|
||||
Mat *pSrc = &_src;
|
||||
|
||||
return CV_INSTRUMENT_FUN_IPP(llwiCopyMakeBorder, pSrc->ptr(), pSrc->step, subDst.ptr(), subDst.step, size, dataType, _src.channels(), &borderSize, borderType, &value[0]) >= 0;
|
||||
return CV_INSTRUMENT_FUN_IPP(llwiCopyMakeBorder, pSrc->ptr(), pSrc->step, subDst.ptr(), subDst.step, size, dataType, _src.channels(), borderSize, borderType, &value[0]) >= 0;
|
||||
#else
|
||||
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(top); CV_UNUSED(bottom); CV_UNUSED(left); CV_UNUSED(right);
|
||||
CV_UNUSED(_borderType); CV_UNUSED(value);
|
||||
|
@ -44,7 +44,8 @@ void magnitude32f(const float* x, const float* y, float* mag, int len)
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len);
|
||||
CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);
|
||||
// SSE42 performance issues
|
||||
CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
@ -55,7 +56,8 @@ void magnitude64f(const double* x, const double* y, double* mag, int len)
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len);
|
||||
CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);
|
||||
// SSE42 performance issues
|
||||
CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
@ -91,7 +93,6 @@ void sqrt32f(const float* src, float* dst, int len)
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_32f_A21, src, dst, len) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(sqrt32f, (src, dst, len),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
@ -103,7 +104,6 @@ void sqrt64f(const double* src, double* dst, int len)
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_64f_A50, src, dst, len) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(sqrt64f, (src, dst, len),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
|
@ -3101,7 +3101,7 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
|
||||
{
|
||||
double r = 0;
|
||||
#if ARITHM_USE_IPP
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, src1, len*sizeof(uchar), src2, len*sizeof(uchar), ippiSize(len, 1), &r) >= 0, r);
|
||||
CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, src1, len*sizeof(uchar), src2, len*sizeof(uchar), ippiSize(len, 1), &r) >= 0, r);
|
||||
#endif
|
||||
int i = 0;
|
||||
|
||||
|
@ -4356,7 +4356,6 @@ template<typename T> static void sortIdx_( const Mat& src, Mat& dst, int flags )
|
||||
}
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
#if !IPP_DISABLE_SORT_IDX
|
||||
typedef IppStatus (CV_STDCALL *IppSortIndexFunc)(const void* pSrc, Ipp32s srcStrideBytes, Ipp32s *pDstIndx, int len, Ipp8u *pBuffer);
|
||||
|
||||
static IppSortIndexFunc getSortIndexFunc(int depth, bool sortDescending)
|
||||
@ -4435,7 +4434,6 @@ static bool ipp_sortIdx( const Mat& src, Mat& dst, int flags )
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef void (*SortFunc)(const Mat& src, Mat& dst, int flags);
|
||||
}
|
||||
@ -4472,9 +4470,8 @@ void cv::sortIdx( InputArray _src, OutputArray _dst, int flags )
|
||||
_dst.release();
|
||||
_dst.create( src.size(), CV_32S );
|
||||
dst = _dst.getMat();
|
||||
#if !IPP_DISABLE_SORT_IDX
|
||||
|
||||
CV_IPP_RUN_FAST(ipp_sortIdx(src, dst, flags));
|
||||
#endif
|
||||
|
||||
static SortFunc tab[] =
|
||||
{
|
||||
|
@ -257,7 +257,8 @@ struct CoreTLSData
|
||||
//#ifdef HAVE_OPENCL
|
||||
device(0), useOpenCL(-1),
|
||||
//#endif
|
||||
useIPP(-1)
|
||||
useIPP(-1),
|
||||
useIPP_NE(-1)
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
,useTegra(-1)
|
||||
#endif
|
||||
@ -272,7 +273,8 @@ struct CoreTLSData
|
||||
ocl::Queue oclQueue; // the queue used for running a kernel, see also getQueue, Kernel::run
|
||||
int useOpenCL; // 1 - use, 0 - do not use, -1 - auto/not initialized
|
||||
//#endif
|
||||
int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized
|
||||
int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized
|
||||
int useIPP_NE; // 1 - use, 0 - do not use, -1 - auto/not initialized
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
int useTegra; // 1 - use, 0 - do not use, -1 - auto/not initialized
|
||||
#endif
|
||||
|
@ -1294,6 +1294,12 @@ static bool ipp_countNonZero( Mat &src, int &res )
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
#if IPP_VERSION_X100 < 201801
|
||||
// Poor performance of SSE42
|
||||
if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
Ipp32s count = 0;
|
||||
int depth = src.depth();
|
||||
|
||||
@ -2531,15 +2537,16 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
|
||||
#if IPP_VERSION_X100 >= 700
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
#if IPP_DISABLE_MINMAX_NAN_SSE42
|
||||
#if IPP_VERSION_X100 < 201800
|
||||
// cv::minMaxIdx problem with NaN input
|
||||
// Disable 32F processing only
|
||||
if(src.depth() == CV_32F && !(ipp::getIppFeatures()&ippCPUID_AVX))
|
||||
if(src.depth() == CV_32F && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
#if IPP_VERSION_X100 < 201801
|
||||
// cv::minMaxIdx problem with index positions on AVX
|
||||
#if IPP_VERSION_X100 < 201810
|
||||
if(!mask.empty() && _maxIdx && ipp::getIppFeatures()&ippCPUID_AVX)
|
||||
if(!mask.empty() && _maxIdx && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
@ -2550,8 +2557,8 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
|
||||
IppiPoint minIdx = {-1, -1};
|
||||
IppiPoint maxIdx = {-1, -1};
|
||||
|
||||
float *pMinVal = (_minVal)?&minVal:NULL;
|
||||
float *pMaxVal = (_maxVal)?&maxVal:NULL;
|
||||
float *pMinVal = (_minVal || _minIdx)?&minVal:NULL;
|
||||
float *pMaxVal = (_maxVal || _maxIdx)?&maxVal:NULL;
|
||||
IppiPoint *pMinIdx = (_minIdx)?&minIdx:NULL;
|
||||
IppiPoint *pMaxIdx = (_maxIdx)?&maxIdx:NULL;
|
||||
|
||||
@ -2564,6 +2571,8 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
|
||||
ippMinMaxFun = ipp_minIdx_wrap;
|
||||
else if(_maxVal && !_maxIdx && _minVal && !_minIdx)
|
||||
ippMinMaxFun = ipp_minMax_wrap;
|
||||
else if(!_maxVal && !_maxIdx && !_minVal && !_minIdx)
|
||||
return false;
|
||||
else
|
||||
ippMinMaxFun = ipp_minMaxIndex_wrap;
|
||||
}
|
||||
@ -2582,8 +2591,12 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
|
||||
*_maxVal = maxVal;
|
||||
if(_minIdx)
|
||||
{
|
||||
#if IPP_VERSION_X100 < 201801
|
||||
// Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks
|
||||
if(status == ippStsNoOperation && !mask.empty() && !pMinIdx->x && !pMinIdx->y)
|
||||
#else
|
||||
if(status == ippStsNoOperation)
|
||||
#endif
|
||||
{
|
||||
_minIdx[0] = -1;
|
||||
_minIdx[1] = -1;
|
||||
@ -2596,8 +2609,12 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
|
||||
}
|
||||
if(_maxIdx)
|
||||
{
|
||||
#if IPP_VERSION_X100 < 201801
|
||||
// Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks
|
||||
if(status == ippStsNoOperation && !mask.empty() && !pMaxIdx->x && !pMaxIdx->y)
|
||||
#else
|
||||
if(status == ippStsNoOperation)
|
||||
#endif
|
||||
{
|
||||
_maxIdx[0] = -1;
|
||||
_maxIdx[1] = -1;
|
||||
|
@ -1909,55 +1909,146 @@ struct IPPInitSingleton
|
||||
public:
|
||||
IPPInitSingleton()
|
||||
{
|
||||
useIPP = true;
|
||||
ippStatus = 0;
|
||||
funcname = NULL;
|
||||
filename = NULL;
|
||||
linen = 0;
|
||||
ippFeatures = 0;
|
||||
useIPP = true;
|
||||
useIPP_NE = false;
|
||||
ippStatus = 0;
|
||||
funcname = NULL;
|
||||
filename = NULL;
|
||||
linen = 0;
|
||||
cpuFeatures = 0;
|
||||
ippFeatures = 0;
|
||||
ippTopFeatures = 0;
|
||||
pIppLibInfo = NULL;
|
||||
|
||||
ippStatus = ippGetCpuFeatures(&cpuFeatures, NULL);
|
||||
if(ippStatus < 0)
|
||||
{
|
||||
std::cerr << "ERROR: IPP cannot detect CPU features, IPP was disabled " << std::endl;
|
||||
useIPP = false;
|
||||
return;
|
||||
}
|
||||
ippFeatures = cpuFeatures;
|
||||
|
||||
bool unsupported = false;
|
||||
const char* pIppEnv = getenv("OPENCV_IPP");
|
||||
cv::String env = pIppEnv;
|
||||
if(env.size())
|
||||
{
|
||||
env = env.toLowerCase();
|
||||
if(env.substr(0, 2) == "ne")
|
||||
{
|
||||
useIPP_NE = true;
|
||||
env = env.substr(3, env.size());
|
||||
}
|
||||
|
||||
if(env == "disabled")
|
||||
{
|
||||
std::cerr << "WARNING: IPP was disabled by OPENCV_IPP environment variable" << std::endl;
|
||||
useIPP = false;
|
||||
}
|
||||
#if IPP_VERSION_X100 >= 900
|
||||
else if(env == "sse")
|
||||
ippFeatures = ippCPUID_SSE;
|
||||
else if(env == "sse2")
|
||||
ippFeatures = ippCPUID_SSE2;
|
||||
else if(env == "sse3")
|
||||
ippFeatures = ippCPUID_SSE3;
|
||||
else if(env == "ssse3")
|
||||
ippFeatures = ippCPUID_SSSE3;
|
||||
else if(env == "sse41")
|
||||
ippFeatures = ippCPUID_SSE41;
|
||||
else if(env == "sse42")
|
||||
ippFeatures = ippCPUID_SSE42;
|
||||
else if(env == "avx")
|
||||
ippFeatures = ippCPUID_AVX;
|
||||
{
|
||||
if(!(cpuFeatures&ippCPUID_SSE42))
|
||||
unsupported = true;
|
||||
ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42;
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_AES);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_CLMUL);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_SHA);
|
||||
}
|
||||
else if(env == "avx2")
|
||||
ippFeatures = ippCPUID_AVX2;
|
||||
{
|
||||
if(!(cpuFeatures&ippCPUID_AVX2))
|
||||
unsupported = true;
|
||||
ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2;
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_AES);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_CLMUL);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_F16C);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_ADCOX);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_RDSEED);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_PREFETCHW);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_MPX);
|
||||
}
|
||||
#if defined (_M_AMD64) || defined (__x86_64__)
|
||||
else if(env == "avx512")
|
||||
{
|
||||
if(!(cpuFeatures&ippCPUID_AVX512F))
|
||||
unsupported = true;
|
||||
|
||||
ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2|ippCPUID_AVX512F;
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_AES);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_CLMUL);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_F16C);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_ADCOX);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_RDSEED);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_PREFETCHW);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_MPX);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_AVX512CD);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_AVX512VL);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_AVX512BW);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_AVX512DQ);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_AVX512ER);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_AVX512PF);
|
||||
ippFeatures |= (cpuFeatures&ippCPUID_AVX512VBMI);
|
||||
}
|
||||
#endif
|
||||
else
|
||||
std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << std::endl;
|
||||
std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << ". Correct values are: disabled, sse42, avx2, avx512 (Intel64 only)" << std::endl;
|
||||
}
|
||||
|
||||
if(unsupported)
|
||||
{
|
||||
std::cerr << "WARNING: selected IPP features are not supported by CPU. IPP was initialized with default features" << std::endl;
|
||||
ippFeatures = cpuFeatures;
|
||||
}
|
||||
|
||||
// Disable AVX1 since we don't track regressions for it. SSE42 will be used instead
|
||||
if(cpuFeatures&ippCPUID_AVX && !(cpuFeatures&ippCPUID_AVX2))
|
||||
ippFeatures &= ~ippCPUID_AVX;
|
||||
|
||||
// IPP integrations in OpenCV support only SSE4.2, AVX2 and AVX-512 optimizations.
|
||||
if(!(
|
||||
cpuFeatures&ippCPUID_AVX512F ||
|
||||
cpuFeatures&ippCPUID_AVX2 ||
|
||||
cpuFeatures&ippCPUID_SSE42
|
||||
))
|
||||
{
|
||||
useIPP = false;
|
||||
return;
|
||||
}
|
||||
|
||||
IPP_INITIALIZER(ippFeatures)
|
||||
ippFeatures = ippGetEnabledCpuFeatures();
|
||||
|
||||
// Detect top level optimizations to make comparison easier for optimizations dependent conditions
|
||||
if(ippFeatures&ippCPUID_AVX512F)
|
||||
{
|
||||
if((ippFeatures&ippCPUID_AVX512_SKX) == ippCPUID_AVX512_SKX)
|
||||
ippTopFeatures = ippCPUID_AVX512_SKX;
|
||||
else if((ippFeatures&ippCPUID_AVX512_KNL) == ippCPUID_AVX512_KNL)
|
||||
ippTopFeatures = ippCPUID_AVX512_KNL;
|
||||
else
|
||||
ippTopFeatures = ippCPUID_AVX512F; // Unknown AVX512 configuration
|
||||
}
|
||||
else if(ippFeatures&ippCPUID_AVX2)
|
||||
ippTopFeatures = ippCPUID_AVX2;
|
||||
else if(ippFeatures&ippCPUID_SSE42)
|
||||
ippTopFeatures = ippCPUID_SSE42;
|
||||
|
||||
pIppLibInfo = ippiGetLibVersion();
|
||||
}
|
||||
|
||||
bool useIPP;
|
||||
public:
|
||||
bool useIPP;
|
||||
bool useIPP_NE;
|
||||
|
||||
int ippStatus; // 0 - all is ok, -1 - IPP functions failed
|
||||
int ippStatus; // 0 - all is ok, -1 - IPP functions failed
|
||||
const char *funcname;
|
||||
const char *filename;
|
||||
int linen;
|
||||
Ipp64u ippFeatures;
|
||||
Ipp64u cpuFeatures;
|
||||
Ipp64u ippTopFeatures;
|
||||
const IppLibraryVersion *pIppLibInfo;
|
||||
};
|
||||
|
||||
static IPPInitSingleton& getIPPSingleton()
|
||||
@ -1983,6 +2074,17 @@ int getIppFeatures()
|
||||
#endif
|
||||
}
|
||||
|
||||
unsigned long long getIppTopFeatures();
|
||||
|
||||
// Returns the ippTopFeatures value stored in the IPP singleton.
// In the singleton constructor shown in this diff that field is set to one
// flag only (AVX512_SKX, AVX512_KNL, AVX512F, AVX2 or SSE42), or left at 0,
// which is why callers can compare the result with == / != instead of
// masking bits. Builds without HAVE_IPP always return 0.
unsigned long long getIppTopFeatures()
|
||||
{
|
||||
#ifdef HAVE_IPP
|
||||
return getIPPSingleton().ippTopFeatures;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
void setIppStatus(int status, const char * const _funcname, const char * const _filename, int _line)
|
||||
{
|
||||
#ifdef HAVE_IPP
|
||||
@ -2013,6 +2115,19 @@ String getIppErrorLocation()
|
||||
#endif
|
||||
}
|
||||
|
||||
// Builds a human-readable description of the linked IPP library:
//   "<Name> <Version> <BuildDate>" taken from the IppLibraryVersion record
//   cached in the IPP singleton (pIppLibInfo).
// Returns "error" when that pointer is NULL (the singleton constructor shown
// in this diff leaves it NULL on its early-return paths) and "disabled" when
// the build has no HAVE_IPP.
String getIppVersion()
|
||||
{
|
||||
#ifdef HAVE_IPP
|
||||
const IppLibraryVersion *pInfo = getIPPSingleton().pIppLibInfo;
|
||||
// pIppLibInfo is only filled in at the very end of singleton initialisation.
if(pInfo)
|
||||
return format("%s %s %s", pInfo->Name, pInfo->Version, pInfo->BuildDate);
|
||||
else
|
||||
return String("error");
|
||||
#else
|
||||
return String("disabled");
|
||||
#endif
|
||||
}
|
||||
|
||||
bool useIPP()
|
||||
{
|
||||
#ifdef HAVE_IPP
|
||||
@ -2038,6 +2153,31 @@ void setUseIPP(bool flag)
|
||||
#endif
|
||||
}
|
||||
|
||||
// Reports whether the "NE" IPP mode is enabled for the caller.
// The answer is cached in CoreTLSData::useIPP_NE (presumably thread-local,
// judging by the getCoreTlsData() accessor - confirm against its definition).
// A negative cached value means "not initialised yet"; it is then seeded from
// the process-wide singleton flag. Always false without HAVE_IPP.
bool useIPP_NE()
|
||||
{
|
||||
#ifdef HAVE_IPP
|
||||
CoreTLSData* data = getCoreTlsData().get();
|
||||
// Lazy initialisation: -1 is the "auto/not initialized" state.
if(data->useIPP_NE < 0)
|
||||
{
|
||||
data->useIPP_NE = getIPPSingleton().useIPP_NE;
|
||||
}
|
||||
return (data->useIPP_NE > 0);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Sets the cached useIPP_NE value in CoreTLSData.
// With HAVE_IPP the request is honoured only when the singleton's own
// useIPP_NE flag is set (in the constructor shown in this diff that happens
// when the OPENCV_IPP environment value starts with "ne"); otherwise the
// stored value is forced to false regardless of `flag`.
// Without HAVE_IPP the value is always stored as false.
void setUseIPP_NE(bool flag)
|
||||
{
|
||||
CoreTLSData* data = getCoreTlsData().get();
|
||||
#ifdef HAVE_IPP
|
||||
// The singleton flag acts as a master switch; `flag` cannot enable NE on its own.
data->useIPP_NE = (getIPPSingleton().useIPP_NE)?flag:false;
|
||||
#else
|
||||
// Silence the unused-parameter warning in non-IPP builds.
(void)flag;
|
||||
data->useIPP_NE = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace ipp
|
||||
|
||||
} // namespace cv
|
||||
|
@ -95,7 +95,7 @@ static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst,
|
||||
ippiGetImage(dy_, iwSrcDy);
|
||||
ippiGetImage(dst, iwDst);
|
||||
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, &iwSrcDx, &iwSrcDy, &iwDst, norm, low, high);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, iwSrcDx, iwSrcDy, iwDst, low, high, ::ipp::IwiFilterCannyDerivParams(norm));
|
||||
}
|
||||
catch (::ipp::IwException ex)
|
||||
{
|
||||
@ -121,7 +121,7 @@ static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst,
|
||||
ippiGetImage(src, iwSrc);
|
||||
ippiGetImage(dst, iwDst);
|
||||
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, &iwSrc, &iwDst, ippFilterSobel, kernel, norm, low, high, ippBorderRepl);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, iwSrc, iwDst, low, high, ::ipp::IwiFilterCannyParams(ippFilterSobel, kernel, norm), ippBorderRepl);
|
||||
}
|
||||
catch (::ipp::IwException)
|
||||
{
|
||||
|
@ -469,11 +469,32 @@ static ippiGeneralFunc ippiRGB2GrayC4Tab[] =
|
||||
0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0
|
||||
};
|
||||
|
||||
static ippiGeneralFunc ippiCopyP3C3RTab[] =
|
||||
|
||||
static IppStatus ippiGrayToRGB_C1C3R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize)
|
||||
{
|
||||
(ippiGeneralFunc)ippiCopy_8u_P3C3R, 0, (ippiGeneralFunc)ippiCopy_16u_P3C3R, 0,
|
||||
0, (ippiGeneralFunc)ippiCopy_32f_P3C3R, 0, 0
|
||||
};
|
||||
return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize);
|
||||
}
|
||||
// Ipp16u overload: forwards to ippiGrayToRGB_16u_C1C3R through
// CV_INSTRUMENT_FUN_IPP and returns its IppStatus unchanged.
// Overloading on the pointer type lets the templated functor in this diff
// call ippiGrayToRGB_C1C3R((T*)src, ...) without a per-depth lookup table.
static IppStatus ippiGrayToRGB_C1C3R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize)
|
||||
{
|
||||
return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize);
|
||||
}
|
||||
// Ipp32f overload: forwards to ippiGrayToRGB_32f_C1C3R through
// CV_INSTRUMENT_FUN_IPP and returns its IppStatus unchanged.
static IppStatus ippiGrayToRGB_C1C3R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize)
|
||||
{
|
||||
return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize);
|
||||
}
|
||||
|
||||
// Ipp8u overload of the 1-channel -> 4-channel wrapper: forwards to
// ippiGrayToRGB_8u_C1C4R through CV_INSTRUMENT_FUN_IPP, passing `aval`
// straight through (the functor in this diff supplies its `alpha` member,
// initialised from ColorChannel<T>::max()).
static IppStatus ippiGrayToRGB_C1C4R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize, Ipp8u aval)
|
||||
{
|
||||
return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval);
|
||||
}
|
||||
// Ipp16u overload: forwards to ippiGrayToRGB_16u_C1C4R through
// CV_INSTRUMENT_FUN_IPP, passing `aval` through unchanged.
static IppStatus ippiGrayToRGB_C1C4R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize, Ipp16u aval)
|
||||
{
|
||||
return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval);
|
||||
}
|
||||
// Ipp32f overload: forwards to ippiGrayToRGB_32f_C1C4R through
// CV_INSTRUMENT_FUN_IPP, passing `aval` through unchanged.
static IppStatus ippiGrayToRGB_C1C4R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize, Ipp32f aval)
|
||||
{
|
||||
return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval);
|
||||
}
|
||||
|
||||
#if !IPP_DISABLE_RGB_XYZ
|
||||
static ippiGeneralFunc ippiRGB2XYZTab[] =
|
||||
@ -580,48 +601,31 @@ private:
|
||||
Ipp32f coeffs[3];
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct IPPGray2BGRFunctor
|
||||
{
|
||||
IPPGray2BGRFunctor(ippiGeneralFunc _func) :
|
||||
ippiGrayToBGR(_func)
|
||||
{
|
||||
}
|
||||
IPPGray2BGRFunctor(){}
|
||||
|
||||
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
|
||||
{
|
||||
if (ippiGrayToBGR == 0)
|
||||
return false;
|
||||
|
||||
const void* srcarray[3] = { src, src, src };
|
||||
return CV_INSTRUMENT_FUN_IPP(ippiGrayToBGR, srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
|
||||
return ippiGrayToRGB_C1C3R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows)) >= 0;
|
||||
}
|
||||
private:
|
||||
ippiGeneralFunc ippiGrayToBGR;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct IPPGray2BGRAFunctor
|
||||
{
|
||||
IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) :
|
||||
ippiColorConvertGeneral(_func1), ippiColorConvertReorder(_func2), depth(_depth)
|
||||
IPPGray2BGRAFunctor()
|
||||
{
|
||||
alpha = ColorChannel<T>::max();
|
||||
}
|
||||
|
||||
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
|
||||
{
|
||||
if (ippiColorConvertGeneral == 0 || ippiColorConvertReorder == 0)
|
||||
return false;
|
||||
|
||||
const void* srcarray[3] = { src, src, src };
|
||||
Mat temp(rows, cols, CV_MAKETYPE(depth, 3));
|
||||
if(CV_INSTRUMENT_FUN_IPP(ippiColorConvertGeneral, srcarray, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
|
||||
return false;
|
||||
int order[4] = {0, 1, 2, 3};
|
||||
return CV_INSTRUMENT_FUN_IPP(ippiColorConvertReorder, temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
|
||||
return ippiGrayToRGB_C1C4R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows), alpha) >= 0;
|
||||
}
|
||||
private:
|
||||
ippiGeneralFunc ippiColorConvertGeneral;
|
||||
ippiReorderFunc ippiColorConvertReorder;
|
||||
int depth;
|
||||
|
||||
T alpha;
|
||||
};
|
||||
|
||||
struct IPPReorderGeneralFunctor
|
||||
@ -9744,18 +9748,27 @@ void cvtGraytoBGR(const uchar * src_data, size_t src_step,
|
||||
#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700
|
||||
CV_IPP_CHECK()
|
||||
{
|
||||
bool ippres = false;
|
||||
if(dcn == 3)
|
||||
{
|
||||
if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height,
|
||||
IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) )
|
||||
return;
|
||||
if( depth == CV_8U )
|
||||
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor<Ipp8u>());
|
||||
else if( depth == CV_16U )
|
||||
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor<Ipp16u>());
|
||||
else
|
||||
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor<Ipp32f>());
|
||||
}
|
||||
else if(dcn == 4)
|
||||
{
|
||||
if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height,
|
||||
IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) )
|
||||
return;
|
||||
if( depth == CV_8U )
|
||||
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor<Ipp8u>());
|
||||
else if( depth == CV_16U )
|
||||
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor<Ipp16u>());
|
||||
else
|
||||
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor<Ipp32f>());
|
||||
}
|
||||
if(ippres)
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -312,7 +312,7 @@ static bool ipp_Deriv(InputArray _src, OutputArray _dst, int dx, int dy, int ksi
|
||||
::ipp::IwiImage iwDstProc = iwDst;
|
||||
::ipp::IwiBorderSize borderSize(maskSize);
|
||||
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
|
||||
if(!ippBorder.m_borderType)
|
||||
if(!ippBorder)
|
||||
return false;
|
||||
|
||||
if(srcType == ipp8u && dstType == ipp8u)
|
||||
@ -324,17 +324,17 @@ static bool ipp_Deriv(InputArray _src, OutputArray _dst, int dx, int dy, int ksi
|
||||
{
|
||||
iwSrc -= borderSize;
|
||||
iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwSrcProc, 1, 0, ippAlgHintFast);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwSrcProc, 1, 0, ::ipp::IwiScaleParams(ippAlgHintFast));
|
||||
iwSrcProc += borderSize;
|
||||
}
|
||||
|
||||
if(useScharr)
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterScharr, &iwSrcProc, &iwDstProc, derivType, maskSize, ippBorder);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterScharr, iwSrcProc, iwDstProc, derivType, maskSize, ::ipp::IwDefault(), ippBorder);
|
||||
else
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterSobel, &iwSrcProc, &iwDstProc, derivType, maskSize, ippBorder);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterSobel, iwSrcProc, iwDstProc, derivType, maskSize, ::ipp::IwDefault(), ippBorder);
|
||||
|
||||
if(useScale)
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwDstProc, &iwDst, scale, delta, ippAlgHintFast);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwDstProc, iwDst, scale, delta, ::ipp::IwiScaleParams(ippAlgHintFast));
|
||||
}
|
||||
catch (::ipp::IwException)
|
||||
{
|
||||
@ -732,7 +732,7 @@ static bool ipp_Laplacian(InputArray _src, OutputArray _dst, int ksize, double s
|
||||
::ipp::IwiImage iwDstProc = iwDst;
|
||||
::ipp::IwiBorderSize borderSize(maskSize);
|
||||
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
|
||||
if(!ippBorder.m_borderType)
|
||||
if(!ippBorder)
|
||||
return false;
|
||||
|
||||
if(srcType == ipp8u && dstType == ipp8u)
|
||||
@ -744,14 +744,14 @@ static bool ipp_Laplacian(InputArray _src, OutputArray _dst, int ksize, double s
|
||||
{
|
||||
iwSrc -= borderSize;
|
||||
iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwSrcProc, 1, 0);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwSrcProc, 1, 0);
|
||||
iwSrcProc += borderSize;
|
||||
}
|
||||
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterLaplacian, &iwSrcProc, &iwDstProc, maskSize, ippBorder);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterLaplacian, iwSrcProc, iwDstProc, maskSize, ::ipp::IwDefault(), ippBorder);
|
||||
|
||||
if(useScale)
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwDstProc, &iwDst, scale, delta);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwDstProc, iwDst, scale, delta);
|
||||
|
||||
}
|
||||
catch (::ipp::IwException ex)
|
||||
|
@ -4560,170 +4560,88 @@ static bool replacementFilter2D(int stype, int dtype, int kernel_type,
|
||||
}
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
typedef IppStatus(CV_STDCALL* IppiFilterBorder)(
|
||||
const void* pSrc, int srcStep, void* pDst, int dstStep,
|
||||
IppiSize dstRoiSize, IppiBorderType border, const void* borderValue,
|
||||
const IppiFilterBorderSpec* pSpec, Ipp8u* pBuffer);
|
||||
|
||||
static IppiFilterBorder getIppFunc(int stype)
|
||||
{
|
||||
switch (stype)
|
||||
{
|
||||
case CV_8UC1:
|
||||
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_8u_C1R);
|
||||
case CV_8UC3:
|
||||
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_8u_C3R);
|
||||
case CV_8UC4:
|
||||
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_8u_C4R);
|
||||
case CV_16UC1:
|
||||
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16u_C1R);
|
||||
case CV_16UC3:
|
||||
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16u_C3R);
|
||||
case CV_16UC4:
|
||||
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16u_C4R);
|
||||
case CV_16SC1:
|
||||
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16s_C1R);
|
||||
case CV_16SC3:
|
||||
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16s_C3R);
|
||||
case CV_16SC4:
|
||||
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16s_C4R);
|
||||
case CV_32FC1:
|
||||
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_32f_C1R);
|
||||
case CV_32FC3:
|
||||
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_32f_C3R);
|
||||
case CV_32FC4:
|
||||
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_32f_C4R);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <int kdepth>
|
||||
struct IppFilterTrait { };
|
||||
|
||||
template <>
|
||||
struct IppFilterTrait<CV_16S>
|
||||
{
|
||||
enum { kernel_type_id = CV_16SC1 };
|
||||
typedef Ipp16s kernel_type;
|
||||
typedef IppStatus(CV_STDCALL* copy_fun_type)(const kernel_type* pSrc, int srcStep, kernel_type* pDst, int dstStep, IppiSize roiSize);
|
||||
inline static copy_fun_type get_copy_fun() { return ippiCopy_16s_C1R; }
|
||||
inline static IppStatus runInit(const kernel_type* pKernel, IppiSize kernelSize, int divisor, IppDataType dataType, int numChannels, IppRoundMode roundMode, IppiFilterBorderSpec* pSpec)
|
||||
{
|
||||
return ippiFilterBorderInit_16s(pKernel, kernelSize, divisor, dataType, numChannels, roundMode, pSpec);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct IppFilterTrait<CV_32F>
|
||||
{
|
||||
enum { kernel_type_id = CV_32FC1 };
|
||||
typedef Ipp32f kernel_type;
|
||||
typedef IppStatus(CV_STDCALL* copy_fun_type)(const kernel_type* pSrc, int srcStep, kernel_type* pDst, int dstStep, IppiSize roiSize);
|
||||
inline static copy_fun_type get_copy_fun() { return ippiCopy_32f_C1R; }
|
||||
inline static IppStatus runInit(const kernel_type* pKernel, IppiSize kernelSize, int divisor, IppDataType dataType, int numChannels, IppRoundMode roundMode, IppiFilterBorderSpec* pSpec)
|
||||
{
|
||||
CV_UNUSED(divisor);
|
||||
return ippiFilterBorderInit_32f(pKernel, kernelSize, dataType, numChannels, roundMode, pSpec);
|
||||
}
|
||||
};
|
||||
|
||||
template <int kdepth>
|
||||
static bool ippFilter2D(int stype, int dtype,
|
||||
uchar * src_data, size_t src_step,
|
||||
uchar * dst_data, size_t dst_step,
|
||||
int width, int height,
|
||||
uchar * kernel_data, size_t kernel_step,
|
||||
int kernel_width, int kernel_height,
|
||||
int anchor_x, int anchor_y,
|
||||
double delta, int borderType, bool isSubmatrix)
|
||||
static bool ippFilter2D(int stype, int dtype, int kernel_type,
|
||||
uchar * src_data, size_t src_step,
|
||||
uchar * dst_data, size_t dst_step,
|
||||
int width, int height,
|
||||
int full_width, int full_height,
|
||||
int offset_x, int offset_y,
|
||||
uchar * kernel_data, size_t kernel_step,
|
||||
int kernel_width, int kernel_height,
|
||||
int anchor_x, int anchor_y,
|
||||
double delta, int borderType,
|
||||
bool isSubmatrix)
|
||||
{
|
||||
#ifdef HAVE_IPP_IW
|
||||
CV_INSTRUMENT_REGION_IPP();
|
||||
|
||||
typedef IppFilterTrait<kdepth> trait;
|
||||
typedef typename trait::kernel_type kernel_type;
|
||||
::ipp::IwiSize iwSize(width, height);
|
||||
::ipp::IwiSize kernelSize(kernel_width, kernel_height);
|
||||
IppDataType type = ippiGetDataType(CV_MAT_DEPTH(stype));
|
||||
int channels = CV_MAT_CN(stype);
|
||||
|
||||
IppAutoBuffer<IppiFilterBorderSpec> spec;
|
||||
IppAutoBuffer<Ipp8u> buffer;
|
||||
IppAutoBuffer<kernel_type> kernelBuffer;
|
||||
IppiBorderType ippBorderType;
|
||||
int src_type;
|
||||
|
||||
Point anchor(anchor_x, anchor_y);
|
||||
#if IPP_VERSION_X100 >= 900
|
||||
Point ippAnchor((kernel_width - 1) / 2, (kernel_height - 1) / 2);
|
||||
#else
|
||||
Point ippAnchor(kernel_width >> 1, kernel_height >> 1);
|
||||
#endif
|
||||
bool isIsolated = (borderType & BORDER_ISOLATED) != 0;
|
||||
int borderTypeNI = borderType & ~BORDER_ISOLATED;
|
||||
ippBorderType = ippiGetBorderType(borderTypeNI);
|
||||
int ddepth = CV_MAT_DEPTH(dtype);
|
||||
int sdepth = CV_MAT_DEPTH(stype);
|
||||
CV_UNUSED(isSubmatrix);
|
||||
|
||||
#if IPP_VERSION_X100 >= 201700 && IPP_VERSION_X100 <= 201702 // IPP bug with 1x1 kernel
|
||||
if(kernel_width == 1 && kernel_height == 1)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
bool runIpp = true
|
||||
&& (borderTypeNI == BORDER_CONSTANT || borderTypeNI == BORDER_REPLICATE)
|
||||
&& (sdepth == ddepth)
|
||||
&& (getIppFunc(stype))
|
||||
&& ((int)ippBorderType > 0)
|
||||
&& (!isSubmatrix || isIsolated)
|
||||
&& (std::fabs(delta - 0) < DBL_EPSILON)
|
||||
&& (ippAnchor == anchor)
|
||||
&& src_data != dst_data;
|
||||
|
||||
if (!runIpp)
|
||||
#if IPP_VERSION_X100 < 201801
|
||||
// Too big difference compared to OpenCV FFT-based convolution
|
||||
if(kernel_type == CV_32FC1 && (type == ipp16s || type == ipp16u) && (kernel_width > 7 || kernel_height > 7))
|
||||
return false;
|
||||
|
||||
src_type = stype;
|
||||
int cn = CV_MAT_CN(stype);
|
||||
IppiSize kernelSize = { kernel_width, kernel_height };
|
||||
IppDataType dataType = ippiGetDataType(ddepth);
|
||||
IppDataType kernelType = ippiGetDataType(kdepth);
|
||||
Ipp32s specSize = 0;
|
||||
Ipp32s bufsize = 0;
|
||||
IppiSize dstRoiSize = { width, height };
|
||||
IppStatus status;
|
||||
|
||||
status = ippiFilterBorderGetSize(kernelSize, dstRoiSize, dataType, kernelType, cn, &specSize, &bufsize);
|
||||
if (status < 0)
|
||||
// Poor optimization for big kernels
|
||||
if(kernel_width > 7 || kernel_height > 7)
|
||||
return false;
|
||||
|
||||
kernel_type* pKerBuffer = (kernel_type*)kernel_data;
|
||||
size_t good_kernel_step = sizeof(kernel_type) * static_cast<size_t>(kernelSize.width);
|
||||
#if IPP_VERSION_X100 >= 900
|
||||
if (kernel_step != good_kernel_step) {
|
||||
kernelBuffer.allocate((int)good_kernel_step * kernelSize.height);
|
||||
status = trait::get_copy_fun()((kernel_type*)kernel_data, (int)kernel_step, kernelBuffer, (int)good_kernel_step, kernelSize);
|
||||
if (status < 0)
|
||||
return false;
|
||||
pKerBuffer = kernelBuffer;
|
||||
}
|
||||
#else
|
||||
kernelBuffer.Alloc(good_kernel_step * kernelSize.height);
|
||||
Mat kerFlip(Size(kernelSize.width, kernelSize.height), trait::kernel_type_id, kernelBuffer, (int)good_kernel_step);
|
||||
Mat kernel(Size(kernel_width, kernel_height), trait::kernel_type_id, kernel_data, kernel_step);
|
||||
flip(kernel, kerFlip, -1);
|
||||
pKerBuffer = kernelBuffer;
|
||||
#endif
|
||||
spec.allocate(specSize);
|
||||
buffer.allocate(bufsize);
|
||||
status = trait::runInit(pKerBuffer, kernelSize, 0, dataType, cn, ippRndFinancial, spec);
|
||||
if (status < 0) {
|
||||
|
||||
if(src_data == dst_data)
|
||||
return false;
|
||||
|
||||
if(stype != dtype)
|
||||
return false;
|
||||
|
||||
if(kernel_type != CV_16SC1 && kernel_type != CV_32FC1)
|
||||
return false;
|
||||
|
||||
// TODO: Implement offset for 8u, 16u
|
||||
if(std::fabs(delta) >= DBL_EPSILON)
|
||||
return false;
|
||||
|
||||
if(!ippiCheckAnchor(anchor_x, anchor_y, kernel_width, kernel_height))
|
||||
return false;
|
||||
|
||||
try
|
||||
{
|
||||
::ipp::IwiBorderSize iwBorderSize;
|
||||
::ipp::IwiBorderType iwBorderType;
|
||||
::ipp::IwiImage iwKernel(ippiSize(kernel_width, kernel_height), ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, (void*)kernel_data, kernel_step);
|
||||
::ipp::IwiImage iwSrc(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)src_data, src_step);
|
||||
::ipp::IwiImage iwDst(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)dst_data, dst_step);
|
||||
|
||||
iwBorderSize = ::ipp::iwiSizeToBorderSize(kernelSize);
|
||||
iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize);
|
||||
if(!iwBorderType)
|
||||
return false;
|
||||
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilter, iwSrc, iwDst, iwKernel, ::ipp::IwiFilterParams(1, 0, ippAlgHintNone, ippRndFinancial), iwBorderType);
|
||||
}
|
||||
catch(::ipp::IwException ex)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
IppiFilterBorder ippiFilterBorder = getIppFunc(src_type);
|
||||
kernel_type borderValue[4] = { 0, 0, 0, 0 };
|
||||
status = CV_INSTRUMENT_FUN_IPP(ippiFilterBorder, src_data, (int)src_step, dst_data, (int)dst_step, dstRoiSize, ippBorderType, borderValue, spec, buffer);
|
||||
if (status >= 0) {
|
||||
CV_IMPL_ADD(CV_IMPL_IPP);
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
#else
|
||||
CV_UNUSED(stype); CV_UNUSED(dtype); CV_UNUSED(kernel_type); CV_UNUSED(src_data); CV_UNUSED(src_step);
|
||||
CV_UNUSED(dst_data); CV_UNUSED(dst_step); CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(full_width);
|
||||
CV_UNUSED(full_height); CV_UNUSED(offset_x); CV_UNUSED(offset_y); CV_UNUSED(kernel_data); CV_UNUSED(kernel_step);
|
||||
CV_UNUSED(kernel_width); CV_UNUSED(kernel_height); CV_UNUSED(anchor_x); CV_UNUSED(anchor_y); CV_UNUSED(delta);
|
||||
CV_UNUSED(borderType); CV_UNUSED(isSubmatrix);
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -4902,34 +4820,18 @@ void filter2D(int stype, int dtype, int kernel_type,
|
||||
delta, borderType, isSubmatrix);
|
||||
if (res)
|
||||
return;
|
||||
#ifdef HAVE_IPP
|
||||
CV_IPP_CHECK()
|
||||
{
|
||||
res = false;
|
||||
if (kernel_type == CV_32FC1) {
|
||||
res = ippFilter2D<CV_32F>(stype, dtype,
|
||||
src_data, src_step,
|
||||
dst_data, dst_step,
|
||||
width, height,
|
||||
kernel_data, kernel_step,
|
||||
kernel_width, kernel_height,
|
||||
anchor_x, anchor_y,
|
||||
delta, borderType, isSubmatrix);
|
||||
}
|
||||
else if (kernel_type == CV_16SC1) {
|
||||
res = ippFilter2D<CV_16S>(stype, dtype,
|
||||
src_data, src_step,
|
||||
dst_data, dst_step,
|
||||
width, height,
|
||||
kernel_data, kernel_step,
|
||||
kernel_width, kernel_height,
|
||||
anchor_x, anchor_y,
|
||||
delta, borderType, isSubmatrix);
|
||||
}
|
||||
if (res)
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
CV_IPP_RUN_FAST(ippFilter2D(stype, dtype, kernel_type,
|
||||
src_data, src_step,
|
||||
dst_data, dst_step,
|
||||
width, height,
|
||||
full_width, full_height,
|
||||
offset_x, offset_y,
|
||||
kernel_data, kernel_step,
|
||||
kernel_width, kernel_height,
|
||||
anchor_x, anchor_y,
|
||||
delta, borderType, isSubmatrix))
|
||||
|
||||
res = dftFilter2D(stype, dtype, kernel_type,
|
||||
src_data, src_step,
|
||||
dst_data, dst_step,
|
||||
|
@ -370,6 +370,18 @@ void crossCorr( const Mat& src, const Mat& templ, Mat& dst,
|
||||
Point anchor=Point(0,0), double delta=0,
|
||||
int borderType=BORDER_REFLECT_101 );
|
||||
|
||||
|
||||
}
|
||||
|
||||
#ifdef HAVE_IPP_IW
|
||||
// Tells whether the requested anchor, once passed through cv::normalizeAnchor(),
// coincides with the kernel center ((width-1)/2, (height-1)/2).
// Returns true when both coordinates match that center, false otherwise.
static inline bool ippiCheckAnchor(cv::Point anchor, cv::Size ksize)
{
    const cv::Point resolved = cv::normalizeAnchor(anchor, ksize);
    const int centerX = (ksize.width - 1) / 2;
    const int centerY = (ksize.height - 1) / 2;

    return resolved.x == centerX && resolved.y == centerY;
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -1194,10 +1194,8 @@ static IppiHistogram_C1 getIppiHistogramFunction_C1(int type)
|
||||
{
|
||||
IppiHistogram_C1 ippFunction =
|
||||
(type == CV_8UC1) ? (IppiHistogram_C1)ippiHistogram_8u_C1R :
|
||||
#if IPP_VERSION_X100 >= 201700 || !(defined HAVE_IPP_ICV_ONLY)
|
||||
(type == CV_16UC1) ? (IppiHistogram_C1)ippiHistogram_16u_C1R :
|
||||
(type == CV_32FC1) ? (IppiHistogram_C1)ippiHistogram_32f_C1R :
|
||||
#endif
|
||||
NULL;
|
||||
|
||||
return ippFunction;
|
||||
@ -1401,9 +1399,9 @@ static bool ipp_calchist(const Mat &image, Mat &hist, int histSize, const float*
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
#if IPP_VERSION_X100 < 201801
|
||||
// No SSE42 optimization for uniform 32f
|
||||
#if IPP_DISABLE_PERF_HISTU32F_SSE42
|
||||
if(uniform && image.depth() == CV_32F && !(ipp::getIppFeatures()&ippCPUID_AVX))
|
||||
if(uniform && image.depth() == CV_32F && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
|
@ -2971,8 +2971,8 @@ public:
|
||||
|
||||
try
|
||||
{
|
||||
::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start);
|
||||
CV_INSTRUMENT_FUN_IPP(iwiResize, &m_src, &m_dst, &roi);
|
||||
::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
|
||||
CV_INSTRUMENT_FUN_IPP(iwiResize, m_src, m_dst, ippBorderRepl, tile);
|
||||
}
|
||||
catch(::ipp::IwException)
|
||||
{
|
||||
@ -3007,7 +3007,7 @@ public:
|
||||
{0, scaleY, shift+0.5*scaleY}
|
||||
};
|
||||
|
||||
iwiWarpAffine.InitAlloc(m_src.m_size, m_dst.m_size, m_src.m_dataType, m_src.m_channels, coeffs, ippWarpForward, inter, ::ipp::IwiWarpAffineParams(0, 0.75, 0), ippBorderRepl);
|
||||
iwiWarpAffine.InitAlloc(m_src.m_size, m_dst.m_size, m_src.m_dataType, m_src.m_channels, coeffs, iwTransForward, inter, ::ipp::IwiWarpAffineParams(0, 0, 0.75), ippBorderRepl);
|
||||
|
||||
m_ok = true;
|
||||
}
|
||||
@ -3021,8 +3021,8 @@ public:
|
||||
|
||||
try
|
||||
{
|
||||
::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start);
|
||||
CV_INSTRUMENT_FUN_IPP(iwiWarpAffine, &m_src, &m_dst, &roi);
|
||||
::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
|
||||
CV_INSTRUMENT_FUN_IPP(iwiWarpAffine, m_src, m_dst, tile);
|
||||
}
|
||||
catch(::ipp::IwException)
|
||||
{
|
||||
@ -3053,23 +3053,28 @@ static bool ipp_resize(const uchar * src_data, size_t src_step, int src_width, i
|
||||
if(ippInter < 0)
|
||||
return false;
|
||||
|
||||
#if IPP_DISABLE_RESIZE_NEAREST
|
||||
if(ippInter == ippNearest)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
#if IPP_DISABLE_RESIZE_AREA
|
||||
if(ippInter == ippSuper)
|
||||
return false;
|
||||
#endif
|
||||
// Resize which doesn't match OpenCV exactly
|
||||
if(!cv::ipp::useIPP_NE())
|
||||
{
|
||||
if(ippInter == ippNearest || ippInter == ippSuper || (ippDataType == ipp8u && ippInter == ippLinear))
|
||||
return false;
|
||||
}
|
||||
|
||||
if(ippInter != ippLinear && ippDataType == ipp64f)
|
||||
return false;
|
||||
|
||||
// Accuracy mismatch is 1 but affects detectors greatly
|
||||
#if IPP_DISABLE_RESIZE_8U
|
||||
if(ippDataType == ipp8u && ippInter == ippLinear)
|
||||
return false;
|
||||
#if IPP_VERSION_X100 < 201801
|
||||
// Degradations on int^2 linear downscale
|
||||
if(ippDataType != ipp64f && ippInter == ippLinear && inv_scale_x < 1 && inv_scale_y < 1) // if downscale
|
||||
{
|
||||
int scale_x = (int)(1/inv_scale_x);
|
||||
int scale_y = (int)(1/inv_scale_y);
|
||||
if(1/inv_scale_x - scale_x < DBL_EPSILON && 1/inv_scale_y - scale_y < DBL_EPSILON) // if integer
|
||||
{
|
||||
if(!(scale_x&(scale_x-1)) && !(scale_y&(scale_y-1))) // if power of 2
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
bool affine = false;
|
||||
|
@ -571,6 +571,12 @@ static bool ipp_moments(Mat &src, Moments &m )
|
||||
#if IPP_VERSION_X100 >= 900
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
#if IPP_VERSION_X100 < 201801
|
||||
// Degradations for CV_8UC1
|
||||
if(src.type() == CV_8UC1)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
IppiSize roi = { src.cols, src.rows };
|
||||
IppiPoint point = { 0, 0 };
|
||||
int type = src.type();
|
||||
|
@ -1140,20 +1140,41 @@ static bool ippMorph(int op, int src_type, int dst_type,
|
||||
#ifdef HAVE_IPP_IW
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
// Problem with SSE42 optimizations
|
||||
#if IPP_DISABLE_PERF_MORPH_SSE42
|
||||
if(!(ipp::getIppFeatures()&ippCPUID_AVX))
|
||||
#if IPP_VERSION_X100 < 201800
|
||||
// Problem with SSE42 optimizations performance
|
||||
if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
|
||||
return false;
|
||||
|
||||
// Different mask flipping
|
||||
if(op == MORPH_GRADIENT)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
::ipp::IwAutoBuffer<Ipp8u> kernelTempBuffer;
|
||||
#if IPP_VERSION_X100 < 201801
|
||||
// Problem with AVX512 optimizations performance
|
||||
if(cv::ipp::getIppTopFeatures()&ippCPUID_AVX512F)
|
||||
return false;
|
||||
|
||||
// Multiple iterations on a small mask are not effective in the current integration
|
||||
// In-place imitation for 3x3 kernel is not efficient
|
||||
// Advanced morphology for small mask introduces degradations
|
||||
if((iterations > 1 || src_data == dst_data || (op != MORPH_ERODE && op != MORPH_DILATE)) && kernel_width*kernel_height < 25)
|
||||
return false;
|
||||
|
||||
// Skip even mask sizes for advanced morphology since they can produce out of spec writes
|
||||
if((op != MORPH_ERODE && op != MORPH_DILATE) && (!(kernel_width&1) || !(kernel_height&1)))
|
||||
return false;
|
||||
#endif
|
||||
|
||||
IppAutoBuffer<Ipp8u> kernelTempBuffer;
|
||||
::ipp::IwiBorderSize iwBorderSize;
|
||||
::ipp::IwiBorderSize iwBorderSize2;
|
||||
::ipp::IwiBorderType iwBorderType;
|
||||
::ipp::IwiBorderType iwBorderType2;
|
||||
::ipp::IwiImage iwMask;
|
||||
::ipp::IwiImage iwInter;
|
||||
::ipp::IwiSize initSize(width, height);
|
||||
::ipp::IwiSize kernelSize(kernel_width, kernel_height);
|
||||
::ipp::IwiPoint anchor(anchor_x, anchor_y);
|
||||
IppDataType type = ippiGetDataType(CV_MAT_DEPTH(src_type));
|
||||
int channels = CV_MAT_CN(src_type);
|
||||
IwiMorphologyType morphType = ippiGetMorphologyType(op);
|
||||
@ -1169,68 +1190,99 @@ static bool ippMorph(int op, int src_type, int dst_type,
|
||||
if(src_type != dst_type)
|
||||
return false;
|
||||
|
||||
if(!ippiCheckAnchor(anchor_x, anchor_y, kernel_width, kernel_height))
|
||||
return false;
|
||||
|
||||
try
|
||||
{
|
||||
::ipp::IwiImage iwSrc(initSize, type, channels, ::ipp::IwiBorderSize(roi_x, roi_y, roi_width-roi_x-width, roi_height-roi_y-height), (void*)src_data, src_step);
|
||||
::ipp::IwiImage iwDst(initSize, type, channels, ::ipp::IwiBorderSize(roi_x2, roi_y2, roi_width2-roi_x2-width, roi_height2-roi_y2-height), (void*)dst_data, dst_step);
|
||||
|
||||
::ipp::iwiFilterMorphology_GetBorderSize(morphType, kernelSize, iwBorderSize);
|
||||
if(morphType != iwiMorphErode && morphType != iwiMorphDilate)
|
||||
iwBorderSize = ::ipp::iwiSizeToBorderSize(kernelSize);
|
||||
iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize);
|
||||
if(!iwBorderType)
|
||||
return false;
|
||||
if(iterations > 1)
|
||||
{
|
||||
iwBorderSize.borderLeft /= 2;
|
||||
iwBorderSize.borderTop /= 2;
|
||||
iwBorderSize.borderRight /= 2;
|
||||
iwBorderSize.borderBottom /= 2;
|
||||
// Check dst border for second and later iterations
|
||||
iwBorderSize2 = ::ipp::iwiSizeToBorderSize(kernelSize);
|
||||
iwBorderType2 = ippiGetBorder(iwDst, borderType, iwBorderSize2);
|
||||
if(!iwBorderType2)
|
||||
return false;
|
||||
}
|
||||
|
||||
iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize);
|
||||
if(!iwBorderType.m_borderType || ((iwBorderType.m_borderFlags&ippBorderInMem) && (iwBorderType.m_borderFlags&ippBorderInMem) != ippBorderInMem))
|
||||
return false;
|
||||
if(morphType != iwiMorphErode && morphType != iwiMorphDilate && morphType != iwiMorphGradient)
|
||||
{
|
||||
// For now complex morphology support only InMem around all sides. This will be improved later.
|
||||
if((iwBorderType&ippBorderInMem) && (iwBorderType&ippBorderInMem) != ippBorderInMem)
|
||||
return false;
|
||||
|
||||
if(iwBorderType.m_borderType == ippBorderConst)
|
||||
if((iwBorderType&ippBorderInMem) == ippBorderInMem)
|
||||
{
|
||||
iwBorderType &= ~ippBorderInMem;
|
||||
iwBorderType &= ippBorderFirstStageInMem;
|
||||
}
|
||||
}
|
||||
|
||||
if(iwBorderType.StripFlags() == ippBorderConst)
|
||||
{
|
||||
if(Vec<double, 4>(borderValue) == morphologyDefaultBorderValue())
|
||||
iwBorderType.m_borderType = ippBorderDefault;
|
||||
iwBorderType.SetType(ippBorderDefault);
|
||||
else
|
||||
iwBorderType.SetValue(borderValue[0], borderValue[1], borderValue[2], borderValue[3]);
|
||||
}
|
||||
if(morphType != iwiMorphErode && morphType != iwiMorphDilate)
|
||||
{
|
||||
if((iwBorderType.m_borderFlags&ippBorderInMem) == ippBorderInMem)
|
||||
iwBorderType.m_borderFlags = ippBorderFirstStageInMem;
|
||||
}
|
||||
|
||||
// Test input parameters on dummy structures
|
||||
{
|
||||
::ipp::IwiImage testSrc(initSize, type, channels);
|
||||
::ipp::IwiImage testDst(initSize, type, channels);
|
||||
::ipp::IwiImage testMask(ippiSize(kernel_width, kernel_height), ipp8u, CV_MAT_CN(kernel_type));
|
||||
|
||||
::ipp::iwiFilterMorphology(&testSrc, &testDst, morphType, &testMask, &anchor, iwBorderType);
|
||||
iwBorderType.m_value = ::ipp::IwValueFloat(borderValue[0], borderValue[1], borderValue[2], borderValue[3]);
|
||||
}
|
||||
|
||||
iwMask.Init(ippiSize(kernel_width, kernel_height), ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, kernel_data, kernel_step);
|
||||
if((int)kernel_step != kernel_width || CV_MAT_DEPTH(kernel_type) != CV_8U)
|
||||
|
||||
::ipp::IwiImage iwMaskLoc = iwMask;
|
||||
if(morphType == iwiMorphDilate)
|
||||
{
|
||||
kernelTempBuffer.Alloc(kernel_width*kernel_height);
|
||||
::ipp::IwiImage iwMaskTmp(ippiSize(kernel_width, kernel_height), ipp8u, 1, 0, kernelTempBuffer, kernel_width);
|
||||
::ipp::iwiScale(&iwMask, &iwMaskTmp, 1, 0);
|
||||
iwMask = iwMaskTmp;
|
||||
iwMaskLoc.Alloc(iwMask.m_size, iwMask.m_dataType, iwMask.m_channels);
|
||||
::ipp::iwiMirror(iwMask, iwMaskLoc, ippAxsBoth);
|
||||
iwMask = iwMaskLoc;
|
||||
}
|
||||
|
||||
if(iterations > 1)
|
||||
{
|
||||
iwInter.Alloc(initSize, type, channels);
|
||||
// OpenCV uses in mem border from dst for two and more iterations, so we need to keep this border in intermediate image
|
||||
iwInter.Alloc(initSize, type, channels, iwBorderSize2);
|
||||
|
||||
::ipp::IwiImage *pSwap[2] = {&iwInter, &iwDst};
|
||||
::ipp::IwiBorderType iterBorder = iwBorderType;
|
||||
iterBorder.m_borderFlags = 0;
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwInter, morphType, &iwMask, NULL, iwBorderType);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwInter, morphType, iwMask, ::ipp::IwDefault(), iwBorderType);
|
||||
|
||||
// Copy border only
|
||||
{
|
||||
if(iwBorderSize2.top)
|
||||
{
|
||||
::ipp::IwiRoi borderRoi(-iwBorderSize2.left, -iwBorderSize2.top, iwDst.m_size.width+iwBorderSize2.left+iwBorderSize2.right, iwBorderSize2.top);
|
||||
::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
|
||||
::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
|
||||
}
|
||||
if(iwBorderSize2.bottom)
|
||||
{
|
||||
::ipp::IwiRoi borderRoi(-iwBorderSize2.left, iwDst.m_size.height, iwDst.m_size.width+iwBorderSize2.left+iwBorderSize2.right, iwBorderSize2.bottom);
|
||||
::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
|
||||
::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
|
||||
}
|
||||
if(iwBorderSize2.left)
|
||||
{
|
||||
::ipp::IwiRoi borderRoi(-iwBorderSize2.left, 0, iwBorderSize2.left, iwDst.m_size.height);
|
||||
::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
|
||||
::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
|
||||
}
|
||||
if(iwBorderSize2.right)
{
    // Right-hand border strip: it starts at x == image width and has to be
    // iwBorderSize2.right columns wide. Using the left border size here is
    // only correct when both sides happen to be equal.
    ::ipp::IwiRoi borderRoi(iwDst.m_size.width, 0, iwBorderSize2.right, iwDst.m_size.height);
    ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
    ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
}
|
||||
}
|
||||
|
||||
iwBorderType2.SetType(iwBorderType);
|
||||
for(int i = 0; i < iterations-1; i++)
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, pSwap[i&0x1], pSwap[(i+1)&0x1], morphType, &iwMask, NULL, iterBorder);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, *pSwap[i&0x1], *pSwap[(i+1)&0x1], morphType, iwMask, ::ipp::IwDefault(), iwBorderType2);
|
||||
if(iterations&0x1)
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopyMask, &iwInter, &iwDst);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopy, iwInter, iwDst);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1238,11 +1290,11 @@ static bool ippMorph(int op, int src_type, int dst_type,
|
||||
{
|
||||
iwInter.Alloc(initSize, type, channels);
|
||||
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwInter, morphType, &iwMask, NULL, iwBorderType);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopyMask, &iwInter, &iwDst);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwInter, morphType, iwMask, ::ipp::IwDefault(), iwBorderType);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopy, iwInter, iwDst);
|
||||
}
|
||||
else
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwDst, morphType, &iwMask, NULL, iwBorderType);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwDst, morphType, iwMask, ::ipp::IwDefault(), iwBorderType);
|
||||
}
|
||||
}
|
||||
catch(::ipp::IwException ex)
|
||||
@ -1912,6 +1964,7 @@ static bool ocl_morphologyEx(InputArray _src, OutputArray _dst, int op,
|
||||
}
|
||||
#endif
|
||||
|
||||
#define IPP_DISABLE_MORPH_ADV 1
|
||||
#ifdef HAVE_IPP
|
||||
#if !IPP_DISABLE_MORPH_ADV
|
||||
namespace cv {
|
||||
|
@ -1729,80 +1729,47 @@ namespace cv
|
||||
{
|
||||
static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType)
|
||||
{
|
||||
#ifdef HAVE_IPP_IW
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
// Problem with SSE42 optimization for 16s
|
||||
#if IPP_DISABLE_PERF_BOX16S_SSE42
|
||||
if(src.depth() == CV_16S && !(ipp::getIppFeatures()&ippCPUID_AVX))
|
||||
#if IPP_VERSION_X100 < 201801
|
||||
// Problem with SSE42 optimization for 16s and some 8u modes
|
||||
if(ipp::getIppTopFeatures() == ippCPUID_SSE42 && (((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 3 || src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 3 && (ksize.width > 5 || ksize.height > 5))))
|
||||
return false;
|
||||
|
||||
// Other optimizations have some degradations too
|
||||
if((((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 1 && (ksize.width > 5 || ksize.height > 5))))
|
||||
return false;
|
||||
#endif
|
||||
|
||||
int stype = src.type(), cn = CV_MAT_CN(stype);
|
||||
IppiBorderType ippBorderType = ippiGetBorderType(borderType & ~BORDER_ISOLATED);
|
||||
IppDataType ippType = ippiGetDataType(stype);
|
||||
Point ocvAnchor, ippAnchor;
|
||||
ocvAnchor.x = anchor.x < 0 ? ksize.width / 2 : anchor.x;
|
||||
ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y;
|
||||
ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0);
|
||||
ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0);
|
||||
if(!normalize)
|
||||
return false;
|
||||
|
||||
if(normalize && (!src.isSubmatrix() || borderType&BORDER_ISOLATED) && stype == dst.type() &&
|
||||
(ippBorderType == ippBorderRepl || /* returns ippStsStepErr: Step value is not valid */
|
||||
ippBorderType == ippBorderConst ||
|
||||
ippBorderType == ippBorderMirror) && ocvAnchor == ippAnchor) // returns ippStsMaskSizeErr: mask has an illegal value
|
||||
if(!ippiCheckAnchor(anchor, ksize))
|
||||
return false;
|
||||
|
||||
try
|
||||
{
|
||||
IppStatus status;
|
||||
Ipp32s bufSize = 0;
|
||||
IppiSize roiSize = { dst.cols, dst.rows };
|
||||
IppiSize maskSize = { ksize.width, ksize.height };
|
||||
IppAutoBuffer<Ipp8u> buffer;
|
||||
|
||||
if(ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippType, cn, &bufSize) < 0)
|
||||
::ipp::IwiImage iwSrc = ippiGetImage(src);
|
||||
::ipp::IwiImage iwDst = ippiGetImage(dst);
|
||||
::ipp::IwiSize iwKSize = ippiGetSize(ksize);
|
||||
::ipp::IwiBorderSize borderSize(iwKSize);
|
||||
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
|
||||
if(!ippBorder)
|
||||
return false;
|
||||
|
||||
buffer.allocate(bufSize);
|
||||
|
||||
#define IPP_FILTER_BOX_BORDER(ippType, flavor)\
|
||||
{\
|
||||
ippType borderValue[4] = { 0, 0, 0, 0 };\
|
||||
status = CV_INSTRUMENT_FUN_IPP(ippiFilterBoxBorder_##flavor, src.ptr<ippType>(), (int)src.step, dst.ptr<ippType>(),\
|
||||
(int)dst.step, roiSize, maskSize,\
|
||||
ippBorderType, borderValue, buffer);\
|
||||
}
|
||||
|
||||
if (stype == CV_8UC1)
|
||||
IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C1R)
|
||||
else if (stype == CV_8UC3)
|
||||
IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C3R)
|
||||
else if (stype == CV_8UC4)
|
||||
IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C4R)
|
||||
else if (stype == CV_16UC1)
|
||||
IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C1R)
|
||||
else if (stype == CV_16UC3)
|
||||
IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C3R)
|
||||
else if (stype == CV_16UC4)
|
||||
IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C4R)
|
||||
else if (stype == CV_16SC1)
|
||||
IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C1R)
|
||||
else if (stype == CV_16SC3)
|
||||
IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C3R)
|
||||
else if (stype == CV_16SC4)
|
||||
IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C4R)
|
||||
else if (stype == CV_32FC1)
|
||||
IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C1R)
|
||||
else if (stype == CV_32FC3)
|
||||
IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C3R)
|
||||
else if (stype == CV_32FC4)
|
||||
IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C4R)
|
||||
else
|
||||
return false;
|
||||
|
||||
if(status >= 0)
|
||||
return true;
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBox, iwSrc, iwDst, iwKSize, ::ipp::IwDefault(), ippBorder);
|
||||
}
|
||||
catch (::ipp::IwException)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#undef IPP_FILTER_BOX_BORDER
|
||||
|
||||
return true;
|
||||
#else
|
||||
CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(normalize); CV_UNUSED(borderType);
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -2241,8 +2208,11 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
#define IPP_DISABLE_FILTERING_INMEM_PARTIAL 1 // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling
|
||||
#if IPP_VERSION_X100 == 201702 // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling
|
||||
#define IPP_GAUSSIANBLUR_PARALLEL 0
|
||||
#else
|
||||
#define IPP_GAUSSIANBLUR_PARALLEL 1
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_IPP_IW
|
||||
|
||||
@ -2266,8 +2236,8 @@ public:
|
||||
|
||||
try
|
||||
{
|
||||
::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, &m_src, &m_dst, m_kernelSize, m_sigma, m_border, &roi);
|
||||
::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, m_src, m_dst, m_kernelSize, m_sigma, ::ipp::IwDefault(), m_border, tile);
|
||||
}
|
||||
catch(::ipp::IwException e)
|
||||
{
|
||||
@ -2295,7 +2265,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
#ifdef HAVE_IPP_IW
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
#if IPP_VERSION_X100 <= 201702 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__))
|
||||
#if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__))
|
||||
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
|
||||
return false; // bug on ia32
|
||||
#else
|
||||
@ -2313,17 +2283,15 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
{
|
||||
Mat src = _src.getMat();
|
||||
Mat dst = _dst.getMat();
|
||||
::ipp::IwiImage iwSrc = ippiGetImage(src);
|
||||
::ipp::IwiImage iwDst = ippiGetImage(dst);
|
||||
::ipp::IwiBorderSize borderSize(::ipp::IwiSize(ippiSize(ksize)));
|
||||
::ipp::IwiImage iwSrc = ippiGetImage(src);
|
||||
::ipp::IwiImage iwDst = ippiGetImage(dst);
|
||||
::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize));
|
||||
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
|
||||
if(!ippBorder.m_borderType)
|
||||
if(!ippBorder)
|
||||
return false;
|
||||
|
||||
const bool disableThreading = IPP_DISABLE_FILTERING_INMEM_PARTIAL &&
|
||||
((ippBorder.m_borderFlags)&ippBorderInMem) && ((ippBorder.m_borderFlags)&ippBorderInMem) != ippBorderInMem;
|
||||
const int threads = ippiSuggestThreadsNum(iwDst, 2);
|
||||
if(!disableThreading && IPP_GAUSSIANBLUR_PARALLEL && threads > 1) {
|
||||
if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) {
|
||||
bool ok;
|
||||
ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok);
|
||||
|
||||
@ -2335,7 +2303,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
if(!ok)
|
||||
return false;
|
||||
} else {
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, &iwSrc, &iwDst, ksize.width, (float) sigma1, ippBorder);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, iwSrc, iwDst, ksize.width, sigma1, ::ipp::IwDefault(), ippBorder);
|
||||
}
|
||||
}
|
||||
catch (::ipp::IwException ex)
|
||||
@ -3411,6 +3379,12 @@ static bool ipp_medianFilter(Mat &src0, Mat &dst, int ksize)
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
#if IPP_VERSION_X100 < 201801
|
||||
// Degradations for big kernel
|
||||
if(ksize > 7)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
{
|
||||
int bufSize;
|
||||
IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize);
|
||||
@ -4279,8 +4253,8 @@ public:
|
||||
|
||||
try
|
||||
{
|
||||
::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, dst.m_size.width, range.end - range.start);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, &src, &dst, radius, valSquareSigma, posSquareSigma, ippiFilterBilateralGauss, ippDistNormL1, borderType, &roi);
|
||||
::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, dst.m_size.width, range.end - range.start);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, src, dst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), borderType, tile);
|
||||
}
|
||||
catch(::ipp::IwException)
|
||||
{
|
||||
@ -4318,13 +4292,11 @@ static bool ipp_bilateralFilter(Mat &src, Mat &dst, int d, double sigmaColor, do
|
||||
::ipp::IwiImage iwDst = ippiGetImage(dst);
|
||||
::ipp::IwiBorderSize borderSize(radius);
|
||||
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
|
||||
if(!ippBorder.m_borderType)
|
||||
if(!ippBorder)
|
||||
return false;
|
||||
|
||||
const bool disableThreading = IPP_DISABLE_FILTERING_INMEM_PARTIAL &&
|
||||
((ippBorder.m_borderFlags)&ippBorderInMem) && ((ippBorder.m_borderFlags)&ippBorderInMem) != ippBorderInMem;
|
||||
const int threads = ippiSuggestThreadsNum(iwDst, 2);
|
||||
if(!disableThreading && IPP_BILATERAL_PARALLEL && threads > 1) {
|
||||
if(IPP_BILATERAL_PARALLEL && threads > 1) {
|
||||
bool ok = true;
|
||||
Range range(0, (int)iwDst.m_size.height);
|
||||
ipp_bilateralFilterParallel invoker(iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ippBorder, &ok);
|
||||
@ -4336,7 +4308,7 @@ static bool ipp_bilateralFilter(Mat &src, Mat &dst, int d, double sigmaColor, do
|
||||
if(!ok)
|
||||
return false;
|
||||
} else {
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, &iwSrc, &iwDst, radius, valSquareSigma, posSquareSigma, ippiFilterBilateralGauss, ippDistNormL1, ippBorder);
|
||||
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), ippBorder);
|
||||
}
|
||||
}
|
||||
catch (::ipp::IwException)
|
||||
|
@ -962,19 +962,18 @@ static bool ipp_getThreshVal_Otsu_8u( const unsigned char* _src, int step, Size
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP()
|
||||
|
||||
#if IPP_VERSION_X100 >= 810
|
||||
int ippStatus = -1;
|
||||
// Performance degradations
|
||||
#if IPP_VERSION_X100 >= 201800
|
||||
IppiSize srcSize = { size.width, size.height };
|
||||
CV_SUPPRESS_DEPRECATED_START
|
||||
ippStatus = CV_INSTRUMENT_FUN_IPP(ippiComputeThreshold_Otsu_8u_C1R, _src, step, srcSize, &thresh);
|
||||
CV_SUPPRESS_DEPRECATED_END
|
||||
|
||||
if(ippStatus >= 0)
|
||||
return true;
|
||||
if(CV_INSTRUMENT_FUN_IPP(ippiComputeThreshold_Otsu_8u_C1R, _src, step, srcSize, &thresh) < 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
#else
|
||||
CV_UNUSED(_src); CV_UNUSED(step); CV_UNUSED(size); CV_UNUSED(thresh);
|
||||
#endif
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -992,7 +991,7 @@ getThreshVal_Otsu_8u( const Mat& _src )
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
unsigned char thresh;
|
||||
CV_IPP_RUN(IPP_VERSION_X100 >= 810, ipp_getThreshVal_Otsu_8u(_src.ptr(), step, size, thresh), thresh);
|
||||
CV_IPP_RUN_FAST(ipp_getThreshVal_Otsu_8u(_src.ptr(), step, size, thresh), thresh);
|
||||
#endif
|
||||
|
||||
const int N = 256;
|
||||
|
@ -136,7 +136,7 @@ icvReleaseHidHaarClassifierCascade( CvHidHaarClassifierCascade** _cascade )
|
||||
for( i = 0; i < cascade->count; i++ )
|
||||
{
|
||||
if( cascade->ipp_stages[i] )
|
||||
#if IPP_VERSION_X100 < 900
|
||||
#if IPP_VERSION_X100 < 900 && !IPP_DISABLE_HAAR
|
||||
ippiHaarClassifierFree_32f( (IppiHaarClassifier_32f*)cascade->ipp_stages[i] );
|
||||
#else
|
||||
cvFree(&cascade->ipp_stages[i]);
|
||||
|
@ -339,7 +339,7 @@ typedef struct ImplData
|
||||
// convert flags register to more handy variables
|
||||
void flagsToVars(int flags)
|
||||
{
|
||||
#if defined(HAVE_IPP_ICV_ONLY)
|
||||
#if defined(HAVE_IPP_ICV)
|
||||
ipp = 0;
|
||||
icv = ((flags&CV_IMPL_IPP) > 0);
|
||||
#else
|
||||
|
@ -3078,6 +3078,16 @@ void printVersionInfo(bool useStdOut)
|
||||
::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization);
|
||||
if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
const char * ipp_optimization = cv::ipp::useIPP()? "enabled" : "disabled";
|
||||
::testing::Test::RecordProperty("cv_ipp_optimization", ipp_optimization);
|
||||
if (useStdOut) std::cout << "Intel(R) IPP optimization: " << ipp_optimization << std::endl;
|
||||
|
||||
cv::String ippVer = cv::ipp::getIppVersion();
|
||||
::testing::Test::RecordProperty("cv_ipp_version", ippVer);
|
||||
if(useStdOut) std::cout << "Intel(R) IPP version: " << ippVer.c_str() << std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user