ICV2017u3 package update;

- Optimization set changed: IPP integration now provides code for SSE42, AVX2 and AVX512 (SKX) CPUs only. On hardware below SSE42, IPP code is disabled.
- Performance regression fixes for IPP code paths;
- cv::boxFilter integration improvement;
- cv::filter2D integration improvement;
This commit is contained in:
Pavel Vlasov 2017-08-17 14:57:58 +03:00
parent 87c27a074d
commit a57718e1ac
30 changed files with 727 additions and 584 deletions

View File

@ -6,7 +6,7 @@ project(${IPP_IW_LIBRARY})
ocv_include_directories(${IPP_INCLUDE_DIRS} ${IPP_IW_PATH}/include)
add_definitions(-DIW_BUILD)
if(HAVE_IPP_ICV_ONLY)
if(HAVE_IPP_ICV)
add_definitions(-DICV_BASE)
endif()
@ -21,7 +21,10 @@ add_library(${IPP_IW_LIBRARY} STATIC ${lib_srcs} ${lib_hdrs})
if(UNIX)
if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -Wno-unused-function")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -Wno-unused-function -Wno-missing-braces -Wno-missing-field-initializers")
endif()
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-self-assign")
endif()
endif()

View File

@ -2,37 +2,37 @@ function(download_ippicv root_var)
set(${root_var} "" PARENT_SCOPE)
# Commit SHA in the opencv_3rdparty repo
set(IPPICV_COMMIT "a62e20676a60ee0ad6581e217fe7e4bada3b95db")
set(IPPICV_COMMIT "dfe3162c237af211e98b8960018b564bc209261d")
# Define actual ICV versions
if(APPLE)
set(OPENCV_ICV_PLATFORM "macosx")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_mac")
if(X86_64)
set(OPENCV_ICV_NAME "ippicv_2017u2_mac_intel64_20170418.tgz")
set(OPENCV_ICV_HASH "0c25953c99dbb499ff502485a9356d8d")
set(OPENCV_ICV_NAME "ippicv_2017u3_mac_intel64_general_20170822.tgz")
set(OPENCV_ICV_HASH "c1ebb5dfa5b7f54b0c44e1917805a463")
else()
set(OPENCV_ICV_NAME "ippicv_2017u2_mac_ia32_20170418.tgz")
set(OPENCV_ICV_HASH "5f225948f3f64067c681293c098d50d8")
set(OPENCV_ICV_NAME "ippicv_2017u3_mac_ia32_general_20170822.tgz")
set(OPENCV_ICV_HASH "49b05a669042753ae75895a445ebd612")
endif()
elseif((UNIX AND NOT ANDROID) OR (UNIX AND ANDROID_ABI MATCHES "x86"))
set(OPENCV_ICV_PLATFORM "linux")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_lnx")
if(X86_64)
set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_intel64_20170418.tgz")
set(OPENCV_ICV_HASH "87cbdeb627415d8e4bc811156289fa3a")
set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_intel64_general_20170822.tgz")
set(OPENCV_ICV_HASH "4e0352ce96473837b1d671ce87f17359")
else()
set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_ia32_20170418.tgz")
set(OPENCV_ICV_HASH "f2cece00d802d4dea86df52ed095257e")
set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_ia32_general_20170822.tgz")
set(OPENCV_ICV_HASH "dcdb0ba4b123f240596db1840cd59a76")
endif()
elseif(WIN32 AND NOT ARM)
set(OPENCV_ICV_PLATFORM "windows")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_win")
if(X86_64)
set(OPENCV_ICV_NAME "ippicv_2017u2_win_intel64_20170418.zip")
set(OPENCV_ICV_HASH "75060a0c662c0800f48995b7e9b085f6")
set(OPENCV_ICV_NAME "ippicv_2017u3_win_intel64_general_20170822.zip")
set(OPENCV_ICV_HASH "0421e642bc7ad741a2236d3ec4190bdd")
else()
set(OPENCV_ICV_NAME "ippicv_2017u2_win_ia32_20170418.zip")
set(OPENCV_ICV_HASH "60fcf3ccd9a2ebc9e432ffb5cb91638b")
set(OPENCV_ICV_NAME "ippicv_2017u3_win_ia32_general_20170822.zip")
set(OPENCV_ICV_HASH "8a7680ae352c192de2e2e34936164bd0")
endif()
else()
return()

View File

@ -255,7 +255,6 @@ OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF (WIN32 AND NOT WINRT) )
OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF (WIN32 AND NOT WINRT) )
OCV_OPTION(WITH_IPP_A "Include Intel IPP_A support" OFF IF (MSVC OR X86 OR X86_64) )
OCV_OPTION(WITH_MATLAB "Include Matlab support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT))
OCV_OPTION(WITH_VA "Include VA support" OFF IF (UNIX AND NOT ANDROID) )
OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) )
@ -1311,15 +1310,16 @@ status(" Other third-party libraries:")
if(WITH_IPP AND HAVE_IPP)
status(" Use Intel IPP:" "${IPP_VERSION_STR} [${IPP_VERSION_MAJOR}.${IPP_VERSION_MINOR}.${IPP_VERSION_BUILD}]")
status(" at:" "${IPP_ROOT_DIR}")
if(NOT HAVE_IPP_ICV_ONLY)
if(NOT HAVE_IPP_ICV)
status(" linked:" BUILD_WITH_DYNAMIC_IPP THEN "dynamic" ELSE "static")
endif()
if(HAVE_IPP_IW)
if(BUILD_IPP_IW)
status(" Use Intel IPP IW:" "build (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
status(" Use Intel IPP IW:" "sources (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
else()
status(" Use Intel IPP IW:" "prebuilt binaries (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
status(" Use Intel IPP IW:" "binaries (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
endif()
status(" at:" "${IPP_IW_PATH}")
else()
status(" Use Intel IPP IW:" NO)
endif()
@ -1328,10 +1328,6 @@ else()
status(" Use Intel IPP IW:" WITH_IPP AND NOT HAVE_IPP AND HAVE_IPP_IW THEN "IPP not found or implicitly disabled" ELSE NO)
endif()
if(DEFINED WITH_IPP_A)
status(" Use Intel IPP Async:" HAVE_IPP_A THEN "YES" ELSE NO)
endif(DEFINED WITH_IPP_A)
if(DEFINED WITH_VA)
status(" Use VA:" HAVE_VA THEN "YES" ELSE NO)
endif(DEFINED WITH_VA)

View File

@ -11,13 +11,13 @@
#
# On return this will define:
#
# HAVE_IPP - True if Intel IPP found
# HAVE_IPP_ICV_ONLY - True if Intel IPP ICV version is available
# IPP_ROOT_DIR - root of IPP installation
# IPP_INCLUDE_DIRS - IPP include folder
# IPP_LIBRARIES - IPP libraries that are used by OpenCV
# IPP_VERSION_STR - string with the newest detected IPP version
# IPP_VERSION_MAJOR - numbers of IPP version (MAJOR.MINOR.BUILD)
# HAVE_IPP - True if Intel IPP found
# HAVE_IPP_ICV - True if Intel IPP ICV version is available
# IPP_ROOT_DIR - root of IPP installation
# IPP_INCLUDE_DIRS - IPP include folder
# IPP_LIBRARIES - IPP libraries that are used by OpenCV
# IPP_VERSION_STR - string with the newest detected IPP version
# IPP_VERSION_MAJOR - numbers of IPP version (MAJOR.MINOR.BUILD)
# IPP_VERSION_MINOR
# IPP_VERSION_BUILD
#
@ -25,7 +25,7 @@
#
unset(HAVE_IPP CACHE)
unset(HAVE_IPP_ICV_ONLY)
unset(HAVE_IPP_ICV)
unset(IPP_ROOT_DIR)
unset(IPP_INCLUDE_DIRS)
unset(IPP_LIBRARIES)
@ -79,7 +79,7 @@ endmacro()
macro(_ipp_not_supported)
message(STATUS ${ARGN})
unset(HAVE_IPP)
unset(HAVE_IPP_ICV_ONLY)
unset(HAVE_IPP_ICV)
unset(IPP_VERSION_STR)
return()
endmacro()
@ -92,7 +92,7 @@ macro(ipp_detect_version)
set(__msg)
if(EXISTS ${IPP_ROOT_DIR}/include/ippicv_redefs.h)
set(__msg " (ICV version)")
set(HAVE_IPP_ICV_ONLY 1)
set(HAVE_IPP_ICV 1)
elseif(EXISTS ${IPP_ROOT_DIR}/include/ipp.h)
# nothing
else()
@ -118,7 +118,7 @@ macro(ipp_detect_version)
set(IPP_LIBRARY_DIR ${DIR})
endmacro()
if(APPLE AND NOT HAVE_IPP_ICV_ONLY)
if(APPLE AND NOT HAVE_IPP_ICV)
_ipp_set_library_dir(${IPP_ROOT_DIR}/lib)
elseif(IPP_X64)
_ipp_set_library_dir(${IPP_ROOT_DIR}/lib/intel64)
@ -128,7 +128,7 @@ macro(ipp_detect_version)
macro(_ipp_add_library name)
# dynamic linking is only supported for standalone version of Intel IPP
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY)
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
if (WIN32)
set(IPP_LIB_PREFIX ${CMAKE_IMPORT_LIBRARY_PREFIX})
set(IPP_LIB_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX})
@ -141,7 +141,7 @@ macro(ipp_detect_version)
set(IPP_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
endif ()
if (EXISTS ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY)
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
# When using dynamic libraries from standalone Intel IPP it is your responsibility to install those on the target system
list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
else ()
@ -167,14 +167,14 @@ macro(ipp_detect_version)
set(IPP_PREFIX "ipp")
if(${IPP_VERSION_STR} VERSION_LESS "8.0")
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY)
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
set(IPP_SUFFIX "") # dynamic not threaded libs suffix Intel IPP 7.x
else ()
set(IPP_SUFFIX "_l") # static not threaded libs suffix Intel IPP 7.x
endif ()
else ()
if(WIN32)
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY)
if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
set(IPP_SUFFIX "") # dynamic not threaded libs suffix Intel IPP 8.x for Windows
else ()
set(IPP_SUFFIX "mt") # static not threaded libs suffix Intel IPP 8.x for Windows
@ -184,7 +184,7 @@ macro(ipp_detect_version)
endif()
endif()
if(HAVE_IPP_ICV_ONLY)
if(HAVE_IPP_ICV)
_ipp_add_library(icv)
else()
_ipp_add_library(cv)

View File

@ -27,6 +27,7 @@ macro(ippiw_debugmsg MESSAGE)
message(STATUS "${MESSAGE}")
endif()
endmacro()
file(TO_CMAKE_PATH "${IPPROOT}" IPPROOT)
# This function detects Intel IPP IW version by analyzing .h file
macro(ippiw_setup PATH BUILD)
@ -153,7 +154,7 @@ ippiw_setup("${IPPROOT}/../${IW_PACKAGE_SUBDIR}/" 0)
# take Intel IPP IW from ICV package
if(NOT HAVE_IPP_ICV_ONLY AND BUILD_IPP_IW)
if(NOT HAVE_IPP_ICV AND BUILD_IPP_IW)
message(STATUS "Cannot find Intel IPP IW. Checking \"Intel IPP for OpenCV\" package")
set(TEMP_ROOT 0)
include("${OpenCV_SOURCE_DIR}/3rdparty/ippicv/ippicv.cmake")

View File

@ -103,7 +103,7 @@
/* Intel Integrated Performance Primitives */
#cmakedefine HAVE_IPP
#cmakedefine HAVE_IPP_ICV_ONLY
#cmakedefine HAVE_IPP_ICV
#cmakedefine HAVE_IPP_IW
/* Intel IPP Async */

View File

@ -693,8 +693,14 @@ CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, c
int line = 0);
CV_EXPORTS int getIppStatus();
CV_EXPORTS String getIppErrorLocation();
CV_EXPORTS_W bool useIPP();
CV_EXPORTS_W void setUseIPP(bool flag);
CV_EXPORTS_W bool useIPP();
CV_EXPORTS_W void setUseIPP(bool flag);
CV_EXPORTS_W String getIppVersion();
// IPP Not-Exact mode. This function may force use of IPP when both IPP and OpenCV provide proper results
// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests.
CV_EXPORTS_W bool useIPP_NE();
CV_EXPORTS_W void setUseIPP_NE(bool flag);
} // ipp

View File

@ -194,8 +194,6 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
#define IPP_DISABLE_WARPAFFINE 1 // Different results
#define IPP_DISABLE_WARPPERSPECTIVE 1 // Different results
#define IPP_DISABLE_REMAP 1 // Different results
#define IPP_DISABLE_MORPH_ADV 1 // mask flipping in IPP
#define IPP_DISABLE_SORT_IDX 0 // different order in index tables
#define IPP_DISABLE_YUV_RGB 1 // accuracy difference
#define IPP_DISABLE_RGB_YUV 1 // breaks OCL accuracy tests
#define IPP_DISABLE_RGB_HSV 1 // breaks OCL accuracy tests
@ -205,21 +203,12 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
#define IPP_DISABLE_XYZ_RGB 1 // big accuracy difference
#define IPP_DISABLE_HAAR 1 // improper integration/results
#define IPP_DISABLE_HOUGH 1 // improper integration/results
#define IPP_DISABLE_RESIZE_8U 1 // Incompatible accuracy
#define IPP_DISABLE_RESIZE_NEAREST 1 // Accuracy mismatch (max diff 1)
#define IPP_DISABLE_RESIZE_AREA 1 // Accuracy mismatch (max diff 1)
#define IPP_DISABLE_MINMAX_NAN_SSE42 1 // cv::minMaxIdx problem with NaN input
// Temporarily disabled named IPP regions (performance reasons)
#define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations
#define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653)
#define IPP_DISABLE_PERF_TRUE_DIST_MT 1 // cv::distanceTransform OpenCV MT performance is better
#define IPP_DISABLE_PERF_CANNY_MT 1 // cv::Canny OpenCV MT performance is better
#define IPP_DISABLE_PERF_HISTU32F_SSE42 1 // cv::calcHist optimizations problem
#define IPP_DISABLE_PERF_MORPH_SSE42 1 // cv::erode, cv::dilate optimizations problem
#define IPP_DISABLE_PERF_MAG_SSE42 1 // cv::magnitude optimizations problem
#define IPP_DISABLE_PERF_BOX16S_SSE42 1 // cv::boxFilter optimizations problem
#ifdef HAVE_IPP
#include "ippversion.h"
@ -229,7 +218,7 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
#define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR*10 + IPP_VERSION_UPDATE)
#ifdef HAVE_IPP_ICV_ONLY
#ifdef HAVE_IPP_ICV
#define ICV_BASE
#if IPP_VERSION_X100 >= 201700
#include "ippicv.h"
@ -241,6 +230,7 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
#endif
#ifdef HAVE_IPP_IW
#include "iw++/iw.hpp"
#include "iw/iw_ll.h"
#endif
#if IPP_VERSION_X100 >= 201700
@ -251,6 +241,17 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un
#define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__)
#define ippCPUID_AVX512_SKX (ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512VL|ippCPUID_AVX512BW|ippCPUID_AVX512DQ)
#define ippCPUID_AVX512_KNL (ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512PF|ippCPUID_AVX512ER)
namespace cv
{
namespace ipp
{
// Returns top major enabled IPP feature flag.
// NOTE(review): presumably a single ippCPUID_* value (e.g. ippCPUID_SSE42) rather than
// a bit mask of all enabled features - confirm against the implementation.
CV_EXPORTS unsigned long long getIppTopFeatures();
}
}
static inline IppiSize ippiSize(size_t width, size_t height)
{
IppiSize size = { (int)width, (int)height };
@ -322,7 +323,43 @@ static inline IppDataType ippiGetDataType(int depth)
(IppDataType)-1;
}
// Suggests how many threads to use for an operation over a width x height image.
//
//   width, height - image size in pixels
//   elemSize      - size of one pixel in bytes
//   multiplier    - scale factor applied to the image footprint to estimate the
//                   memory touched by the operation
//
// Returns 1 when only one thread is available or the image has fewer than 64 rows.
// Otherwise returns the estimated footprint divided by the L2 cache size, clamped
// to the range [1, cv::getNumThreads()].
static inline int ippiSuggestThreadsNum(size_t width, size_t height, size_t elemSize, double multiplier)
{
    int threads = cv::getNumThreads();
    if(threads > 1 && height >= 64)
    {
        // Keep the estimate in floating point: narrowing it to int (as was done
        // before) is out of range for footprints above INT_MAX bytes.
        double opMemory = (double)width*(double)height*(double)elemSize*multiplier;

        int l2cache = 0;
#if IPP_VERSION_X100 >= 201700
        ippGetL2CacheSize(&l2cache);
#endif
        // Fall back to 256 KiB when the cache size is unknown
        if(!l2cache)
            l2cache = 1 << 18;

        double chunks = opMemory/l2cache;
        // Written so that NaN and non-positive estimates also yield 1
        if(!(chunks >= 1.0))
            return 1;
        // Clamp before narrowing so the cast below can never overflow
        if(chunks >= (double)threads)
            return threads;
        return (int)chunks;
    }
    return 1;
}
// Convenience overload: suggests a thread count for a cv::Mat by forwarding its
// column count, row count and per-element byte size to the size-based overload.
static inline int ippiSuggestThreadsNum(const cv::Mat &image, double multiplier)
{
    const size_t bytesPerElem = image.elemSize();
    return ippiSuggestThreadsNum(image.cols, image.rows, bytesPerElem, multiplier);
}
#ifdef HAVE_IPP_IW
// Tells whether the anchor (x, y) is the centre of a kernelWidth x kernelHeight
// kernel, i.e. x == (kernelWidth-1)/2 and y == (kernelHeight-1)/2 (integer division).
static inline bool ippiCheckAnchor(int x, int y, int kernelWidth, int kernelHeight)
{
    const int centerX = (kernelWidth - 1)/2;
    const int centerY = (kernelHeight - 1)/2;
    return x == centerX && y == centerY;
}
// Converts an OpenCV cv::Size into an IPP IW image size (width, height).
static inline ::ipp::IwiSize ippiGetSize(const cv::Size & size)
{
    const IwSize iwWidth  = (IwSize)size.width;
    const IwSize iwHeight = (IwSize)size.height;
    return ::ipp::IwiSize(iwWidth, iwHeight);
}
static inline IwiDerivativeType ippiGetDerivType(int dx, int dy, bool nvert)
{
return (dx == 1 && dy == 0) ? ((nvert)?iwiDerivNVerFirst:iwiDerivVerFirst) :
@ -341,10 +378,10 @@ static inline void ippiGetImage(const cv::Mat &src, ::ipp::IwiImage &dst)
cv::Point offset;
src.locateROI(origSize, offset);
inMemBorder.borderLeft = (Ipp32u)offset.x;
inMemBorder.borderTop = (Ipp32u)offset.y;
inMemBorder.borderRight = (Ipp32u)(origSize.width - src.cols - offset.x);
inMemBorder.borderBottom = (Ipp32u)(origSize.height - src.rows - offset.y);
inMemBorder.left = (IwSize)offset.x;
inMemBorder.top = (IwSize)offset.y;
inMemBorder.right = (IwSize)(origSize.width - src.cols - offset.x);
inMemBorder.bottom = (IwSize)(origSize.height - src.rows - offset.y);
}
dst.Init(ippiSize(src.size()), ippiGetDataType(src.depth()), src.channels(), inMemBorder, (void*)src.ptr(), src.step);
@ -357,7 +394,7 @@ static inline ::ipp::IwiImage ippiGetImage(const cv::Mat &src)
return image;
}
static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorderType, IppiBorderSize &borderSize)
static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorderType, ipp::IwiBorderSize &borderSize)
{
int inMemFlags = 0;
IppiBorderType border = ippiGetBorderType(ocvBorderType & ~cv::BORDER_ISOLATED);
@ -366,91 +403,60 @@ static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorder
if(!(ocvBorderType & cv::BORDER_ISOLATED))
{
if(image.m_inMemSize.borderLeft)
if(image.m_inMemSize.left)
{
if(image.m_inMemSize.borderLeft >= borderSize.borderLeft)
if(image.m_inMemSize.left >= borderSize.left)
inMemFlags |= ippBorderInMemLeft;
else
return (IppiBorderType)0;
}
else
borderSize.borderLeft = 0;
if(image.m_inMemSize.borderTop)
borderSize.left = 0;
if(image.m_inMemSize.top)
{
if(image.m_inMemSize.borderTop >= borderSize.borderTop)
if(image.m_inMemSize.top >= borderSize.top)
inMemFlags |= ippBorderInMemTop;
else
return (IppiBorderType)0;
}
else
borderSize.borderTop = 0;
if(image.m_inMemSize.borderRight)
borderSize.top = 0;
if(image.m_inMemSize.right)
{
if(image.m_inMemSize.borderRight >= borderSize.borderRight)
if(image.m_inMemSize.right >= borderSize.right)
inMemFlags |= ippBorderInMemRight;
else
return (IppiBorderType)0;
}
else
borderSize.borderRight = 0;
if(image.m_inMemSize.borderBottom)
borderSize.right = 0;
if(image.m_inMemSize.bottom)
{
if(image.m_inMemSize.borderBottom >= borderSize.borderBottom)
if(image.m_inMemSize.bottom >= borderSize.bottom)
inMemFlags |= ippBorderInMemBottom;
else
return (IppiBorderType)0;
}
else
borderSize.borderBottom = 0;
borderSize.bottom = 0;
}
else
borderSize.borderLeft = borderSize.borderRight = borderSize.borderTop = borderSize.borderBottom = 0;
borderSize.left = borderSize.right = borderSize.top = borderSize.bottom = 0;
return (IppiBorderType)(border|inMemFlags);
}
static inline ::ipp::IwValue ippiGetValue(const cv::Scalar &scalar)
static inline ::ipp::IwValueFloat ippiGetValue(const cv::Scalar &scalar)
{
return ::ipp::IwValue(scalar[0], scalar[1], scalar[2], scalar[3]);
return ::ipp::IwValueFloat(scalar[0], scalar[1], scalar[2], scalar[3]);
}
static inline int ippiSuggestThreadsNum(const ::ipp::IwiImage &image, double multiplier)
{
int threads = cv::getNumThreads();
if(image.m_size.height > threads)
{
size_t opMemory = (int)(image.m_step*image.m_size.height*multiplier);
int l2cache = 0;
#if IPP_VERSION_X100 >= 201700
ippGetL2CacheSize(&l2cache);
#endif
if(!l2cache)
l2cache = 1 << 18;
return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads)));
}
return 1;
return ippiSuggestThreadsNum(image.m_size.width, image.m_size.height, image.m_typeSize*image.m_channels, multiplier);
}
#endif
static inline int ippiSuggestThreadsNum(const cv::Mat &image, double multiplier)
{
int threads = cv::getNumThreads();
if(image.rows > threads)
{
size_t opMemory = (int)(image.total()*multiplier);
int l2cache = 0;
#if IPP_VERSION_X100 >= 201700
ippGetL2CacheSize(&l2cache);
#endif
if(!l2cache)
l2cache = 1 << 18;
return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads)));
}
return 1;
}
// IPP temporary buffer helper
template<typename T>
class IppAutoBuffer

View File

@ -86,12 +86,6 @@ static MergeFunc getMergeFunc(int depth)
}
#ifdef HAVE_IPP
#ifdef HAVE_IPP_IW
extern "C" {
IW_DECL(IppStatus) llwiCopySplit(const void *pSrc, int srcStep, void* const pDstOrig[], int dstStep,
IppiSize size, int typeSize, int channels);
}
#endif
namespace cv {
static bool ipp_split(const Mat& src, Mat* mv, int channels)
@ -114,7 +108,7 @@ static bool ipp_split(const Mat& src, Mat* mv, int channels)
return false;
}
return CV_INSTRUMENT_FUN_IPP(llwiCopySplit, src.ptr(), (int)src.step, dstPtrs, (int)dstStep, size, (int)src.elemSize1(), channels) >= 0;
return CV_INSTRUMENT_FUN_IPP(llwiCopySplit, src.ptr(), (int)src.step, dstPtrs, (int)dstStep, size, (int)src.elemSize1(), channels, 0) >= 0;
}
else
{
@ -132,7 +126,7 @@ static bool ipp_split(const Mat& src, Mat* mv, int channels)
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
if(CV_INSTRUMENT_FUN_IPP(llwiCopySplit, ptrs[0], 0, (void**)&ptrs[1], 0, size, (int)src.elemSize1(), channels) < 0)
if(CV_INSTRUMENT_FUN_IPP(llwiCopySplit, ptrs[0], 0, (void**)&ptrs[1], 0, size, (int)src.elemSize1(), channels, 0) < 0)
return false;
}
return true;
@ -273,12 +267,6 @@ void cv::split(InputArray _m, OutputArrayOfArrays _mv)
}
#ifdef HAVE_IPP
#ifdef HAVE_IPP_IW
extern "C" {
IW_DECL(IppStatus) llwiCopyMerge(const void* const pSrc[], int srcStep, void *pDst, int dstStep,
IppiSize size, int typeSize, int channels);
}
#endif
namespace cv {
static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
@ -301,7 +289,7 @@ static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
return false;
}
return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, dst.ptr(), (int)dst.step, size, (int)mv[0].elemSize1(), channels) >= 0;
return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, dst.ptr(), (int)dst.step, size, (int)mv[0].elemSize1(), channels, 0) >= 0;
}
else
{
@ -319,7 +307,7 @@ static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size, (int)mv[0].elemSize1(), channels) < 0)
if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size, (int)mv[0].elemSize1(), channels, 0) < 0)
return false;
}
return true;
@ -820,16 +808,10 @@ void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
}
#ifdef HAVE_IPP
#ifdef HAVE_IPP_IW
extern "C" {
IW_DECL(IppStatus) llwiCopyMixed(const void *pSrc, int srcStep, int srcChannels, void *pDst, int dstStep, int dstChannels,
IppiSize size, int typeSize, int channelsShift);
}
#endif
namespace cv
{
static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel)
static bool ipp_extractChannel(const Mat &src, Mat &dst, int channel)
{
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP()
@ -840,14 +822,11 @@ static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel)
if(src.dims != dst.dims)
return false;
if(srcChannels == dstChannels || (srcChannels != 1 && dstChannels != 1))
return false;
if(src.dims <= 2)
{
IppiSize size = ippiSize(src.size());
return CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, src.ptr(), (int)src.step, srcChannels, dst.ptr(), (int)dst.step, dstChannels, size, (int)src.elemSize1(), channel) >= 0;
return CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, src.ptr(), (int)src.step, srcChannels, channel, dst.ptr(), (int)dst.step, dstChannels, 0, size, (int)src.elemSize1()) >= 0;
}
else
{
@ -859,7 +838,45 @@ static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel)
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
if(CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, ptrs[0], 0, srcChannels, ptrs[1], 0, dstChannels, size, (int)src.elemSize1(), channel) < 0)
if(CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, ptrs[0], 0, srcChannels, channel, ptrs[1], 0, dstChannels, 0, size, (int)src.elemSize1()) < 0)
return false;
}
return true;
}
#else
CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(channel);
return false;
#endif
}
static bool ipp_insertChannel(const Mat &src, Mat &dst, int channel)
{
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP()
int srcChannels = src.channels();
int dstChannels = dst.channels();
if(src.dims != dst.dims)
return false;
if(src.dims <= 2)
{
IppiSize size = ippiSize(src.size());
return CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, src.ptr(), (int)src.step, srcChannels, 0, dst.ptr(), (int)dst.step, dstChannels, channel, size, (int)src.elemSize1()) >= 0;
}
else
{
const Mat *arrays[] = {&dst, NULL};
uchar *ptrs[2] = {NULL};
NAryMatIterator it(arrays, ptrs);
IppiSize size = {(int)it.size, 1};
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
if(CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, ptrs[0], 0, srcChannels, 0, ptrs[1], 0, dstChannels, channel, size, (int)src.elemSize1()) < 0)
return false;
}
return true;
@ -893,7 +910,7 @@ void cv::extractChannel(InputArray _src, OutputArray _dst, int coi)
_dst.create(src.dims, &src.size[0], depth);
Mat dst = _dst.getMat();
CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi))
CV_IPP_RUN_FAST(ipp_extractChannel(src, dst, coi))
mixChannels(&src, 1, &dst, 1, ch, 1);
}
@ -917,7 +934,7 @@ void cv::insertChannel(InputArray _src, InputOutputArray _dst, int coi)
Mat src = _src.getMat(), dst = _dst.getMat();
CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi))
CV_IPP_RUN_FAST(ipp_insertChannel(src, dst, coi))
mixChannels(&src, 1, &dst, 1, ch, 1);
}
@ -5152,7 +5169,7 @@ static bool ipp_convertTo(Mat &src, Mat &dst, double alpha, double beta)
iwSrc.Init(ippiSize(sz), srcDepth, 1, NULL, (void*)src.ptr(), src.step);
iwDst.Init(ippiSize(sz), dstDepth, 1, NULL, (void*)dst.ptr(), dst.step);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwDst, alpha, beta, ::ipp::IwiScaleParams(mode));
}
else
{
@ -5168,7 +5185,7 @@ static bool ipp_convertTo(Mat &src, Mat &dst, double alpha, double beta)
iwSrc.m_ptr = ptrs[0];
iwDst.m_ptr = ptrs[1];
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwDst, alpha, beta, ::ipp::IwiScaleParams(mode));
}
}
}

View File

@ -49,18 +49,6 @@
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#ifdef HAVE_IPP_IW
extern "C" {
IW_DECL(IppStatus) llwiCopyMask(const void *pSrc, int srcStep, void *pDst, int dstStep,
IppiSize size, int typeSize, int channels, const Ipp8u *pMask, int maskStep);
IW_DECL(IppStatus) llwiSet(const double *pValue, void *pDst, int dstStep,
IppiSize size, IppDataType dataType, int channels);
IW_DECL(IppStatus) llwiSetMask(const double *pValue, void *pDst, int dstStep,
IppiSize size, IppDataType dataType, int channels, const Ipp8u *pMask, int maskStep);
IW_DECL(IppStatus) llwiCopyMakeBorder(const void *pSrc, IppSizeL srcStep, void *pDst, IppSizeL dstStep,
IppiSizeL size, IppDataType dataType, int channels, IppiBorderSize *pBorderSize, IppiBorderType border, const Ipp64f *pBorderVal);
}
#endif
namespace cv
{
@ -480,9 +468,9 @@ static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask)
if(dst.dims <= 2)
{
IppiSize size = ippiSize(dst.size());
IppDataType dataType = ippiGetDataType(dst.depth());
::ipp::IwValue s;
IppiSize size = ippiSize(dst.size());
IppDataType dataType = ippiGetDataType(dst.depth());
::ipp::IwValueFloat s;
convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1);
return CV_INSTRUMENT_FUN_IPP(llwiSetMask, s, dst.ptr(), (int)dst.step, size, dataType, dst.channels(), mask.ptr(), (int)mask.step) >= 0;
@ -493,9 +481,9 @@ static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask)
uchar *ptrs[2] = {NULL};
NAryMatIterator it(arrays, ptrs);
IppiSize size = {(int)it.size, 1};
IppDataType dataType = ippiGetDataType(dst.depth());
::ipp::IwValue s;
IppiSize size = {(int)it.size, 1};
IppDataType dataType = ippiGetDataType(dst.depth());
::ipp::IwValueFloat s;
convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1);
for( size_t i = 0; i < it.nplanes; i++, ++it)
@ -717,7 +705,7 @@ static bool ipp_flip(Mat &src, Mat &dst, int flip_mode)
::ipp::IwiImage iwSrc = ippiGetImage(src);
::ipp::IwiImage iwDst = ippiGetImage(dst);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, &iwSrc, &iwDst, ippMode);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode);
}
catch(::ipp::IwException)
{
@ -1155,13 +1143,13 @@ static bool ipp_copyMakeBorder( Mat &_src, Mat &_dst, int top, int bottom,
if(_src.dims > 2)
return false;
Rect dstRect(borderSize.borderLeft, borderSize.borderTop,
_dst.cols - borderSize.borderRight - borderSize.borderLeft,
_dst.rows - borderSize.borderBottom - borderSize.borderTop);
Rect dstRect(borderSize.left, borderSize.top,
_dst.cols - borderSize.right - borderSize.left,
_dst.rows - borderSize.bottom - borderSize.top);
Mat subDst = Mat(_dst, dstRect);
Mat *pSrc = &_src;
return CV_INSTRUMENT_FUN_IPP(llwiCopyMakeBorder, pSrc->ptr(), pSrc->step, subDst.ptr(), subDst.step, size, dataType, _src.channels(), &borderSize, borderType, &value[0]) >= 0;
return CV_INSTRUMENT_FUN_IPP(llwiCopyMakeBorder, pSrc->ptr(), pSrc->step, subDst.ptr(), subDst.step, size, dataType, _src.channels(), borderSize, borderType, &value[0]) >= 0;
#else
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(top); CV_UNUSED(bottom); CV_UNUSED(left); CV_UNUSED(right);
CV_UNUSED(_borderType); CV_UNUSED(value);

View File

@ -44,7 +44,8 @@ void magnitude32f(const float* x, const float* y, float* mag, int len)
CV_INSTRUMENT_REGION()
CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len);
CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);
// SSE42 performance issues
CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);
CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len),
CV_CPU_DISPATCH_MODES_ALL);
@ -55,7 +56,8 @@ void magnitude64f(const double* x, const double* y, double* mag, int len)
CV_INSTRUMENT_REGION()
CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len);
CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);
// SSE42 performance issues
CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);
CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len),
CV_CPU_DISPATCH_MODES_ALL);
@ -91,7 +93,6 @@ void sqrt32f(const float* src, float* dst, int len)
CV_INSTRUMENT_REGION()
CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len);
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_32f_A21, src, dst, len) >= 0);
CV_CPU_DISPATCH(sqrt32f, (src, dst, len),
CV_CPU_DISPATCH_MODES_ALL);
@ -103,7 +104,6 @@ void sqrt64f(const double* src, double* dst, int len)
CV_INSTRUMENT_REGION()
CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len);
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_64f_A50, src, dst, len) >= 0);
CV_CPU_DISPATCH(sqrt64f, (src, dst, len),
CV_CPU_DISPATCH_MODES_ALL);

View File

@ -3101,7 +3101,7 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
{
double r = 0;
#if ARITHM_USE_IPP
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, src1, len*sizeof(uchar), src2, len*sizeof(uchar), ippiSize(len, 1), &r) >= 0, r);
CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, src1, len*sizeof(uchar), src2, len*sizeof(uchar), ippiSize(len, 1), &r) >= 0, r);
#endif
int i = 0;

View File

@ -4356,7 +4356,6 @@ template<typename T> static void sortIdx_( const Mat& src, Mat& dst, int flags )
}
#ifdef HAVE_IPP
#if !IPP_DISABLE_SORT_IDX
typedef IppStatus (CV_STDCALL *IppSortIndexFunc)(const void* pSrc, Ipp32s srcStrideBytes, Ipp32s *pDstIndx, int len, Ipp8u *pBuffer);
static IppSortIndexFunc getSortIndexFunc(int depth, bool sortDescending)
@ -4435,7 +4434,6 @@ static bool ipp_sortIdx( const Mat& src, Mat& dst, int flags )
return true;
}
#endif
#endif
typedef void (*SortFunc)(const Mat& src, Mat& dst, int flags);
}
@ -4472,9 +4470,8 @@ void cv::sortIdx( InputArray _src, OutputArray _dst, int flags )
_dst.release();
_dst.create( src.size(), CV_32S );
dst = _dst.getMat();
#if !IPP_DISABLE_SORT_IDX
CV_IPP_RUN_FAST(ipp_sortIdx(src, dst, flags));
#endif
static SortFunc tab[] =
{

View File

@ -257,7 +257,8 @@ struct CoreTLSData
//#ifdef HAVE_OPENCL
device(0), useOpenCL(-1),
//#endif
useIPP(-1)
useIPP(-1),
useIPP_NE(-1)
#ifdef HAVE_TEGRA_OPTIMIZATION
,useTegra(-1)
#endif
@ -272,7 +273,8 @@ struct CoreTLSData
ocl::Queue oclQueue; // the queue used for running a kernel, see also getQueue, Kernel::run
int useOpenCL; // 1 - use, 0 - do not use, -1 - auto/not initialized
//#endif
int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized
int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized
int useIPP_NE; // 1 - use, 0 - do not use, -1 - auto/not initialized
#ifdef HAVE_TEGRA_OPTIMIZATION
int useTegra; // 1 - use, 0 - do not use, -1 - auto/not initialized
#endif

View File

@ -1294,6 +1294,12 @@ static bool ipp_countNonZero( Mat &src, int &res )
{
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 < 201801
// Poor performance of SSE42
if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
return false;
#endif
Ipp32s count = 0;
int depth = src.depth();
@ -2531,15 +2537,16 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
#if IPP_VERSION_X100 >= 700
CV_INSTRUMENT_REGION_IPP()
#if IPP_DISABLE_MINMAX_NAN_SSE42
#if IPP_VERSION_X100 < 201800
// cv::minMaxIdx problem with NaN input
// Disable 32F processing only
if(src.depth() == CV_32F && !(ipp::getIppFeatures()&ippCPUID_AVX))
if(src.depth() == CV_32F && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
return false;
#endif
#if IPP_VERSION_X100 < 201801
// cv::minMaxIdx problem with index positions on AVX
#if IPP_VERSION_X100 < 201810
if(!mask.empty() && _maxIdx && ipp::getIppFeatures()&ippCPUID_AVX)
if(!mask.empty() && _maxIdx && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42)
return false;
#endif
@ -2550,8 +2557,8 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
IppiPoint minIdx = {-1, -1};
IppiPoint maxIdx = {-1, -1};
float *pMinVal = (_minVal)?&minVal:NULL;
float *pMaxVal = (_maxVal)?&maxVal:NULL;
float *pMinVal = (_minVal || _minIdx)?&minVal:NULL;
float *pMaxVal = (_maxVal || _maxIdx)?&maxVal:NULL;
IppiPoint *pMinIdx = (_minIdx)?&minIdx:NULL;
IppiPoint *pMaxIdx = (_maxIdx)?&maxIdx:NULL;
@ -2564,6 +2571,8 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
ippMinMaxFun = ipp_minIdx_wrap;
else if(_maxVal && !_maxIdx && _minVal && !_minIdx)
ippMinMaxFun = ipp_minMax_wrap;
else if(!_maxVal && !_maxIdx && !_minVal && !_minIdx)
return false;
else
ippMinMaxFun = ipp_minMaxIndex_wrap;
}
@ -2582,8 +2591,12 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
*_maxVal = maxVal;
if(_minIdx)
{
#if IPP_VERSION_X100 < 201801
// Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks
if(status == ippStsNoOperation && !mask.empty() && !pMinIdx->x && !pMinIdx->y)
#else
if(status == ippStsNoOperation)
#endif
{
_minIdx[0] = -1;
_minIdx[1] = -1;
@ -2596,8 +2609,12 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI
}
if(_maxIdx)
{
#if IPP_VERSION_X100 < 201801
// Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks
if(status == ippStsNoOperation && !mask.empty() && !pMaxIdx->x && !pMaxIdx->y)
#else
if(status == ippStsNoOperation)
#endif
{
_maxIdx[0] = -1;
_maxIdx[1] = -1;

View File

@ -1909,55 +1909,146 @@ struct IPPInitSingleton
public:
IPPInitSingleton()
{
useIPP = true;
ippStatus = 0;
funcname = NULL;
filename = NULL;
linen = 0;
ippFeatures = 0;
useIPP = true;
useIPP_NE = false;
ippStatus = 0;
funcname = NULL;
filename = NULL;
linen = 0;
cpuFeatures = 0;
ippFeatures = 0;
ippTopFeatures = 0;
pIppLibInfo = NULL;
ippStatus = ippGetCpuFeatures(&cpuFeatures, NULL);
if(ippStatus < 0)
{
std::cerr << "ERROR: IPP cannot detect CPU features, IPP was disabled " << std::endl;
useIPP = false;
return;
}
ippFeatures = cpuFeatures;
bool unsupported = false;
const char* pIppEnv = getenv("OPENCV_IPP");
cv::String env = pIppEnv;
if(env.size())
{
env = env.toLowerCase();
if(env.substr(0, 2) == "ne")
{
useIPP_NE = true;
env = env.substr(3, env.size());
}
if(env == "disabled")
{
std::cerr << "WARNING: IPP was disabled by OPENCV_IPP environment variable" << std::endl;
useIPP = false;
}
#if IPP_VERSION_X100 >= 900
else if(env == "sse")
ippFeatures = ippCPUID_SSE;
else if(env == "sse2")
ippFeatures = ippCPUID_SSE2;
else if(env == "sse3")
ippFeatures = ippCPUID_SSE3;
else if(env == "ssse3")
ippFeatures = ippCPUID_SSSE3;
else if(env == "sse41")
ippFeatures = ippCPUID_SSE41;
else if(env == "sse42")
ippFeatures = ippCPUID_SSE42;
else if(env == "avx")
ippFeatures = ippCPUID_AVX;
{
if(!(cpuFeatures&ippCPUID_SSE42))
unsupported = true;
ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42;
ippFeatures |= (cpuFeatures&ippCPUID_AES);
ippFeatures |= (cpuFeatures&ippCPUID_CLMUL);
ippFeatures |= (cpuFeatures&ippCPUID_SHA);
}
else if(env == "avx2")
ippFeatures = ippCPUID_AVX2;
{
if(!(cpuFeatures&ippCPUID_AVX2))
unsupported = true;
ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2;
ippFeatures |= (cpuFeatures&ippCPUID_AES);
ippFeatures |= (cpuFeatures&ippCPUID_CLMUL);
ippFeatures |= (cpuFeatures&ippCPUID_F16C);
ippFeatures |= (cpuFeatures&ippCPUID_ADCOX);
ippFeatures |= (cpuFeatures&ippCPUID_RDSEED);
ippFeatures |= (cpuFeatures&ippCPUID_PREFETCHW);
ippFeatures |= (cpuFeatures&ippCPUID_MPX);
}
#if defined (_M_AMD64) || defined (__x86_64__)
else if(env == "avx512")
{
if(!(cpuFeatures&ippCPUID_AVX512F))
unsupported = true;
ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2|ippCPUID_AVX512F;
ippFeatures |= (cpuFeatures&ippCPUID_AES);
ippFeatures |= (cpuFeatures&ippCPUID_CLMUL);
ippFeatures |= (cpuFeatures&ippCPUID_F16C);
ippFeatures |= (cpuFeatures&ippCPUID_ADCOX);
ippFeatures |= (cpuFeatures&ippCPUID_RDSEED);
ippFeatures |= (cpuFeatures&ippCPUID_PREFETCHW);
ippFeatures |= (cpuFeatures&ippCPUID_MPX);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512CD);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512VL);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512BW);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512DQ);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512ER);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512PF);
ippFeatures |= (cpuFeatures&ippCPUID_AVX512VBMI);
}
#endif
else
std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << std::endl;
std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << ". Correct values are: disabled, sse42, avx2, avx512 (Intel64 only)" << std::endl;
}
if(unsupported)
{
std::cerr << "WARNING: selected IPP features are not supported by CPU. IPP was initialized with default features" << std::endl;
ippFeatures = cpuFeatures;
}
// Disable AVX1 since we don't track regressions for it. SSE42 will be used instead
if(cpuFeatures&ippCPUID_AVX && !(cpuFeatures&ippCPUID_AVX2))
ippFeatures &= ~ippCPUID_AVX;
// IPP integrations in OpenCV support only SSE4.2, AVX2 and AVX-512 optimizations.
if(!(
cpuFeatures&ippCPUID_AVX512F ||
cpuFeatures&ippCPUID_AVX2 ||
cpuFeatures&ippCPUID_SSE42
))
{
useIPP = false;
return;
}
IPP_INITIALIZER(ippFeatures)
ippFeatures = ippGetEnabledCpuFeatures();
// Detect top level optimizations to make comparison easier for optimizations dependent conditions
if(ippFeatures&ippCPUID_AVX512F)
{
if((ippFeatures&ippCPUID_AVX512_SKX) == ippCPUID_AVX512_SKX)
ippTopFeatures = ippCPUID_AVX512_SKX;
else if((ippFeatures&ippCPUID_AVX512_KNL) == ippCPUID_AVX512_KNL)
ippTopFeatures = ippCPUID_AVX512_KNL;
else
ippTopFeatures = ippCPUID_AVX512F; // Unknown AVX512 configuration
}
else if(ippFeatures&ippCPUID_AVX2)
ippTopFeatures = ippCPUID_AVX2;
else if(ippFeatures&ippCPUID_SSE42)
ippTopFeatures = ippCPUID_SSE42;
pIppLibInfo = ippiGetLibVersion();
}
bool useIPP;
public:
bool useIPP;
bool useIPP_NE;
int ippStatus; // 0 - all is ok, -1 - IPP functions failed
int ippStatus; // 0 - all is ok, -1 - IPP functions failed
const char *funcname;
const char *filename;
int linen;
Ipp64u ippFeatures;
Ipp64u cpuFeatures;
Ipp64u ippTopFeatures;
const IppLibraryVersion *pIppLibInfo;
};
static IPPInitSingleton& getIPPSingleton()
@ -1983,6 +2074,17 @@ int getIppFeatures()
#endif
}
// Prototype kept next to the definition so the non-static function has a prior declaration.
unsigned long long getIppTopFeatures();

// Returns the "top level" optimization value recorded by the IPP singleton
// (its ippTopFeatures member). Builds without HAVE_IPP always report 0.
unsigned long long getIppTopFeatures()
{
#ifndef HAVE_IPP
    return 0;
#else
    return getIPPSingleton().ippTopFeatures;
#endif
}
void setIppStatus(int status, const char * const _funcname, const char * const _filename, int _line)
{
#ifdef HAVE_IPP
@ -2013,6 +2115,19 @@ String getIppErrorLocation()
#endif
}
// Builds a human-readable description of the IPP library in use.
//  - with HAVE_IPP: "<Name> <Version> <BuildDate>" taken from the library info
//    pointer cached in the IPP singleton, or "error" when that pointer is NULL;
//  - without HAVE_IPP: "disabled".
String getIppVersion()
{
#ifdef HAVE_IPP
    const IppLibraryVersion* const libInfo = getIPPSingleton().pIppLibInfo;
    // No library info was stored by the singleton
    if(!libInfo)
        return String("error");
    return format("%s %s %s", libInfo->Name, libInfo->Version, libInfo->BuildDate);
#else
    return String("disabled");
#endif
}
bool useIPP()
{
#ifdef HAVE_IPP
@ -2038,6 +2153,31 @@ void setUseIPP(bool flag)
#endif
}
// Reports whether the IPP "NE" mode is enabled for the calling thread.
// The per-thread flag is initialized lazily from the IPP singleton the first
// time it is queried (a negative value means "not initialized yet").
// Always false when built without HAVE_IPP.
bool useIPP_NE()
{
#ifdef HAVE_IPP
    CoreTLSData* tls = getCoreTlsData().get();
    // First query on this thread: inherit the process-wide setting
    if(tls->useIPP_NE < 0)
        tls->useIPP_NE = getIPPSingleton().useIPP_NE;
    return tls->useIPP_NE > 0;
#else
    return false;
#endif
}
void setUseIPP_NE(bool flag)
{
CoreTLSData* data = getCoreTlsData().get();
#ifdef HAVE_IPP
data->useIPP_NE = (getIPPSingleton().useIPP_NE)?flag:false;
#else
(void)flag;
data->useIPP_NE = false;
#endif
}
} // namespace ipp
} // namespace cv

View File

@ -95,7 +95,7 @@ static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst,
ippiGetImage(dy_, iwSrcDy);
ippiGetImage(dst, iwDst);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, &iwSrcDx, &iwSrcDy, &iwDst, norm, low, high);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, iwSrcDx, iwSrcDy, iwDst, low, high, ::ipp::IwiFilterCannyDerivParams(norm));
}
catch (::ipp::IwException ex)
{
@ -121,7 +121,7 @@ static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst,
ippiGetImage(src, iwSrc);
ippiGetImage(dst, iwDst);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, &iwSrc, &iwDst, ippFilterSobel, kernel, norm, low, high, ippBorderRepl);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, iwSrc, iwDst, low, high, ::ipp::IwiFilterCannyParams(ippFilterSobel, kernel, norm), ippBorderRepl);
}
catch (::ipp::IwException)
{

View File

@ -469,11 +469,32 @@ static ippiGeneralFunc ippiRGB2GrayC4Tab[] =
0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0
};
static ippiGeneralFunc ippiCopyP3C3RTab[] =
static IppStatus ippiGrayToRGB_C1C3R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize)
{
(ippiGeneralFunc)ippiCopy_8u_P3C3R, 0, (ippiGeneralFunc)ippiCopy_16u_P3C3R, 0,
0, (ippiGeneralFunc)ippiCopy_32f_P3C3R, 0, 0
};
return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize);
}
// Type-dispatching overload: forwards 16-bit unsigned data to ippiGrayToRGB_16u_C1C3R
// through the IPP instrumentation macro and returns its status unchanged.
static IppStatus ippiGrayToRGB_C1C3R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize)
{
    const IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize);
    return status;
}
// Type-dispatching overload: forwards 32-bit float data to ippiGrayToRGB_32f_C1C3R
// through the IPP instrumentation macro and returns its status unchanged.
static IppStatus ippiGrayToRGB_C1C3R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize)
{
    const IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize);
    return status;
}
// Type-dispatching overload: forwards 8-bit unsigned data and the alpha value
// to ippiGrayToRGB_8u_C1C4R and returns its status unchanged.
static IppStatus ippiGrayToRGB_C1C4R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize, Ipp8u aval)
{
    const IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval);
    return status;
}
// Type-dispatching overload: forwards 16-bit unsigned data and the alpha value
// to ippiGrayToRGB_16u_C1C4R and returns its status unchanged.
static IppStatus ippiGrayToRGB_C1C4R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize, Ipp16u aval)
{
    const IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval);
    return status;
}
// Type-dispatching overload: forwards 32-bit float data and the alpha value
// to ippiGrayToRGB_32f_C1C4R and returns its status unchanged.
static IppStatus ippiGrayToRGB_C1C4R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize, Ipp32f aval)
{
    const IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval);
    return status;
}
#if !IPP_DISABLE_RGB_XYZ
static ippiGeneralFunc ippiRGB2XYZTab[] =
@ -580,48 +601,31 @@ private:
Ipp32f coeffs[3];
};
template <typename T>
struct IPPGray2BGRFunctor
{
IPPGray2BGRFunctor(ippiGeneralFunc _func) :
ippiGrayToBGR(_func)
{
}
IPPGray2BGRFunctor(){}
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
{
if (ippiGrayToBGR == 0)
return false;
const void* srcarray[3] = { src, src, src };
return CV_INSTRUMENT_FUN_IPP(ippiGrayToBGR, srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
return ippiGrayToRGB_C1C3R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows)) >= 0;
}
private:
ippiGeneralFunc ippiGrayToBGR;
};
template <typename T>
struct IPPGray2BGRAFunctor
{
IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) :
ippiColorConvertGeneral(_func1), ippiColorConvertReorder(_func2), depth(_depth)
IPPGray2BGRAFunctor()
{
alpha = ColorChannel<T>::max();
}
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
{
if (ippiColorConvertGeneral == 0 || ippiColorConvertReorder == 0)
return false;
const void* srcarray[3] = { src, src, src };
Mat temp(rows, cols, CV_MAKETYPE(depth, 3));
if(CV_INSTRUMENT_FUN_IPP(ippiColorConvertGeneral, srcarray, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
return false;
int order[4] = {0, 1, 2, 3};
return CV_INSTRUMENT_FUN_IPP(ippiColorConvertReorder, temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
return ippiGrayToRGB_C1C4R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows), alpha) >= 0;
}
private:
ippiGeneralFunc ippiColorConvertGeneral;
ippiReorderFunc ippiColorConvertReorder;
int depth;
T alpha;
};
struct IPPReorderGeneralFunctor
@ -9744,18 +9748,27 @@ void cvtGraytoBGR(const uchar * src_data, size_t src_step,
#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700
CV_IPP_CHECK()
{
bool ippres = false;
if(dcn == 3)
{
if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height,
IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) )
return;
if( depth == CV_8U )
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor<Ipp8u>());
else if( depth == CV_16U )
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor<Ipp16u>());
else
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor<Ipp32f>());
}
else if(dcn == 4)
{
if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height,
IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) )
return;
if( depth == CV_8U )
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor<Ipp8u>());
else if( depth == CV_16U )
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor<Ipp16u>());
else
ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor<Ipp32f>());
}
if(ippres)
return;
}
#endif

View File

@ -312,7 +312,7 @@ static bool ipp_Deriv(InputArray _src, OutputArray _dst, int dx, int dy, int ksi
::ipp::IwiImage iwDstProc = iwDst;
::ipp::IwiBorderSize borderSize(maskSize);
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
if(!ippBorder.m_borderType)
if(!ippBorder)
return false;
if(srcType == ipp8u && dstType == ipp8u)
@ -324,17 +324,17 @@ static bool ipp_Deriv(InputArray _src, OutputArray _dst, int dx, int dy, int ksi
{
iwSrc -= borderSize;
iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwSrcProc, 1, 0, ippAlgHintFast);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwSrcProc, 1, 0, ::ipp::IwiScaleParams(ippAlgHintFast));
iwSrcProc += borderSize;
}
if(useScharr)
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterScharr, &iwSrcProc, &iwDstProc, derivType, maskSize, ippBorder);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterScharr, iwSrcProc, iwDstProc, derivType, maskSize, ::ipp::IwDefault(), ippBorder);
else
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterSobel, &iwSrcProc, &iwDstProc, derivType, maskSize, ippBorder);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterSobel, iwSrcProc, iwDstProc, derivType, maskSize, ::ipp::IwDefault(), ippBorder);
if(useScale)
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwDstProc, &iwDst, scale, delta, ippAlgHintFast);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwDstProc, iwDst, scale, delta, ::ipp::IwiScaleParams(ippAlgHintFast));
}
catch (::ipp::IwException)
{
@ -732,7 +732,7 @@ static bool ipp_Laplacian(InputArray _src, OutputArray _dst, int ksize, double s
::ipp::IwiImage iwDstProc = iwDst;
::ipp::IwiBorderSize borderSize(maskSize);
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
if(!ippBorder.m_borderType)
if(!ippBorder)
return false;
if(srcType == ipp8u && dstType == ipp8u)
@ -744,14 +744,14 @@ static bool ipp_Laplacian(InputArray _src, OutputArray _dst, int ksize, double s
{
iwSrc -= borderSize;
iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwSrcProc, 1, 0);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwSrcProc, 1, 0);
iwSrcProc += borderSize;
}
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterLaplacian, &iwSrcProc, &iwDstProc, maskSize, ippBorder);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterLaplacian, iwSrcProc, iwDstProc, maskSize, ::ipp::IwDefault(), ippBorder);
if(useScale)
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwDstProc, &iwDst, scale, delta);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwDstProc, iwDst, scale, delta);
}
catch (::ipp::IwException ex)

View File

@ -4560,170 +4560,88 @@ static bool replacementFilter2D(int stype, int dtype, int kernel_type,
}
#ifdef HAVE_IPP
typedef IppStatus(CV_STDCALL* IppiFilterBorder)(
const void* pSrc, int srcStep, void* pDst, int dstStep,
IppiSize dstRoiSize, IppiBorderType border, const void* borderValue,
const IppiFilterBorderSpec* pSpec, Ipp8u* pBuffer);
static IppiFilterBorder getIppFunc(int stype)
{
switch (stype)
{
case CV_8UC1:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_8u_C1R);
case CV_8UC3:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_8u_C3R);
case CV_8UC4:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_8u_C4R);
case CV_16UC1:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16u_C1R);
case CV_16UC3:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16u_C3R);
case CV_16UC4:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16u_C4R);
case CV_16SC1:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16s_C1R);
case CV_16SC3:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16s_C3R);
case CV_16SC4:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16s_C4R);
case CV_32FC1:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_32f_C1R);
case CV_32FC3:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_32f_C3R);
case CV_32FC4:
return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_32f_C4R);
default:
return 0;
}
}
template <int kdepth>
struct IppFilterTrait { };
template <>
struct IppFilterTrait<CV_16S>
{
enum { kernel_type_id = CV_16SC1 };
typedef Ipp16s kernel_type;
typedef IppStatus(CV_STDCALL* copy_fun_type)(const kernel_type* pSrc, int srcStep, kernel_type* pDst, int dstStep, IppiSize roiSize);
inline static copy_fun_type get_copy_fun() { return ippiCopy_16s_C1R; }
inline static IppStatus runInit(const kernel_type* pKernel, IppiSize kernelSize, int divisor, IppDataType dataType, int numChannels, IppRoundMode roundMode, IppiFilterBorderSpec* pSpec)
{
return ippiFilterBorderInit_16s(pKernel, kernelSize, divisor, dataType, numChannels, roundMode, pSpec);
}
};
template <>
struct IppFilterTrait<CV_32F>
{
enum { kernel_type_id = CV_32FC1 };
typedef Ipp32f kernel_type;
typedef IppStatus(CV_STDCALL* copy_fun_type)(const kernel_type* pSrc, int srcStep, kernel_type* pDst, int dstStep, IppiSize roiSize);
inline static copy_fun_type get_copy_fun() { return ippiCopy_32f_C1R; }
inline static IppStatus runInit(const kernel_type* pKernel, IppiSize kernelSize, int divisor, IppDataType dataType, int numChannels, IppRoundMode roundMode, IppiFilterBorderSpec* pSpec)
{
CV_UNUSED(divisor);
return ippiFilterBorderInit_32f(pKernel, kernelSize, dataType, numChannels, roundMode, pSpec);
}
};
template <int kdepth>
static bool ippFilter2D(int stype, int dtype,
uchar * src_data, size_t src_step,
uchar * dst_data, size_t dst_step,
int width, int height,
uchar * kernel_data, size_t kernel_step,
int kernel_width, int kernel_height,
int anchor_x, int anchor_y,
double delta, int borderType, bool isSubmatrix)
static bool ippFilter2D(int stype, int dtype, int kernel_type,
uchar * src_data, size_t src_step,
uchar * dst_data, size_t dst_step,
int width, int height,
int full_width, int full_height,
int offset_x, int offset_y,
uchar * kernel_data, size_t kernel_step,
int kernel_width, int kernel_height,
int anchor_x, int anchor_y,
double delta, int borderType,
bool isSubmatrix)
{
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP();
typedef IppFilterTrait<kdepth> trait;
typedef typename trait::kernel_type kernel_type;
::ipp::IwiSize iwSize(width, height);
::ipp::IwiSize kernelSize(kernel_width, kernel_height);
IppDataType type = ippiGetDataType(CV_MAT_DEPTH(stype));
int channels = CV_MAT_CN(stype);
IppAutoBuffer<IppiFilterBorderSpec> spec;
IppAutoBuffer<Ipp8u> buffer;
IppAutoBuffer<kernel_type> kernelBuffer;
IppiBorderType ippBorderType;
int src_type;
Point anchor(anchor_x, anchor_y);
#if IPP_VERSION_X100 >= 900
Point ippAnchor((kernel_width - 1) / 2, (kernel_height - 1) / 2);
#else
Point ippAnchor(kernel_width >> 1, kernel_height >> 1);
#endif
bool isIsolated = (borderType & BORDER_ISOLATED) != 0;
int borderTypeNI = borderType & ~BORDER_ISOLATED;
ippBorderType = ippiGetBorderType(borderTypeNI);
int ddepth = CV_MAT_DEPTH(dtype);
int sdepth = CV_MAT_DEPTH(stype);
CV_UNUSED(isSubmatrix);
#if IPP_VERSION_X100 >= 201700 && IPP_VERSION_X100 <= 201702 // IPP bug with 1x1 kernel
if(kernel_width == 1 && kernel_height == 1)
return false;
#endif
bool runIpp = true
&& (borderTypeNI == BORDER_CONSTANT || borderTypeNI == BORDER_REPLICATE)
&& (sdepth == ddepth)
&& (getIppFunc(stype))
&& ((int)ippBorderType > 0)
&& (!isSubmatrix || isIsolated)
&& (std::fabs(delta - 0) < DBL_EPSILON)
&& (ippAnchor == anchor)
&& src_data != dst_data;
if (!runIpp)
#if IPP_VERSION_X100 < 201801
// Too big difference compared to OpenCV FFT-based convolution
if(kernel_type == CV_32FC1 && (type == ipp16s || type == ipp16u) && (kernel_width > 7 || kernel_height > 7))
return false;
src_type = stype;
int cn = CV_MAT_CN(stype);
IppiSize kernelSize = { kernel_width, kernel_height };
IppDataType dataType = ippiGetDataType(ddepth);
IppDataType kernelType = ippiGetDataType(kdepth);
Ipp32s specSize = 0;
Ipp32s bufsize = 0;
IppiSize dstRoiSize = { width, height };
IppStatus status;
status = ippiFilterBorderGetSize(kernelSize, dstRoiSize, dataType, kernelType, cn, &specSize, &bufsize);
if (status < 0)
// Poor optimization for big kernels
if(kernel_width > 7 || kernel_height > 7)
return false;
kernel_type* pKerBuffer = (kernel_type*)kernel_data;
size_t good_kernel_step = sizeof(kernel_type) * static_cast<size_t>(kernelSize.width);
#if IPP_VERSION_X100 >= 900
if (kernel_step != good_kernel_step) {
kernelBuffer.allocate((int)good_kernel_step * kernelSize.height);
status = trait::get_copy_fun()((kernel_type*)kernel_data, (int)kernel_step, kernelBuffer, (int)good_kernel_step, kernelSize);
if (status < 0)
return false;
pKerBuffer = kernelBuffer;
}
#else
kernelBuffer.Alloc(good_kernel_step * kernelSize.height);
Mat kerFlip(Size(kernelSize.width, kernelSize.height), trait::kernel_type_id, kernelBuffer, (int)good_kernel_step);
Mat kernel(Size(kernel_width, kernel_height), trait::kernel_type_id, kernel_data, kernel_step);
flip(kernel, kerFlip, -1);
pKerBuffer = kernelBuffer;
#endif
spec.allocate(specSize);
buffer.allocate(bufsize);
status = trait::runInit(pKerBuffer, kernelSize, 0, dataType, cn, ippRndFinancial, spec);
if (status < 0) {
if(src_data == dst_data)
return false;
if(stype != dtype)
return false;
if(kernel_type != CV_16SC1 && kernel_type != CV_32FC1)
return false;
// TODO: Implement offset for 8u, 16u
if(std::fabs(delta) >= DBL_EPSILON)
return false;
if(!ippiCheckAnchor(anchor_x, anchor_y, kernel_width, kernel_height))
return false;
try
{
::ipp::IwiBorderSize iwBorderSize;
::ipp::IwiBorderType iwBorderType;
::ipp::IwiImage iwKernel(ippiSize(kernel_width, kernel_height), ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, (void*)kernel_data, kernel_step);
::ipp::IwiImage iwSrc(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)src_data, src_step);
::ipp::IwiImage iwDst(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)dst_data, dst_step);
iwBorderSize = ::ipp::iwiSizeToBorderSize(kernelSize);
iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize);
if(!iwBorderType)
return false;
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilter, iwSrc, iwDst, iwKernel, ::ipp::IwiFilterParams(1, 0, ippAlgHintNone, ippRndFinancial), iwBorderType);
}
catch(::ipp::IwException ex)
{
return false;
}
IppiFilterBorder ippiFilterBorder = getIppFunc(src_type);
kernel_type borderValue[4] = { 0, 0, 0, 0 };
status = CV_INSTRUMENT_FUN_IPP(ippiFilterBorder, src_data, (int)src_step, dst_data, (int)dst_step, dstRoiSize, ippBorderType, borderValue, spec, buffer);
if (status >= 0) {
CV_IMPL_ADD(CV_IMPL_IPP);
return true;
}
return true;
#else
CV_UNUSED(stype); CV_UNUSED(dtype); CV_UNUSED(kernel_type); CV_UNUSED(src_data); CV_UNUSED(src_step);
CV_UNUSED(dst_data); CV_UNUSED(dst_step); CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(full_width);
CV_UNUSED(full_height); CV_UNUSED(offset_x); CV_UNUSED(offset_y); CV_UNUSED(kernel_data); CV_UNUSED(kernel_step);
CV_UNUSED(kernel_width); CV_UNUSED(kernel_height); CV_UNUSED(anchor_x); CV_UNUSED(anchor_y); CV_UNUSED(delta);
CV_UNUSED(borderType); CV_UNUSED(isSubmatrix);
return false;
#endif
}
#endif
@ -4902,34 +4820,18 @@ void filter2D(int stype, int dtype, int kernel_type,
delta, borderType, isSubmatrix);
if (res)
return;
#ifdef HAVE_IPP
CV_IPP_CHECK()
{
res = false;
if (kernel_type == CV_32FC1) {
res = ippFilter2D<CV_32F>(stype, dtype,
src_data, src_step,
dst_data, dst_step,
width, height,
kernel_data, kernel_step,
kernel_width, kernel_height,
anchor_x, anchor_y,
delta, borderType, isSubmatrix);
}
else if (kernel_type == CV_16SC1) {
res = ippFilter2D<CV_16S>(stype, dtype,
src_data, src_step,
dst_data, dst_step,
width, height,
kernel_data, kernel_step,
kernel_width, kernel_height,
anchor_x, anchor_y,
delta, borderType, isSubmatrix);
}
if (res)
return;
}
#endif
CV_IPP_RUN_FAST(ippFilter2D(stype, dtype, kernel_type,
src_data, src_step,
dst_data, dst_step,
width, height,
full_width, full_height,
offset_x, offset_y,
kernel_data, kernel_step,
kernel_width, kernel_height,
anchor_x, anchor_y,
delta, borderType, isSubmatrix))
res = dftFilter2D(stype, dtype, kernel_type,
src_data, src_step,
dst_data, dst_step,

View File

@ -370,6 +370,18 @@ void crossCorr( const Mat& src, const Mat& templ, Mat& dst,
Point anchor=Point(0,0), double delta=0,
int borderType=BORDER_REFLECT_101 );
}
#ifdef HAVE_IPP_IW
// Checks whether the anchor, after cv::normalizeAnchor(), coincides with the
// kernel position ((width-1)/2, (height-1)/2). Returns true only when both
// coordinates match.
static inline bool ippiCheckAnchor(cv::Point anchor, cv::Size ksize)
{
    const cv::Point normalized = cv::normalizeAnchor(anchor, ksize);
    const bool matchesX = (normalized.x == (ksize.width - 1) / 2);
    const bool matchesY = (normalized.y == (ksize.height - 1) / 2);
    return matchesX && matchesY;
}
#endif
#endif

View File

@ -1194,10 +1194,8 @@ static IppiHistogram_C1 getIppiHistogramFunction_C1(int type)
{
IppiHistogram_C1 ippFunction =
(type == CV_8UC1) ? (IppiHistogram_C1)ippiHistogram_8u_C1R :
#if IPP_VERSION_X100 >= 201700 || !(defined HAVE_IPP_ICV_ONLY)
(type == CV_16UC1) ? (IppiHistogram_C1)ippiHistogram_16u_C1R :
(type == CV_32FC1) ? (IppiHistogram_C1)ippiHistogram_32f_C1R :
#endif
NULL;
return ippFunction;
@ -1401,9 +1399,9 @@ static bool ipp_calchist(const Mat &image, Mat &hist, int histSize, const float*
{
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 < 201801
// No SSE42 optimization for uniform 32f
#if IPP_DISABLE_PERF_HISTU32F_SSE42
if(uniform && image.depth() == CV_32F && !(ipp::getIppFeatures()&ippCPUID_AVX))
if(uniform && image.depth() == CV_32F && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
return false;
#endif

View File

@ -2971,8 +2971,8 @@ public:
try
{
::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start);
CV_INSTRUMENT_FUN_IPP(iwiResize, &m_src, &m_dst, &roi);
::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
CV_INSTRUMENT_FUN_IPP(iwiResize, m_src, m_dst, ippBorderRepl, tile);
}
catch(::ipp::IwException)
{
@ -3007,7 +3007,7 @@ public:
{0, scaleY, shift+0.5*scaleY}
};
iwiWarpAffine.InitAlloc(m_src.m_size, m_dst.m_size, m_src.m_dataType, m_src.m_channels, coeffs, ippWarpForward, inter, ::ipp::IwiWarpAffineParams(0, 0.75, 0), ippBorderRepl);
iwiWarpAffine.InitAlloc(m_src.m_size, m_dst.m_size, m_src.m_dataType, m_src.m_channels, coeffs, iwTransForward, inter, ::ipp::IwiWarpAffineParams(0, 0, 0.75), ippBorderRepl);
m_ok = true;
}
@ -3021,8 +3021,8 @@ public:
try
{
::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start);
CV_INSTRUMENT_FUN_IPP(iwiWarpAffine, &m_src, &m_dst, &roi);
::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
CV_INSTRUMENT_FUN_IPP(iwiWarpAffine, m_src, m_dst, tile);
}
catch(::ipp::IwException)
{
@ -3053,23 +3053,28 @@ static bool ipp_resize(const uchar * src_data, size_t src_step, int src_width, i
if(ippInter < 0)
return false;
#if IPP_DISABLE_RESIZE_NEAREST
if(ippInter == ippNearest)
return false;
#endif
#if IPP_DISABLE_RESIZE_AREA
if(ippInter == ippSuper)
return false;
#endif
// Resize which doesn't match OpenCV exactly
if(!cv::ipp::useIPP_NE())
{
if(ippInter == ippNearest || ippInter == ippSuper || (ippDataType == ipp8u && ippInter == ippLinear))
return false;
}
if(ippInter != ippLinear && ippDataType == ipp64f)
return false;
// Accuracy mismatch is 1 but affects detectors greatly
#if IPP_DISABLE_RESIZE_8U
if(ippDataType == ipp8u && ippInter == ippLinear)
return false;
#if IPP_VERSION_X100 < 201801
// Degradations on int^2 linear downscale
if(ippDataType != ipp64f && ippInter == ippLinear && inv_scale_x < 1 && inv_scale_y < 1) // if downscale
{
int scale_x = (int)(1/inv_scale_x);
int scale_y = (int)(1/inv_scale_y);
if(1/inv_scale_x - scale_x < DBL_EPSILON && 1/inv_scale_y - scale_y < DBL_EPSILON) // if integer
{
if(!(scale_x&(scale_x-1)) && !(scale_y&(scale_y-1))) // if power of 2
return false;
}
}
#endif
bool affine = false;

View File

@ -571,6 +571,12 @@ static bool ipp_moments(Mat &src, Moments &m )
#if IPP_VERSION_X100 >= 900
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 < 201801
// Degradations for CV_8UC1
if(src.type() == CV_8UC1)
return false;
#endif
IppiSize roi = { src.cols, src.rows };
IppiPoint point = { 0, 0 };
int type = src.type();

View File

@ -1140,20 +1140,41 @@ static bool ippMorph(int op, int src_type, int dst_type,
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP()
// Problem with SSE42 optimizations
#if IPP_DISABLE_PERF_MORPH_SSE42
if(!(ipp::getIppFeatures()&ippCPUID_AVX))
#if IPP_VERSION_X100 < 201800
// Problem with SSE42 optimizations performance
if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
return false;
// Different mask flipping
if(op == MORPH_GRADIENT)
return false;
#endif
::ipp::IwAutoBuffer<Ipp8u> kernelTempBuffer;
#if IPP_VERSION_X100 < 201801
// Problem with AVX512 optimizations performance
if(cv::ipp::getIppTopFeatures()&ippCPUID_AVX512F)
return false;
// Multiple iterations on small mask is not effective in current integration
// In-place imitation for 3x3 kernel is not efficient
// Advanced morphology for small mask introduces degradations
if((iterations > 1 || src_data == dst_data || (op != MORPH_ERODE && op != MORPH_DILATE)) && kernel_width*kernel_height < 25)
return false;
// Skip even mask sizes for advanced morphology since they can produce out of spec writes
if((op != MORPH_ERODE && op != MORPH_DILATE) && (!(kernel_width&1) || !(kernel_height&1)))
return false;
#endif
IppAutoBuffer<Ipp8u> kernelTempBuffer;
::ipp::IwiBorderSize iwBorderSize;
::ipp::IwiBorderSize iwBorderSize2;
::ipp::IwiBorderType iwBorderType;
::ipp::IwiBorderType iwBorderType2;
::ipp::IwiImage iwMask;
::ipp::IwiImage iwInter;
::ipp::IwiSize initSize(width, height);
::ipp::IwiSize kernelSize(kernel_width, kernel_height);
::ipp::IwiPoint anchor(anchor_x, anchor_y);
IppDataType type = ippiGetDataType(CV_MAT_DEPTH(src_type));
int channels = CV_MAT_CN(src_type);
IwiMorphologyType morphType = ippiGetMorphologyType(op);
@ -1169,68 +1190,99 @@ static bool ippMorph(int op, int src_type, int dst_type,
if(src_type != dst_type)
return false;
if(!ippiCheckAnchor(anchor_x, anchor_y, kernel_width, kernel_height))
return false;
try
{
::ipp::IwiImage iwSrc(initSize, type, channels, ::ipp::IwiBorderSize(roi_x, roi_y, roi_width-roi_x-width, roi_height-roi_y-height), (void*)src_data, src_step);
::ipp::IwiImage iwDst(initSize, type, channels, ::ipp::IwiBorderSize(roi_x2, roi_y2, roi_width2-roi_x2-width, roi_height2-roi_y2-height), (void*)dst_data, dst_step);
::ipp::iwiFilterMorphology_GetBorderSize(morphType, kernelSize, iwBorderSize);
if(morphType != iwiMorphErode && morphType != iwiMorphDilate)
iwBorderSize = ::ipp::iwiSizeToBorderSize(kernelSize);
iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize);
if(!iwBorderType)
return false;
if(iterations > 1)
{
iwBorderSize.borderLeft /= 2;
iwBorderSize.borderTop /= 2;
iwBorderSize.borderRight /= 2;
iwBorderSize.borderBottom /= 2;
// Check dst border for second and later iterations
iwBorderSize2 = ::ipp::iwiSizeToBorderSize(kernelSize);
iwBorderType2 = ippiGetBorder(iwDst, borderType, iwBorderSize2);
if(!iwBorderType2)
return false;
}
iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize);
if(!iwBorderType.m_borderType || ((iwBorderType.m_borderFlags&ippBorderInMem) && (iwBorderType.m_borderFlags&ippBorderInMem) != ippBorderInMem))
return false;
if(morphType != iwiMorphErode && morphType != iwiMorphDilate && morphType != iwiMorphGradient)
{
// For now complex morphology support only InMem around all sides. This will be improved later.
if((iwBorderType&ippBorderInMem) && (iwBorderType&ippBorderInMem) != ippBorderInMem)
return false;
if(iwBorderType.m_borderType == ippBorderConst)
if((iwBorderType&ippBorderInMem) == ippBorderInMem)
{
iwBorderType &= ~ippBorderInMem;
iwBorderType &= ippBorderFirstStageInMem;
}
}
if(iwBorderType.StripFlags() == ippBorderConst)
{
if(Vec<double, 4>(borderValue) == morphologyDefaultBorderValue())
iwBorderType.m_borderType = ippBorderDefault;
iwBorderType.SetType(ippBorderDefault);
else
iwBorderType.SetValue(borderValue[0], borderValue[1], borderValue[2], borderValue[3]);
}
if(morphType != iwiMorphErode && morphType != iwiMorphDilate)
{
if((iwBorderType.m_borderFlags&ippBorderInMem) == ippBorderInMem)
iwBorderType.m_borderFlags = ippBorderFirstStageInMem;
}
// Test input parameters on dummy structures
{
::ipp::IwiImage testSrc(initSize, type, channels);
::ipp::IwiImage testDst(initSize, type, channels);
::ipp::IwiImage testMask(ippiSize(kernel_width, kernel_height), ipp8u, CV_MAT_CN(kernel_type));
::ipp::iwiFilterMorphology(&testSrc, &testDst, morphType, &testMask, &anchor, iwBorderType);
iwBorderType.m_value = ::ipp::IwValueFloat(borderValue[0], borderValue[1], borderValue[2], borderValue[3]);
}
iwMask.Init(ippiSize(kernel_width, kernel_height), ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, kernel_data, kernel_step);
if((int)kernel_step != kernel_width || CV_MAT_DEPTH(kernel_type) != CV_8U)
::ipp::IwiImage iwMaskLoc = iwMask;
if(morphType == iwiMorphDilate)
{
kernelTempBuffer.Alloc(kernel_width*kernel_height);
::ipp::IwiImage iwMaskTmp(ippiSize(kernel_width, kernel_height), ipp8u, 1, 0, kernelTempBuffer, kernel_width);
::ipp::iwiScale(&iwMask, &iwMaskTmp, 1, 0);
iwMask = iwMaskTmp;
iwMaskLoc.Alloc(iwMask.m_size, iwMask.m_dataType, iwMask.m_channels);
::ipp::iwiMirror(iwMask, iwMaskLoc, ippAxsBoth);
iwMask = iwMaskLoc;
}
if(iterations > 1)
{
iwInter.Alloc(initSize, type, channels);
// OpenCV uses in mem border from dst for two and more iterations, so we need to keep this border in intermediate image
iwInter.Alloc(initSize, type, channels, iwBorderSize2);
::ipp::IwiImage *pSwap[2] = {&iwInter, &iwDst};
::ipp::IwiBorderType iterBorder = iwBorderType;
iterBorder.m_borderFlags = 0;
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwInter, morphType, &iwMask, NULL, iwBorderType);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwInter, morphType, iwMask, ::ipp::IwDefault(), iwBorderType);
// Copy border only: transfer the frame of border pixels that surrounds dst into the
// intermediate image, so that both buffers used by the iteration loop (iwInter and iwDst)
// carry the same in-memory border for the second and later iterations.
// Negative ROI offsets are used here to reach into the border area left/above the image.
{
    // Top strip: spans the full width including the left and right borders
    if(iwBorderSize2.top)
    {
        ::ipp::IwiRoi borderRoi(-iwBorderSize2.left, -iwBorderSize2.top, iwDst.m_size.width+iwBorderSize2.left+iwBorderSize2.right, iwBorderSize2.top);
        ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
        ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
    }
    // Bottom strip: same horizontal extent as the top strip, placed right below the last image row
    if(iwBorderSize2.bottom)
    {
        ::ipp::IwiRoi borderRoi(-iwBorderSize2.left, iwDst.m_size.height, iwDst.m_size.width+iwBorderSize2.left+iwBorderSize2.right, iwBorderSize2.bottom);
        ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
        ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
    }
    // Left strip: image rows only, the corner areas are already covered by the top/bottom strips
    if(iwBorderSize2.left)
    {
        ::ipp::IwiRoi borderRoi(-iwBorderSize2.left, 0, iwBorderSize2.left, iwDst.m_size.height);
        ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
        ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
    }
    // Right strip: starts right after the last image column; its width is the RIGHT border size
    // (the previous code used the left border size here, which is only correct for symmetric borders)
    if(iwBorderSize2.right)
    {
        ::ipp::IwiRoi borderRoi(iwDst.m_size.width, 0, iwBorderSize2.right, iwDst.m_size.height);
        ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
        ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
    }
}
iwBorderType2.SetType(iwBorderType);
for(int i = 0; i < iterations-1; i++)
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, pSwap[i&0x1], pSwap[(i+1)&0x1], morphType, &iwMask, NULL, iterBorder);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, *pSwap[i&0x1], *pSwap[(i+1)&0x1], morphType, iwMask, ::ipp::IwDefault(), iwBorderType2);
if(iterations&0x1)
CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopyMask, &iwInter, &iwDst);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopy, iwInter, iwDst);
}
else
{
@ -1238,11 +1290,11 @@ static bool ippMorph(int op, int src_type, int dst_type,
{
iwInter.Alloc(initSize, type, channels);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwInter, morphType, &iwMask, NULL, iwBorderType);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopyMask, &iwInter, &iwDst);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwInter, morphType, iwMask, ::ipp::IwDefault(), iwBorderType);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopy, iwInter, iwDst);
}
else
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwDst, morphType, &iwMask, NULL, iwBorderType);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwDst, morphType, iwMask, ::ipp::IwDefault(), iwBorderType);
}
}
catch(::ipp::IwException ex)
@ -1912,6 +1964,7 @@ static bool ocl_morphologyEx(InputArray _src, OutputArray _dst, int op,
}
#endif
#define IPP_DISABLE_MORPH_ADV 1
#ifdef HAVE_IPP
#if !IPP_DISABLE_MORPH_ADV
namespace cv {

View File

@ -1729,80 +1729,47 @@ namespace cv
{
static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType)
{
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP()
// Problem with SSE42 optimization for 16s
#if IPP_DISABLE_PERF_BOX16S_SSE42
if(src.depth() == CV_16S && !(ipp::getIppFeatures()&ippCPUID_AVX))
#if IPP_VERSION_X100 < 201801
// Problem with SSE42 optimization for 16s and some 8u modes
if(ipp::getIppTopFeatures() == ippCPUID_SSE42 && (((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 3 || src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 3 && (ksize.width > 5 || ksize.height > 5))))
return false;
// Other optimizations have some degradations too
if((((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 1 && (ksize.width > 5 || ksize.height > 5))))
return false;
#endif
int stype = src.type(), cn = CV_MAT_CN(stype);
IppiBorderType ippBorderType = ippiGetBorderType(borderType & ~BORDER_ISOLATED);
IppDataType ippType = ippiGetDataType(stype);
Point ocvAnchor, ippAnchor;
ocvAnchor.x = anchor.x < 0 ? ksize.width / 2 : anchor.x;
ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y;
ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0);
ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0);
if(!normalize)
return false;
if(normalize && (!src.isSubmatrix() || borderType&BORDER_ISOLATED) && stype == dst.type() &&
(ippBorderType == ippBorderRepl || /* returns ippStsStepErr: Step value is not valid */
ippBorderType == ippBorderConst ||
ippBorderType == ippBorderMirror) && ocvAnchor == ippAnchor) // returns ippStsMaskSizeErr: mask has an illegal value
if(!ippiCheckAnchor(anchor, ksize))
return false;
try
{
IppStatus status;
Ipp32s bufSize = 0;
IppiSize roiSize = { dst.cols, dst.rows };
IppiSize maskSize = { ksize.width, ksize.height };
IppAutoBuffer<Ipp8u> buffer;
if(ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippType, cn, &bufSize) < 0)
::ipp::IwiImage iwSrc = ippiGetImage(src);
::ipp::IwiImage iwDst = ippiGetImage(dst);
::ipp::IwiSize iwKSize = ippiGetSize(ksize);
::ipp::IwiBorderSize borderSize(iwKSize);
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
if(!ippBorder)
return false;
buffer.allocate(bufSize);
#define IPP_FILTER_BOX_BORDER(ippType, flavor)\
{\
ippType borderValue[4] = { 0, 0, 0, 0 };\
status = CV_INSTRUMENT_FUN_IPP(ippiFilterBoxBorder_##flavor, src.ptr<ippType>(), (int)src.step, dst.ptr<ippType>(),\
(int)dst.step, roiSize, maskSize,\
ippBorderType, borderValue, buffer);\
}
if (stype == CV_8UC1)
IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C1R)
else if (stype == CV_8UC3)
IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C3R)
else if (stype == CV_8UC4)
IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C4R)
else if (stype == CV_16UC1)
IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C1R)
else if (stype == CV_16UC3)
IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C3R)
else if (stype == CV_16UC4)
IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C4R)
else if (stype == CV_16SC1)
IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C1R)
else if (stype == CV_16SC3)
IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C3R)
else if (stype == CV_16SC4)
IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C4R)
else if (stype == CV_32FC1)
IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C1R)
else if (stype == CV_32FC3)
IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C3R)
else if (stype == CV_32FC4)
IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C4R)
else
return false;
if(status >= 0)
return true;
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBox, iwSrc, iwDst, iwKSize, ::ipp::IwDefault(), ippBorder);
}
catch (::ipp::IwException)
{
return false;
}
#undef IPP_FILTER_BOX_BORDER
return true;
#else
CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(normalize); CV_UNUSED(borderType);
return false;
#endif
}
}
#endif
@ -2241,8 +2208,11 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
#endif
#ifdef HAVE_IPP
#define IPP_DISABLE_FILTERING_INMEM_PARTIAL 1 // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling
#if IPP_VERSION_X100 == 201702 // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling
#define IPP_GAUSSIANBLUR_PARALLEL 0
#else
#define IPP_GAUSSIANBLUR_PARALLEL 1
#endif
#ifdef HAVE_IPP_IW
@ -2266,8 +2236,8 @@ public:
try
{
::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, &m_src, &m_dst, m_kernelSize, m_sigma, m_border, &roi);
::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, m_src, m_dst, m_kernelSize, m_sigma, ::ipp::IwDefault(), m_border, tile);
}
catch(::ipp::IwException e)
{
@ -2295,7 +2265,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 <= 201702 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__))
#if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__))
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
return false; // bug on ia32
#else
@ -2313,17 +2283,15 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
{
Mat src = _src.getMat();
Mat dst = _dst.getMat();
::ipp::IwiImage iwSrc = ippiGetImage(src);
::ipp::IwiImage iwDst = ippiGetImage(dst);
::ipp::IwiBorderSize borderSize(::ipp::IwiSize(ippiSize(ksize)));
::ipp::IwiImage iwSrc = ippiGetImage(src);
::ipp::IwiImage iwDst = ippiGetImage(dst);
::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize));
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
if(!ippBorder.m_borderType)
if(!ippBorder)
return false;
const bool disableThreading = IPP_DISABLE_FILTERING_INMEM_PARTIAL &&
((ippBorder.m_borderFlags)&ippBorderInMem) && ((ippBorder.m_borderFlags)&ippBorderInMem) != ippBorderInMem;
const int threads = ippiSuggestThreadsNum(iwDst, 2);
if(!disableThreading && IPP_GAUSSIANBLUR_PARALLEL && threads > 1) {
if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) {
bool ok;
ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok);
@ -2335,7 +2303,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
if(!ok)
return false;
} else {
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, &iwSrc, &iwDst, ksize.width, (float) sigma1, ippBorder);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, iwSrc, iwDst, ksize.width, sigma1, ::ipp::IwDefault(), ippBorder);
}
}
catch (::ipp::IwException ex)
@ -3411,6 +3379,12 @@ static bool ipp_medianFilter(Mat &src0, Mat &dst, int ksize)
{
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 < 201801
// Degradations for big kernel
if(ksize > 7)
return false;
#endif
{
int bufSize;
IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize);
@ -4279,8 +4253,8 @@ public:
try
{
::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, dst.m_size.width, range.end - range.start);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, &src, &dst, radius, valSquareSigma, posSquareSigma, ippiFilterBilateralGauss, ippDistNormL1, borderType, &roi);
::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, dst.m_size.width, range.end - range.start);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, src, dst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), borderType, tile);
}
catch(::ipp::IwException)
{
@ -4318,13 +4292,11 @@ static bool ipp_bilateralFilter(Mat &src, Mat &dst, int d, double sigmaColor, do
::ipp::IwiImage iwDst = ippiGetImage(dst);
::ipp::IwiBorderSize borderSize(radius);
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
if(!ippBorder.m_borderType)
if(!ippBorder)
return false;
const bool disableThreading = IPP_DISABLE_FILTERING_INMEM_PARTIAL &&
((ippBorder.m_borderFlags)&ippBorderInMem) && ((ippBorder.m_borderFlags)&ippBorderInMem) != ippBorderInMem;
const int threads = ippiSuggestThreadsNum(iwDst, 2);
if(!disableThreading && IPP_BILATERAL_PARALLEL && threads > 1) {
if(IPP_BILATERAL_PARALLEL && threads > 1) {
bool ok = true;
Range range(0, (int)iwDst.m_size.height);
ipp_bilateralFilterParallel invoker(iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ippBorder, &ok);
@ -4336,7 +4308,7 @@ static bool ipp_bilateralFilter(Mat &src, Mat &dst, int d, double sigmaColor, do
if(!ok)
return false;
} else {
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, &iwSrc, &iwDst, radius, valSquareSigma, posSquareSigma, ippiFilterBilateralGauss, ippDistNormL1, ippBorder);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), ippBorder);
}
}
catch (::ipp::IwException)

View File

@ -962,19 +962,18 @@ static bool ipp_getThreshVal_Otsu_8u( const unsigned char* _src, int step, Size
{
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 >= 810
int ippStatus = -1;
// Performance degradations
#if IPP_VERSION_X100 >= 201800
IppiSize srcSize = { size.width, size.height };
CV_SUPPRESS_DEPRECATED_START
ippStatus = CV_INSTRUMENT_FUN_IPP(ippiComputeThreshold_Otsu_8u_C1R, _src, step, srcSize, &thresh);
CV_SUPPRESS_DEPRECATED_END
if(ippStatus >= 0)
return true;
if(CV_INSTRUMENT_FUN_IPP(ippiComputeThreshold_Otsu_8u_C1R, _src, step, srcSize, &thresh) < 0)
return false;
return true;
#else
CV_UNUSED(_src); CV_UNUSED(step); CV_UNUSED(size); CV_UNUSED(thresh);
#endif
return false;
#endif
}
#endif
@ -992,7 +991,7 @@ getThreshVal_Otsu_8u( const Mat& _src )
#ifdef HAVE_IPP
unsigned char thresh;
CV_IPP_RUN(IPP_VERSION_X100 >= 810, ipp_getThreshVal_Otsu_8u(_src.ptr(), step, size, thresh), thresh);
CV_IPP_RUN_FAST(ipp_getThreshVal_Otsu_8u(_src.ptr(), step, size, thresh), thresh);
#endif
const int N = 256;

View File

@ -136,7 +136,7 @@ icvReleaseHidHaarClassifierCascade( CvHidHaarClassifierCascade** _cascade )
for( i = 0; i < cascade->count; i++ )
{
if( cascade->ipp_stages[i] )
#if IPP_VERSION_X100 < 900
#if IPP_VERSION_X100 < 900 && !IPP_DISABLE_HAAR
ippiHaarClassifierFree_32f( (IppiHaarClassifier_32f*)cascade->ipp_stages[i] );
#else
cvFree(&cascade->ipp_stages[i]);

View File

@ -339,7 +339,7 @@ typedef struct ImplData
// convert flags register to more handy variables
void flagsToVars(int flags)
{
#if defined(HAVE_IPP_ICV_ONLY)
#if defined(HAVE_IPP_ICV)
ipp = 0;
icv = ((flags&CV_IMPL_IPP) > 0);
#else

View File

@ -3078,6 +3078,16 @@ void printVersionInfo(bool useStdOut)
::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization);
if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl;
#endif
#ifdef HAVE_IPP
// Report whether IPP optimizations are currently in use ("enabled"/"disabled" from cv::ipp::useIPP()):
// recorded as the "cv_ipp_optimization" test property and printed to stdout when useStdOut is set.
const char * ipp_optimization = cv::ipp::useIPP()? "enabled" : "disabled";
::testing::Test::RecordProperty("cv_ipp_optimization", ipp_optimization);
if (useStdOut) std::cout << "Intel(R) IPP optimization: " << ipp_optimization << std::endl;
// Report the version string returned by cv::ipp::getIppVersion() the same way ("cv_ipp_version" property).
cv::String ippVer = cv::ipp::getIppVersion();
::testing::Test::RecordProperty("cv_ipp_version", ippVer);
if(useStdOut) std::cout << "Intel(R) IPP version: " << ippVer.c_str() << std::endl;
#endif
}