mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 14:13:15 +08:00
imgproc: enable GaussianBlur IPP parallel processing
This commit is contained in:
parent
a1b09a3734
commit
a3b109eca0
@ -208,8 +208,6 @@ T* allocSingletonNew() { return new(allocSingletonNewBuffer(sizeof(T))) T(); }
|
||||
#define IPP_DISABLE_HOUGH 1 // improper integration/results
|
||||
#define IPP_DISABLE_FILTER2D_BIG_MASK 1 // different results on masks > 7x7
|
||||
|
||||
#define IPP_DISABLE_GAUSSIANBLUR_PARALLEL 1 // not supported (2017u2 / 2017u3)
|
||||
|
||||
// Temporarily disabled named IPP region. Performance
|
||||
#define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations
|
||||
#define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653)
|
||||
|
@ -12,3 +12,9 @@ ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2)
|
||||
ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX)
|
||||
ocv_add_dispatched_file(undistort SSE2 AVX2)
|
||||
ocv_define_module(imgproc opencv_core WRAP java python js)
|
||||
|
||||
ocv_check_environment_variables(OPENCV_IPP_GAUSSIAN_BLUR)
|
||||
option(OPENCV_IPP_GAUSSIAN_BLUR "Enable IPP optimizations for GaussianBlur (+8Mb in binary size)" OFF)
|
||||
if(OPENCV_IPP_GAUSSIAN_BLUR)
|
||||
ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/smooth.dispatch.cpp "ENABLE_IPP_GAUSSIAN_BLUR=1")
|
||||
endif()
|
||||
|
@ -470,9 +470,14 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
|
||||
#endif
|
||||
|
||||
#if 0 //defined HAVE_IPP
|
||||
#if defined ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option
|
||||
|
||||
#define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1
|
||||
#define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH 1
|
||||
#define IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH 1
|
||||
|
||||
// IW 2017u2 has a bug which doesn't allow the use of partial inMem with tiling
|
||||
#if IPP_DISABLE_GAUSSIANBLUR_PARALLEL
|
||||
#if IPP_VERSION_X100 < 201900
|
||||
#define IPP_GAUSSIANBLUR_PARALLEL 0
|
||||
#else
|
||||
#define IPP_GAUSSIANBLUR_PARALLEL 1
|
||||
@ -555,6 +560,14 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
return false;
|
||||
|
||||
const int threads = ippiSuggestThreadsNum(iwDst, 2);
|
||||
|
||||
if (IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH && (threads == 1 && ksize.width > 25))
|
||||
return false;
|
||||
if (IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH && (threads == 1 && src.type() == CV_16SC4))
|
||||
return false;
|
||||
if (IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH && (threads == 1 && src.type() == CV_32FC4))
|
||||
return false;
|
||||
|
||||
if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) {
|
||||
bool ok;
|
||||
ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok);
|
||||
@ -655,8 +668,6 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
CV_OVX_RUN(true,
|
||||
openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
|
||||
|
||||
//CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
|
||||
|
||||
if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.getMat().isSubmatrix()))
|
||||
{
|
||||
std::vector<ufixedpoint16> fkx, fky;
|
||||
@ -681,6 +692,11 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
}
|
||||
}
|
||||
|
||||
#if defined ENABLE_IPP_GAUSSIAN_BLUR
|
||||
// IPP is not bit-exact with the OpenCV implementation
|
||||
CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
|
||||
#endif
|
||||
|
||||
sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user