From fc8e848a540ebade40f5092bd135c754f7c04c30 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 25 Dec 2017 21:06:52 +0000 Subject: [PATCH] Add basic plumbing for AVX512 support The opencv infrastructure mostly has the basics for supporting avx512 math functions, but it wasn't hooked up (likely due to lack of users) In order to compile the DNN functions for AVX512, a few things need to be hooked up and this patch does that Signed-off-by: Arjan van de Ven --- cmake/OpenCVCompilerOptimizations.cmake | 4 ++-- .../core/include/opencv2/core/cv_cpu_dispatch.h | 4 ++++ modules/core/include/opencv2/core/cv_cpu_helper.h | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index 0c77f4505d..cc57cbdcae 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -26,7 +26,7 @@ # # CPU_DISPATCH_FLAGS_${opt} - flags for source files compiled separately (.avx2.cpp) -set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3") # without AVX512 +set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX512") list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16) list(APPEND CPU_ALL_OPTIMIZATIONS VSX) list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS) @@ -145,7 +145,7 @@ elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ") endif() if(X86 OR X86_64) - ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2") # without AVX512 + ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX512") ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp") ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp") diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index 5261a41439..75f6ca9c7d 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -82,6 +82,10 @@ # include # define CV_AVX2 1 #endif +#ifdef CV_CPU_COMPILE_AVX512 +# include +# define CV_AVX512 1 +#endif #ifdef CV_CPU_COMPILE_FMA3 # define CV_FMA3 1 #endif diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h b/modules/core/include/opencv2/core/cv_cpu_helper.h index 66a473f1e1..1c7dbaf852 100644 --- a/modules/core/include/opencv2/core/cv_cpu_helper.h +++ b/modules/core/include/opencv2/core/cv_cpu_helper.h @@ -165,6 +165,21 @@ #endif #define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512 +# define CV_TRY_AVX512 1 +# define CV_CPU_HAS_SUPPORT_AVX512 1 +# define CV_CPU_CALL_AVX512(fn, args) return (opt_AVX512::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512 +# define CV_TRY_AVX512 1 +# define CV_CPU_HAS_SUPPORT_AVX512 (cv::checkHardwareSupport(CV_CPU_AVX512)) +# define CV_CPU_CALL_AVX512(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512) return (opt_AVX512::fn args) +#else +# define CV_TRY_AVX512 0 +# define CV_CPU_HAS_SUPPORT_AVX512 0 +# define CV_CPU_CALL_AVX512(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512(fn, args, mode, ...) CV_CPU_CALL_AVX512(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON # define CV_TRY_NEON 1 # define CV_CPU_HAS_SUPPORT_NEON 1