mirror of
https://github.com/opencv/opencv.git
synced 2025-07-24 14:06:27 +08:00
Add basic plumbing for AVX512 support
The opencv infrastructure mostly has the basics for supporting avx512 math functions, but it wasn't hooked up (likely due to lack of users) In order to compile the DNN functions for AVX512, a few things need to be hooked up and this patch does that Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
This commit is contained in:
parent
047764f476
commit
fc8e848a54
@ -26,7 +26,7 @@
|
||||
#
|
||||
# CPU_DISPATCH_FLAGS_${opt} - flags for source files compiled separately (<name>.avx2.cpp)
|
||||
|
||||
set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3") # without AVX512
|
||||
set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX512")
|
||||
list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16)
|
||||
list(APPEND CPU_ALL_OPTIMIZATIONS VSX)
|
||||
list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS)
|
||||
@ -145,7 +145,7 @@ elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ")
|
||||
endif()
|
||||
|
||||
if(X86 OR X86_64)
|
||||
ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2") # without AVX512
|
||||
ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX512")
|
||||
|
||||
ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp")
|
||||
ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp")
|
||||
|
@ -82,6 +82,10 @@
|
||||
# include <immintrin.h>
|
||||
# define CV_AVX2 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_AVX512
|
||||
# include <immintrin.h>
|
||||
# define CV_AVX512 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_FMA3
|
||||
# define CV_FMA3 1
|
||||
#endif
|
||||
|
@ -165,6 +165,21 @@
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512
|
||||
# define CV_TRY_AVX512 1
|
||||
# define CV_CPU_HAS_SUPPORT_AVX512 1
|
||||
# define CV_CPU_CALL_AVX512(fn, args) return (opt_AVX512::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512
|
||||
# define CV_TRY_AVX512 1
|
||||
# define CV_CPU_HAS_SUPPORT_AVX512 (cv::checkHardwareSupport(CV_CPU_AVX512))
|
||||
# define CV_CPU_CALL_AVX512(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512) return (opt_AVX512::fn args)
|
||||
#else
|
||||
# define CV_TRY_AVX512 0
|
||||
# define CV_CPU_HAS_SUPPORT_AVX512 0
|
||||
# define CV_CPU_CALL_AVX512(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_AVX512(fn, args, mode, ...) CV_CPU_CALL_AVX512(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
|
||||
# define CV_TRY_NEON 1
|
||||
# define CV_CPU_HAS_SUPPORT_NEON 1
|
||||
|
Loading…
Reference in New Issue
Block a user