diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index 0c77f4505d..cc57cbdcae 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -26,7 +26,7 @@ # # CPU_DISPATCH_FLAGS_${opt} - flags for source files compiled separately (.avx2.cpp) -set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3") # without AVX512 +set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX512") list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16) list(APPEND CPU_ALL_OPTIMIZATIONS VSX) list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS) @@ -145,7 +145,7 @@ elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ") endif() if(X86 OR X86_64) - ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2") # without AVX512 + ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX512") ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp") ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp") diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index 5261a41439..75f6ca9c7d 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -82,6 +82,10 @@ # include # define CV_AVX2 1 #endif +#ifdef CV_CPU_COMPILE_AVX512 +# include +# define CV_AVX512 1 +#endif #ifdef CV_CPU_COMPILE_FMA3 # define CV_FMA3 1 #endif diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h b/modules/core/include/opencv2/core/cv_cpu_helper.h index 66a473f1e1..1c7dbaf852 100644 --- a/modules/core/include/opencv2/core/cv_cpu_helper.h +++ b/modules/core/include/opencv2/core/cv_cpu_helper.h @@ -165,6 +165,21 @@ #endif #define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512 +# define CV_TRY_AVX512 1 +# define CV_CPU_HAS_SUPPORT_AVX512 1 +# define CV_CPU_CALL_AVX512(fn, args) return (opt_AVX512::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512 +# define CV_TRY_AVX512 1 +# define CV_CPU_HAS_SUPPORT_AVX512 (cv::checkHardwareSupport(CV_CPU_AVX512)) +# define CV_CPU_CALL_AVX512(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512) return (opt_AVX512::fn args) +#else +# define CV_TRY_AVX512 0 +# define CV_CPU_HAS_SUPPORT_AVX512 0 +# define CV_CPU_CALL_AVX512(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512(fn, args, mode, ...) CV_CPU_CALL_AVX512(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON # define CV_TRY_NEON 1 # define CV_CPU_HAS_SUPPORT_NEON 1