diff --git a/CMakeLists.txt b/CMakeLists.txt index 16e039d324..dadbf1dc31 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,10 @@ if(POLICY CMP0042) cmake_policy(SET CMP0042 NEW) endif() +if(POLICY CMP0051) + cmake_policy(SET CMP0051 NEW) +endif() + include(cmake/OpenCVUtils.cmake) # must go before the project command @@ -280,16 +284,6 @@ OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) ) OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) ) OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) ) -OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) -OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) -OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) ) -OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) -OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) ) -OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) -OCV_OPTION(ENABLE_POPCNT "Enable POPCNT instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) -OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) -OCV_OPTION(ENABLE_AVX2 "Enable AVX2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) -OCV_OPTION(ENABLE_FMA3 "Enable FMA3 instructions" OFF IF ((MSVC OR 
CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) OCV_OPTION(ENABLE_NEON "Enable NEON instructions" "${NEON}" IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) ) OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) ) OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF ) @@ -299,6 +293,9 @@ OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function c OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF ) OCV_OPTION(ENABLE_GNU_STL_DEBUG "Enable GNU STL Debug mode (defines _GLIBCXX_DEBUG)" OFF IF ((NOT CMAKE_VERSION VERSION_LESS "2.8.11") AND CMAKE_COMPILER_IS_GNUCXX) ) OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX) +OCV_OPTION(CV_ENABLE_INTRINSICS "Use intrinsic-based optimized code" ON ) +OCV_OPTION(CV_DISABLE_OPTIMIZATION "Disable explicit optimized code (dispatched code/intrinsics/loop unrolling/etc)" OFF ) + OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF ) @@ -499,6 +496,9 @@ if(CMAKE_GENERATOR MATCHES "Makefiles|Ninja" AND "${CMAKE_BUILD_TYPE}" STREQUAL set(CMAKE_BUILD_TYPE Release) endif() +# --- Python Support --- +include(cmake/OpenCVDetectPython.cmake) + include(cmake/OpenCVCompilerOptions.cmake) @@ -576,9 +576,6 @@ else() unset(DOXYGEN_FOUND CACHE) endif() -# --- Python Support --- -include(cmake/OpenCVDetectPython.cmake) - # --- Java Support --- include(cmake/OpenCVDetectApacheAnt.cmake) if(ANDROID) @@ -867,6 +864,33 @@ if(NOT CMAKE_GENERATOR MATCHES "Xcode|Visual Studio") status(" Configuration:" ${CMAKE_BUILD_TYPE}) endif() + +# ========================= CPU code generation mode ========================= +status("") +status(" CPU/HW features:") +status(" Baseline:" "${CPU_BASELINE_FINAL}") +if(NOT CPU_BASELINE STREQUAL 
CPU_BASELINE_FINAL) + status(" requested:" "${CPU_BASELINE}") +endif() +if(CPU_BASELINE_REQUIRE) + status(" required:" "${CPU_BASELINE_REQUIRE}") +endif() +if(CPU_BASELINE_DISABLE) + status(" disabled:" "${CPU_BASELINE_DISABLE}") +endif() +if(CPU_DISPATCH_FINAL OR CPU_DISPATCH) + status(" Dispatched code generation:" "${CPU_DISPATCH_FINAL}") + if(NOT CPU_DISPATCH STREQUAL CPU_DISPATCH_FINAL) + status(" requested:" "${CPU_DISPATCH}") + endif() + if(CPU_DISPATCH_REQUIRE) + status(" required:" "${CPU_DISPATCH_REQUIRE}") + endif() + foreach(OPT ${CPU_DISPATCH_FINAL}) + status(" ${OPT} (${CPU_${OPT}_USAGE_COUNT} files):" "+ ${CPU_DISPATCH_${OPT}_INCLUDED}") + endforeach() +endif() + # ========================== C/C++ options ========================== if(CMAKE_CXX_COMPILER_VERSION) set(OPENCV_COMPILER_STR "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1} (ver ${CMAKE_CXX_COMPILER_VERSION})") diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake new file mode 100644 index 0000000000..b849f02b14 --- /dev/null +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -0,0 +1,651 @@ +# x86/x86-64 arch: +# SSE / SSE2 (always available on 64-bit CPUs) +# SSE3 / SSSE3 +# SSE4_1 / SSE4_2 / POPCNT +# AVX / AVX2 / AVX512 +# FMA3 + +# CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag) +# CPU_{opt}_IMPLIES= +# CPU_{opt}_FORCE= - subset of "implies" list +# CPU_{opt}_FLAGS_ON="" +# CPU_{opt}_FEATURE_ALIAS - mapping to CV_CPU_* HWFeature enum + +# Input variables: +# CPU_BASELINE= - preferred list of baseline optimizations +# CPU_DISPATCH= - preferred list of dispatched optimizations + +# Advanced input variables: +# CPU_BASELINE_REQUIRE= - list of required baseline optimizations +# CPU_DISPATCH_REQUIRE= - list of required dispatched optimizations +# CPU_BASELINE_DISABLE= - list of disabled baseline optimizations + +# Output variables: +# CPU_BASELINE_FINAL= - final list of enabled compiler optimizations +# 
CPU_DISPATCH_FINAL= - final list of dispatched optimizations +# +# CPU_DISPATCH_FLAGS_${opt} - flags for source files compiled separately (_opt_avx2.cpp) + +set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3") # without AVX512 +list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16) +list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS) + +ocv_update(CPU_VFPV3_FEATURE_ALIAS "") + + +set(HELP_CPU_BASELINE "Specify list of enabled baseline CPU optimizations") +set(HELP_CPU_BASELINE_REQUIRE "Specify list of required baseline CPU optimizations") +set(HELP_CPU_BASELINE_DISABLE "Specify list of forbidden baseline CPU optimizations") +set(HELP_CPU_DISPATCH "Specify list of dispatched CPU optimizations") +set(HELP_CPU_DISPATCH_REQUIRE "Specify list of required dispatched CPU optimizations") + +foreach(var CPU_BASELINE CPU_BASELINE_REQUIRE CPU_BASELINE_DISABLE CPU_DISPATCH CPU_DISPATCH_REQUIRE) + if(DEFINED ${var}) + string(REPLACE "," ";" _list "${${var}}") + set(${var} "${_list}" CACHE STRING "${HELP_${var}}" FORCE) + endif() +endforeach() + +# process legacy flags +macro(ocv_optimization_process_obsolete_option legacy_flag OPT legacy_warn) + if(DEFINED ${legacy_flag}) + if(${legacy_warn}) + message(STATUS "WARNING: Option ${legacy_flag}='${${legacy_flag}}' is deprecated and should not be used anymore") + message(STATUS " Behaviour of this option is not backward compatible") + message(STATUS " Refer to 'CPU_BASELINE'/'CPU_DISPATCH' CMake options documentation") + endif() + if(${legacy_flag}) + if(NOT ";${CPU_BASELINE_REQUIRE};" MATCHES ";${OPT};") + set(CPU_BASELINE_REQUIRE "${CPU_BASELINE_REQUIRE};${OPT}" CACHE STRING "${HELP_CPU_BASELINE_REQUIRE}" FORCE) + endif() + else() + if(NOT ";${CPU_BASELINE_DISABLE};" MATCHES ";${OPT};") + set(CPU_BASELINE_DISABLE "${CPU_BASELINE_DISABLE};${OPT}" CACHE STRING "${HELP_CPU_BASELINE_DISABLE}" FORCE) + endif() + endif() + endif() +endmacro() +ocv_optimization_process_obsolete_option(ENABLE_SSE SSE ON) 
+ocv_optimization_process_obsolete_option(ENABLE_SSE2 SSE2 ON) +ocv_optimization_process_obsolete_option(ENABLE_SSE3 SSE3 ON) +ocv_optimization_process_obsolete_option(ENABLE_SSSE3 SSSE3 ON) +ocv_optimization_process_obsolete_option(ENABLE_SSE41 SSE4_1 ON) +ocv_optimization_process_obsolete_option(ENABLE_SSE42 SSE4_2 ON) +ocv_optimization_process_obsolete_option(ENABLE_POPCNT POPCNT ON) +ocv_optimization_process_obsolete_option(ENABLE_AVX AVX ON) +ocv_optimization_process_obsolete_option(ENABLE_AVX2 AVX2 ON) +ocv_optimization_process_obsolete_option(ENABLE_FMA3 FMA3 ON) + +ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF) +ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF) + + +macro(ocv_is_optimization_in_list resultvar check_opt) + set(__checked "") + set(__queue ${ARGN}) + set(${resultvar} 0) + while(__queue AND NOT ${resultvar}) + list(REMOVE_DUPLICATES __queue) + set(__queue_current ${__queue}) + set(__queue "") + foreach(OPT ${__queue_current}) + if("x${OPT}" STREQUAL "x${check_opt}") + set(${resultvar} 1) + break() + elseif(NOT ";${__checked};" MATCHES ";${OPT};") + list(APPEND __queue ${CPU_${OPT}_IMPLIES}) + endif() + list(APPEND __checked ${OPT}) + endforeach() + endwhile() +endmacro() + +macro(ocv_is_optimization_in_force_list resultvar check_opt) + set(__checked "") + set(__queue ${ARGN}) + set(${resultvar} 0) + while(__queue AND NOT ${resultvar}) + list(REMOVE_DUPLICATES __queue) + set(__queue_current ${__queue}) + set(__queue "") + foreach(OPT ${__queue_current}) + if(OPT STREQUAL "${check_opt}") + set(${resultvar} 1) + break() + elseif(NOT ";${__checked};" MATCHES ";${OPT};") + list(APPEND __queue ${CPU_${OPT}_FORCE}) + endif() + list(APPEND __checked ${OPT}) + endforeach() + endwhile() +endmacro() + +macro(ocv_append_optimization_flag var OPT) + if(CPU_${OPT}_FLAGS_CONFLICT) + string(REGEX REPLACE " ${CPU_${OPT}_FLAGS_CONFLICT}" "" ${var} " ${${var}}") + string(REGEX REPLACE "^ +" "" ${var} "${${var}}") + endif() + 
set(${var} "${${var}} ${CPU_${OPT}_FLAGS_ON}") +endmacro() + +# Support GCC -march=native or Intel Compiler -xHost flags +if(";${CPU_BASELINE};" MATCHES ";NATIVE;" OR ";${CPU_BASELINE};" MATCHES ";HOST;") + set(CPU_BASELINE_DETECT ON) + set(_add_native_flag ON) +elseif(";${CPU_BASELINE};" MATCHES ";DETECT;") + set(CPU_BASELINE_DETECT ON) +elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ") + if(DEFINED CPU_BASELINE) + message(STATUS "CPU: Detected '-march=native' or '-xHost' compiler flag. Force CPU_BASELINE=DETECT.") + endif() + set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}") + set(CPU_BASELINE_DETECT ON) +endif() + +if(X86 OR X86_64) + ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX512") + + ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp") + ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp") + ocv_update(CPU_SSE3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse3.cpp") + ocv_update(CPU_SSSE3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_ssse3.cpp") + ocv_update(CPU_SSE4_1_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse41.cpp") + ocv_update(CPU_SSE4_2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse42.cpp") + ocv_update(CPU_POPCNT_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_popcnt.cpp") + ocv_update(CPU_AVX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx.cpp") + ocv_update(CPU_AVX2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx2.cpp") + ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp") + ocv_update(CPU_AVX512_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512.cpp") + + if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE) + ocv_update(CPU_AVX512_IMPLIES "AVX2") + ocv_update(CPU_AVX512_FORCE "") # Don't force other optimizations + ocv_update(CPU_AVX2_IMPLIES "AVX;FMA3;FP16") + ocv_update(CPU_FMA3_IMPLIES "AVX2") + ocv_update(CPU_FMA3_FORCE "") # Don't force other 
optimizations + ocv_update(CPU_FP16_IMPLIES "AVX") + ocv_update(CPU_FP16_FORCE "") # Don't force other optimizations + ocv_update(CPU_AVX_IMPLIES "SSE4_2") + ocv_update(CPU_SSE4_2_IMPLIES "SSE4_1;POPCNT") + ocv_update(CPU_POPCNT_IMPLIES "SSE4_1") + ocv_update(CPU_POPCNT_FORCE "") # Don't force other optimizations + ocv_update(CPU_SSE4_1_IMPLIES "SSE3;SSSE3") + ocv_update(CPU_SSSE3_IMPLIES "SSE3") + ocv_update(CPU_SSE3_IMPLIES "SSE2") + ocv_update(CPU_SSE2_IMPLIES "SSE") + endif() + + if(CV_ICC) + macro(ocv_intel_compiler_optimization_option name unix_flags msvc_flags) + ocv_update(CPU_${name}_FLAGS_NAME "${name}") + if(MSVC) + set(enable_flags "${msvc_flags}") + set(flags_conflict "/arch:[^ ]+") + else() + set(enable_flags "${unix_flags}") + set(flags_conflict "-msse[^ ]*|-mssse3|-mavx[^ ]*|-march[^ ]+") + endif() + ocv_update(CPU_${name}_FLAGS_ON "${enable_flags}") + if(flags_conflict) + ocv_update(CPU_${name}_FLAGS_CONFLICT "${flags_conflict}") + endif() + endmacro() + ocv_intel_compiler_optimization_option(AVX2 "-march=core-avx2" "/arch:CORE-AVX2") + ocv_intel_compiler_optimization_option(FP16 "-mavx" "/arch:AVX") + ocv_intel_compiler_optimization_option(AVX "-mavx" "/arch:AVX") + ocv_intel_compiler_optimization_option(FMA3 "" "") + ocv_intel_compiler_optimization_option(POPCNT "" "") + ocv_intel_compiler_optimization_option(SSE4_2 "-msse4.2" "/arch:SSE4.2") + ocv_intel_compiler_optimization_option(SSE4_1 "-msse4.1" "/arch:SSE4.1") + ocv_intel_compiler_optimization_option(SSE3 "-msse3" "/arch:SSE3") + ocv_intel_compiler_optimization_option(SSSE3 "-mssse3" "/arch:SSSE3") + ocv_intel_compiler_optimization_option(SSE2 "-msse2" "/arch:SSE2") + if(NOT X86_64) # x64 compiler doesn't support /arch:sse + ocv_intel_compiler_optimization_option(SSE "-msse" "/arch:SSE") + endif() + #ocv_intel_compiler_optimization_option(AVX512 "-march=core-avx512") + elseif(CMAKE_COMPILER_IS_GNUCXX) + ocv_update(CPU_AVX2_FLAGS_ON "-mavx2") + ocv_update(CPU_FP16_FLAGS_ON "-mf16c") + 
ocv_update(CPU_AVX_FLAGS_ON "-mavx") + ocv_update(CPU_FMA3_FLAGS_ON "-mfma") + ocv_update(CPU_POPCNT_FLAGS_ON "-mpopcnt") + ocv_update(CPU_SSE4_2_FLAGS_ON "-msse4.2") + ocv_update(CPU_SSE4_1_FLAGS_ON "-msse4.1") + ocv_update(CPU_SSE3_FLAGS_ON "-msse3") + ocv_update(CPU_SSSE3_FLAGS_ON "-mssse3") + ocv_update(CPU_SSE2_FLAGS_ON "-msse2") + ocv_update(CPU_SSE_FLAGS_ON "-msse") + if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") + ocv_update(CPU_AVX512_FLAGS_ON "-mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi") + endif() + elseif(MSVC) + ocv_update(CPU_AVX2_FLAGS_ON "/arch:AVX2") + ocv_update(CPU_AVX_FLAGS_ON "/arch:AVX") + if(NOT MSVC64) + # 64-bit MSVC compiler uses SSE/SSE2 by default + ocv_update(CPU_SSE_FLAGS_ON "/arch:SSE") + ocv_update(CPU_SSE_SUPPORTED ON) + ocv_update(CPU_SSE2_FLAGS_ON "/arch:SSE2") + ocv_update(CPU_SSE2_SUPPORTED ON) + else() + ocv_update(CPU_SSE_SUPPORTED ON) + ocv_update(CPU_SSE2_SUPPORTED ON) + endif() + # Other instruction sets are supported by default since MSVC 2008 at least + else() + message(WARNING "TODO: Unsupported compiler") + endif() + + if(NOT DEFINED CPU_DISPATCH) + set(CPU_DISPATCH "SSE4_1;AVX;FP16;AVX2" CACHE STRING "${HELP_CPU_DISPATCH}") + endif() + + if(NOT DEFINED CPU_BASELINE) + if(X86_64) + set(CPU_BASELINE "SSSE3" CACHE STRING "${HELP_CPU_BASELINE}") + else() + set(CPU_BASELINE "SSE2" CACHE STRING "${HELP_CPU_BASELINE}") + endif() + endif() + +elseif(ARM OR AARCH64) + ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp") + if(NOT AARCH64) + ocv_update(CPU_KNOWN_OPTIMIZATIONS "VFPV3;NEON;FP16") + ocv_update(CPU_NEON_FLAGS_ON "-mfpu=neon") + ocv_update(CPU_VFPV3_FLAGS_ON "-mfpu=vfpv3") + ocv_update(CPU_FP16_FLAGS_ON "-mfpu=neon-fp16") + set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}") + else() + ocv_update(CPU_KNOWN_OPTIMIZATIONS "NEON;FP16") + ocv_update(CPU_NEON_FLAGS_ON "") + set(CPU_BASELINE "NEON" CACHE STRING 
"${HELP_CPU_BASELINE}") + endif() +endif() + +# Helper values for cmake-gui +set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}") +set(CPU_DISPATCH "" CACHE STRING "${HELP_CPU_DISPATCH}") +set_property(CACHE CPU_BASELINE PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS}) +set_property(CACHE CPU_DISPATCH PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS}) + +set(CPU_BASELINE_FLAGS "") + +set(CPU_BASELINE_FINAL "") +set(CPU_DISPATCH_FINAL "") + +macro(ocv_check_compiler_optimization OPT) + if(NOT DEFINED CPU_${OPT}_SUPPORTED) + if((DEFINED CPU_${OPT}_FLAGS_ON AND NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") OR CPU_${OPT}_TEST_FILE) + set(_varname "") + if(CPU_${OPT}_TEST_FILE) + set(__available 0) + if(CPU_BASELINE_DETECT) + set(_varname "HAVE_CPU_${OPT}_SUPPORT") + ocv_check_compiler_flag(CXX "${CPU_BASELINE_FLAGS}" "${_varname}" "${CPU_${OPT}_TEST_FILE}") + if(${_varname}) + list(APPEND CPU_BASELINE_FINAL ${OPT}) + set(__available 1) + endif() + endif() + if(NOT __available) + if(NOT "x${CPU_${OPT}_FLAGS_NAME}" STREQUAL "x") + set(_varname "HAVE_CPU_${CPU_${OPT}_FLAGS_NAME}") + set(_compile_flags "${CPU_BASELINE_FLAGS}") + ocv_append_optimization_flag(_compile_flags ${OPT}) + ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}") + elseif(NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") + ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "" "${CPU_${OPT}_TEST_FILE}") + else() + set(_varname "HAVE_CPU_${OPT}_SUPPORT") + set(_compile_flags "${CPU_BASELINE_FLAGS}") + ocv_append_optimization_flag(_compile_flags ${OPT}) + ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}") + endif() + endif() + else() + ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "") + endif() + if(_varname AND ${_varname}) + set(CPU_${OPT}_SUPPORTED ON) + elseif(NOT CPU_${OPT}_SUPPORTED) + message(STATUS "${OPT} is not supported by C++ compiler") + endif() + else() + set(CPU_${OPT}_SUPPORTED ON) + endif() + 
endif()
+endmacro()
+
+# Reset the per-optimization usage counters and, unless a "force" list was
+# given explicitly, default it to the optimization's "implies" list.
+foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
+  set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "" FORCE)
+  if(NOT DEFINED CPU_${OPT}_FORCE)
+    set(CPU_${OPT}_FORCE "${CPU_${OPT}_IMPLIES}")
+  endif()
+endforeach()
+
+# CPU_BASELINE=NATIVE/HOST: add the "build for this machine" flag to the baseline flags.
+# Try GCC-style -march=native first, then fall back to the Intel -xHost / /QxHost spelling.
+# NOTE: _varname holds the *name* of the result variable filled by ocv_check_compiler_flag(),
+# so the outcome must be tested through ${_varname} (same pattern as in
+# ocv_check_compiler_optimization); testing _varname itself is always true.
+if(_add_native_flag)
+  set(_varname "HAVE_CPU_NATIVE_SUPPORT")
+  ocv_check_compiler_flag(CXX "-march=native" "${_varname}" "")
+  if(${_varname})
+    set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} -march=native")
+  else()
+    set(_varname "HAVE_CPU_HOST_SUPPORT")
+    if(MSVC)
+      set(_flag "/QxHost")
+    else()
+      set(_flag "-xHost")
+    endif()
+    ocv_check_compiler_flag(CXX "${_flag}" "${_varname}" "")
+    if(${_varname})
+      # append the flag that was actually probed (_flag)
+      set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} ${_flag}")
+    endif()
+  endif()
+endif()
+
+# Classify every known optimization as baseline, dispatched or unused.
+foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
+  # OPT is disabled when it is, or transitively implies, an entry of CPU_BASELINE_DISABLE
+  set(__is_disabled 0)
+  foreach(OPT2 ${CPU_BASELINE_DISABLE})
+    ocv_is_optimization_in_list(__is_disabled ${OPT2} ${OPT})
+    if(__is_disabled)
+      break()
+    endif()
+  endforeach()
+  if(__is_disabled)
+    set(__is_from_baseline 0)
+  else()
+    # required baseline entries take precedence over the preferred list
+    ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_REQUIRE})
+    if(NOT __is_from_baseline)
+      ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE})
+    endif()
+  endif()
+  ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH_REQUIRE})
+  if(NOT __is_from_dispatch)
+    ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH})
+  endif()
+  # probe the compiler only for optimizations that may actually be used
+  if(__is_from_dispatch OR __is_from_baseline OR CPU_BASELINE_DETECT)
+    ocv_check_compiler_optimization(${OPT})
+  endif()
+  # in DETECT mode the probe above may have appended OPT to CPU_BASELINE_FINAL
+  if(CPU_BASELINE_DETECT AND NOT __is_from_baseline AND NOT __is_disabled)
+    ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_FINAL})
+  endif()
+  if(CPU_${OPT}_SUPPORTED)
+    if(";${CPU_DISPATCH};" MATCHES ";${OPT};" AND NOT __is_from_baseline)
+      list(APPEND CPU_DISPATCH_FINAL ${OPT})
+    elseif(__is_from_baseline AND NOT CPU_BASELINE_DETECT)
+      list(APPEND CPU_BASELINE_FINAL ${OPT})
+      ocv_append_optimization_flag(CPU_BASELINE_FLAGS ${OPT})
+    endif()
+  endif()
+endforeach() + +foreach(OPT ${CPU_BASELINE_REQUIRE}) + if(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") + message(SEND_ERROR "Required baseline optimization is not supported: ${OPT} (CPU_BASELINE_REQUIRE=${CPU_BASELINE_REQUIRE})") + endif() +endforeach() + +foreach(OPT ${CPU_BASELINE}) + if(OPT STREQUAL "DETECT" OR OPT STREQUAL "HOST" OR OPT STREQUAL "NATIVE") + # nothing + elseif(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") + message(STATUS "Optimization ${OPT} is not available, skipped") + endif() +endforeach() + +foreach(OPT ${CPU_DISPATCH_REQUIRE}) + if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};") + # OK + elseif(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") + message(SEND_ERROR "Dispatched optimization ${OPT} is in baseline list (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})") + else() + message(SEND_ERROR "Required dispatch optimization is not supported: ${OPT} (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})") + endif() +endforeach() + +foreach(OPT ${CPU_DISPATCH}) + if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};") + # OK + elseif(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") + # OK + else() + message(STATUS "Dispatch optimization ${OPT} is not available, skipped") + endif() +endforeach() + +#message(STATUS "CPU_BASELINE_FINAL=${CPU_BASELINE_FINAL}") +#message(STATUS "CPU_DISPATCH_FINAL=${CPU_DISPATCH_FINAL}") + +#if(CPU_DISPATCH_FINAL AND NOT PYTHON_DEFAULT_EXECUTABLE) +# message(FATAL_ERROR "Python is required for CPU dispatched optimization support") +#endif() + +macro(ocv_compiler_optimization_options) + set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${CPU_BASELINE_FLAGS}") + if(NOT __flags STREQUAL CACHED_CPU_BASELINE_FLAGS) + set(CACHED_CPU_BASELINE_FLAGS "${__flags}" CACHE INTERNAL "" FORCE) + ocv_clear_vars(HAVE_CPU_BASELINE_FLAGS) + endif() + ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_BASELINE_FLAGS) + if(NOT HAVE_CPU_BASELINE_FLAGS) + message(FATAL_ERROR "Compiler doesn't support baseline optimization flags: ${CPU_BASELINE_FLAGS}") + 
endif() + add_extra_compiler_option_force("${CPU_BASELINE_FLAGS}") + + foreach(OPT ${CPU_DISPATCH_FINAL}) + set(__dispatch_flags "") + set(__dispatch_definitions "") + set(__dispatch_opts "") + set(__dispatch_opts_force "") + foreach(OPT2 ${CPU_KNOWN_OPTIMIZATIONS}) + if(NOT CPU_${OPT2}_SUPPORTED) + #continue() + else() + ocv_is_optimization_in_list(__is_from_baseline ${OPT2} ${CPU_BASELINE_FINAL}) + if(NOT __is_from_baseline) + ocv_is_optimization_in_list(__is_active ${OPT2} ${OPT}) + if(__is_active) + ocv_append_optimization_flag(__dispatch_flags ${OPT2}) + list(APPEND __dispatch_definitions "CV_CPU_COMPILE_${OPT2}=1") + list(APPEND __dispatch_opts "${OPT2}") + endif() + ocv_is_optimization_in_force_list(__is_force ${OPT2} ${OPT}) + if(__is_force) + list(APPEND __dispatch_opts_force "${OPT2}") + endif() + endif() + endif() + endforeach() + set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${__dispatch_flags}") + if(NOT __flags STREQUAL CACHED_CPU_DISPATCH_${OPT}_FLAGS) + set(CACHED_CPU_DISPATCH_${OPT}_FLAGS "${__flags}" CACHE INTERNAL "" FORCE) + ocv_clear_vars(HAVE_CPU_DISPATCH_FLAGS_${OPT}) + endif() + ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_DISPATCH_FLAGS_${OPT}) + if(NOT HAVE_CPU_DISPATCH_FLAGS_${OPT}) + message(FATAL_ERROR "Compiler doesn't support optimization flags for ${OPT} dispatch mode: ${__dispatch_flags}") + endif() + set(CPU_DISPATCH_FLAGS_${OPT} "${__dispatch_flags}") + set(CPU_DISPATCH_DEFINITIONS_${OPT} "${__dispatch_definitions}") + set(CPU_DISPATCH_${OPT}_INCLUDED "${__dispatch_opts}") + set(CPU_DISPATCH_${OPT}_FORCED "${__dispatch_opts_force}") + endforeach() + + if(ENABLE_POWERPC) + add_extra_compiler_option("-mcpu=G3 -mtune=G5") + endif() + if(ARM) + add_extra_compiler_option("-mfp16-format=ieee") + endif(ARM) + if(ENABLE_NEON) + add_extra_compiler_option("-mfpu=neon") + endif() + if(ENABLE_VFPV3 AND NOT ENABLE_NEON) + add_extra_compiler_option("-mfpu=vfpv3") + endif() +endmacro() + +macro(ocv_compiler_optimization_options_finalize) + 
if(CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) + if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4) + if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)") + add_extra_compiler_option(-mfpmath=sse) # !! important - be on the same wave with x64 compilers + else() + add_extra_compiler_option(-mfpmath=387) + endif() + endif() + endif() + + if(MSVC) + # Generate Intrinsic Functions + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi") + + if((X86 OR X86_64) AND CMAKE_SIZEOF_VOID_P EQUAL 4 AND ";${CPU_BASELINE_FINAL};" MATCHES ";SSE;") + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers + endif() + endif(MSVC) +endmacro() + +macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME TARGET_BASE_NAME) + set(__result "") + set(__result_libs "") + foreach(OPT ${CPU_DISPATCH_FINAL}) + set(__result_${OPT} "") + endforeach() + foreach(fname ${${SOURCES_VAR_NAME}}) + string(TOLOWER "${fname}" fname_LOWER) + if(fname_LOWER MATCHES "[.]opt_.*[.]cpp$") + if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS) + message(STATUS "Excluding from source files list: ${fname}") + #continue() + else() + set(__opt_found 0) + foreach(OPT ${CPU_BASELINE_FINAL}) + string(TOLOWER "${OPT}" OPT_LOWER) + if(fname_LOWER MATCHES "_${OPT_LOWER}[.]cpp$") +#message("${fname} BASELINE-${OPT}") + set(__opt_found 1) + list(APPEND __result "${fname}") + break() + endif() + endforeach() + foreach(OPT ${CPU_DISPATCH_FINAL}) + foreach(OPT2 ${CPU_DISPATCH_${OPT}_FORCED}) + string(TOLOWER "${OPT2}" OPT2_LOWER) + if(fname_LOWER MATCHES "_${OPT2_LOWER}[.]cpp$") + list(APPEND __result_${OPT} "${fname}") + math(EXPR CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}+1") + set(CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}" CACHE INTERNAL "" FORCE) +#message("${fname} ${OPT}") +#message(" ${CPU_DISPATCH_${OPT}_INCLUDED}") +#message(" ${CPU_DISPATCH_DEFINITIONS_${OPT}}") +#message(" ${CPU_DISPATCH_FLAGS_${OPT}}") + set(__opt_found 1) + 
break()
+            endif()
+          endforeach()
+          if(__opt_found)
+            set(__opt_found 1)
+            break()
+          endif()
+        endforeach()
+        if(NOT __opt_found)
+          message(STATUS "Excluding from source files list: ${fname}")
+        endif()
+      endif()
+    else()
+      list(APPEND __result "${fname}")
+    endif()
+  endforeach()
+
+  # Attach the per-mode compile flags/definitions to the files collected for each dispatch mode
+  foreach(OPT ${CPU_DISPATCH_FINAL})
+    if(__result_${OPT})
+#message("${OPT}: ${__result_${OPT}}")
+      if(CMAKE_GENERATOR MATCHES "^Visual")
+        # extra flags are added before common flags, so switching between optimizations doesn't work correctly
+        # Also CMAKE_CXX_FLAGS doesn't work (it is directory-based, so add_subdirectory is required)
+        add_library(${TARGET_BASE_NAME}_${OPT} OBJECT ${__result_${OPT}})
+        ocv_append_dependant_targets(${TARGET_BASE_NAME} ${TARGET_BASE_NAME}_${OPT})
+        set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
+        set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
+        #list(APPEND __result_libs ${TARGET_BASE_NAME}_${OPT})
+        # hand the OBJECT library's object files back through the resulting source list
+        list(APPEND __result "$<TARGET_OBJECTS:${TARGET_BASE_NAME}_${OPT}>")
+      else()
+        foreach(fname ${__result_${OPT}})
+          set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
+          set_source_files_properties("${fname}" PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
+        endforeach()
+        list(APPEND __result ${__result_${OPT}})
+      endif()
+    endif()
+  endforeach()
+  set(${SOURCES_VAR_NAME} "${__result}")
+  list(APPEND ${LIBS_VAR_NAME} ${__result_libs})
+endmacro()
+
+# Composes the C preprocessor snippets describing baseline and dispatched
+# optimizations (OPENCV_CPU_*_DEFINITIONS_CONFIGMAKE) and rewrites
+# cv_cpu_helper.h when its generated content differs from the file on disk.
+macro(ocv_compiler_optimization_fill_cpu_config)
+  set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "")
+  foreach(OPT ${CPU_BASELINE_FINAL})
+    set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
+#define CV_CPU_COMPILE_${OPT} 1
+#define CV_CPU_BASELINE_COMPILE_${OPT} 1
+")
+  endforeach()
+
+  set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
+#define CV_CPU_BASELINE_FEATURES 0 
\\") + foreach(OPT ${CPU_BASELINE_FINAL}) + if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x") + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE} + , CV_CPU_${OPT} \\") + endif() + endforeach() + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}\n") + + set(__dispatch_modes "") + foreach(OPT ${CPU_DISPATCH_FINAL}) + list(APPEND __dispatch_modes ${CPU_DISPATCH_${OPT}_FORCE} ${OPT}) + endforeach() + list(REMOVE_DUPLICATES __dispatch_modes) + set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "") + foreach(OPT ${__dispatch_modes}) + set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE} +#define CV_CPU_DISPATCH_COMPILE_${OPT} 1") + endforeach() + + set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "// AUTOGENERATED, DO NOT EDIT\n") + foreach(OPT ${CPU_ALL_OPTIMIZATIONS}) + if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x") + set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE} +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_${OPT} +# define CV_CPU_HAS_SUPPORT_${OPT} 1 +# define CV_CPU_CALL_${OPT}(...) return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_${OPT} +# define CV_CPU_HAS_SUPPORT_${OPT} (cv::checkHardwareSupport(CV_CPU_${OPT})) +# define CV_CPU_CALL_${OPT}(...) if (CV_CPU_HAS_SUPPORT_${OPT}) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_${OPT} 0 +# define CV_CPU_CALL_${OPT}(...) 
+#endif +") + endif() + endforeach() + + set(__file "${CMAKE_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h") + if(EXISTS "${__file}") + file(READ "${__file}" __content) + endif() + if(__content STREQUAL OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE) + #message(STATUS "${__file} contains same content") + else() + file(WRITE "${__file}" "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}") + message(WARNING "${__file} is updated") + endif() +endmacro() + +if(CV_DISABLE_OPTIMIZATION OR CV_ICC) + ocv_update(CV_ENABLE_UNROLLED 0) +else() + ocv_update(CV_ENABLE_UNROLLED 1) +endif() diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 5bb0479113..0eb68b656e 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -31,24 +31,21 @@ endif() if(MINGW OR (X86 AND UNIX AND NOT APPLE)) # mingw compiler is known to produce unstable SSE code with -O3 hence we are trying to use -O2 instead if(CMAKE_COMPILER_IS_GNUCXX) - foreach(flags CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) - string(REPLACE "-O3" "-O2" ${flags} "${${flags}}") - endforeach() - endif() - - if(CMAKE_COMPILER_IS_GNUCC) - foreach(flags CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG) + foreach(flags + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG + CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG) string(REPLACE "-O3" "-O2" ${flags} "${${flags}}") endforeach() endif() endif() if(MSVC) - string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT}") + string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS) + string(STRIP "${CMAKE_CXX_FLAGS_INIT}" CMAKE_CXX_FLAGS_INIT) if(CMAKE_CXX_FLAGS STREQUAL CMAKE_CXX_FLAGS_INIT) # override cmake default exception handling option - string(REPLACE "/EHsc" "/EHa" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "/EHsc" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHa") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "Flags used by the compiler during all build types." FORCE) endif() endif() @@ -63,9 +60,6 @@ set(OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE "") set(OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG "") macro(add_extra_compiler_option option) - if(CMAKE_BUILD_TYPE) - set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE}) - endif() ocv_check_flag_support(CXX "${option}" _varname "${OPENCV_EXTRA_CXX_FLAGS} ${ARGN}") if(${_varname}) set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}") @@ -77,6 +71,12 @@ macro(add_extra_compiler_option option) endif() endmacro() +macro(add_extra_compiler_option_force option) + set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}") + set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} ${option}") +endmacro() + + # Gets environment variable and puts its value to the corresponding preprocessor definition # Useful for WINRT that has no access to environment variables macro(add_env_definitions option) @@ -102,7 +102,11 @@ if(MINGW) endif() if(CV_ICC AND NOT ENABLE_FAST_MATH) - add_extra_compiler_option("-fp-model precise") + if(MSVC) + add_extra_compiler_option("/fp:precise") + else() + add_extra_compiler_option("-fp-model precise") + endif() endif() if(CMAKE_COMPILER_IS_GNUCXX) @@ -141,7 +145,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) endif() # We need pthread's - if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX)) + if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX)) # TODO add_extra_compiler_option(-pthread) endif() @@ -170,83 +174,6 @@ if(CMAKE_COMPILER_IS_GNUCXX) if(ENABLE_FAST_MATH) add_extra_compiler_option(-ffast-math) endif() - if(ENABLE_POWERPC) - add_extra_compiler_option("-mcpu=G3 -mtune=G5") - endif() - if(ENABLE_SSE) - add_extra_compiler_option(-msse) - endif() - if(ENABLE_SSE2) - add_extra_compiler_option(-msse2) - elseif(X86 OR X86_64) - add_extra_compiler_option(-mno-sse2) - endif() - 
if(ARM) - add_extra_compiler_option("-mfp16-format=ieee") - endif(ARM) - if(ENABLE_NEON) - add_extra_compiler_option("-mfpu=neon") - endif() - if(ENABLE_VFPV3 AND NOT ENABLE_NEON) - add_extra_compiler_option("-mfpu=vfpv3") - endif() - - # SSE3 and further should be disabled under MingW because it generates compiler errors - if(NOT MINGW) - if(ENABLE_AVX) - add_extra_compiler_option(-mavx) - elseif(X86 OR X86_64) - add_extra_compiler_option(-mno-avx) - endif() - if(ENABLE_AVX2) - add_extra_compiler_option(-mavx2) - - if(ENABLE_FMA3) - add_extra_compiler_option(-mfma) - endif() - endif() - - # GCC depresses SSEx instructions when -mavx is used. Instead, it generates new AVX instructions or AVX equivalence for all SSEx instructions when needed. - if(NOT OPENCV_EXTRA_CXX_FLAGS MATCHES "-mavx") - if(ENABLE_SSE3) - add_extra_compiler_option(-msse3) - elseif(X86 OR X86_64) - add_extra_compiler_option(-mno-sse3) - endif() - - if(ENABLE_SSSE3) - add_extra_compiler_option(-mssse3) - elseif(X86 OR X86_64) - add_extra_compiler_option(-mno-ssse3) - endif() - - if(ENABLE_SSE41) - add_extra_compiler_option(-msse4.1) - elseif(X86 OR X86_64) - add_extra_compiler_option(-mno-sse4.1) - endif() - - if(ENABLE_SSE42) - add_extra_compiler_option(-msse4.2) - elseif(X86 OR X86_64) - add_extra_compiler_option(-mno-sse4.2) - endif() - - if(ENABLE_POPCNT) - add_extra_compiler_option(-mpopcnt) - endif() - endif() - endif(NOT MINGW) - - if(X86 OR X86_64) - if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4) - if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)") - add_extra_compiler_option(-mfpmath=sse)# !! important - be on the same wave with x64 compilers - else() - add_extra_compiler_option(-mfpmath=387) - endif() - endif() - endif() # Profiling? 
if(ENABLE_PROFILING) @@ -257,7 +184,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) string(REPLACE "-fomit-frame-pointer" "" ${flags} "${${flags}}") string(REPLACE "-ffunction-sections" "" ${flags} "${${flags}}") endforeach() - elseif(NOT APPLE AND NOT ANDROID) + elseif(NOT ((IOS OR ANDROID) AND NOT BUILD_SHARED_LIBS)) # Remove unreferenced functions: function level linking add_extra_compiler_option(-ffunction-sections) endif() @@ -296,41 +223,6 @@ if(MSVC) set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} /Zi") endif() - if(ENABLE_AVX2 AND NOT MSVC_VERSION LESS 1800) - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX2") - endif() - if(ENABLE_AVX AND NOT MSVC_VERSION LESS 1600 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX") - endif() - - if(ENABLE_SSE4_1 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE4.1") - endif() - - if(ENABLE_SSE3 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE3") - endif() - - if(NOT MSVC64) - # 64-bit MSVC compiler uses SSE/SSE2 by default - if(ENABLE_SSE2 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE2") - endif() - if(ENABLE_SSE AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE") - endif() - endif() - - if(ENABLE_SSE OR ENABLE_SSE2 OR ENABLE_SSE3 OR ENABLE_SSE4_1 OR ENABLE_AVX OR ENABLE_AVX2) - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi") - endif() - - if(X86 OR X86_64) - if(CMAKE_SIZEOF_VOID_P EQUAL 4 AND ENABLE_SSE2) - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! 
important - be on the same wave with x64 compilers - endif() - endif() - if(OPENCV_WARNINGS_ARE_ERRORS) set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /WX") endif() @@ -353,6 +245,16 @@ if(NOT BUILD_SHARED_LIBS AND CMAKE_COMPILER_IS_GNUCXX AND NOT ANDROID) set(OPENCV_EXTRA_FLAGS "-fPIC ${OPENCV_EXTRA_FLAGS}") endif() +include(cmake/OpenCVCompilerOptimizations.cmake) + +if(COMMAND ocv_compiler_optimization_options) + ocv_compiler_optimization_options() +endif() + +if(COMMAND ocv_compiler_optimization_options_finalize) + ocv_compiler_optimization_options_finalize() +endif() + # Add user supplied extra options (optimization, etc...) # ========================================================== set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS}" CACHE INTERNAL "Extra compiler options") @@ -370,6 +272,7 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399) add_extra_compiler_option(-fvisibility-inlines-hidden) endif() +# TODO !!!!! if(NOT OPENCV_FP16_DISABLE AND NOT IOS) if(ARM AND ENABLE_NEON) set(FP16_OPTION "-mfpu=neon-fp16") @@ -378,7 +281,7 @@ if(NOT OPENCV_FP16_DISABLE AND NOT IOS) endif() try_compile(__VALID_FP16 "${OpenCV_BINARY_DIR}" - "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" + "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp" COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}" OUTPUT_VARIABLE TRY_OUT ) diff --git a/cmake/OpenCVGenHeaders.cmake b/cmake/OpenCVGenHeaders.cmake index 2988979045..477b910558 100644 --- a/cmake/OpenCVGenHeaders.cmake +++ b/cmake/OpenCVGenHeaders.cmake @@ -3,6 +3,10 @@ configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CO configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/cvconfig.h") install(FILES "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2 COMPONENT dev) +# platform-specific config file +ocv_compiler_optimization_fill_cpu_config() 
+configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cv_cpu_config.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cv_cpu_config.h") + # ---------------------------------------------------------------------------- # opencv_modules.hpp based on actual modules list # ---------------------------------------------------------------------------- diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index ce2bc7e08a..10e1f7397c 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -65,6 +65,7 @@ foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MOD unset(OPENCV_MODULE_${mod}_PRIVATE_OPT_DEPS CACHE) unset(OPENCV_MODULE_${mod}_LINK_DEPS CACHE) unset(OPENCV_MODULE_${mod}_WRAPPERS CACHE) + unset(OPENCV_DEPENDANT_TARGETS_${mod} CACHE) endforeach() # clean modules info which needs to be recalculated @@ -648,6 +649,8 @@ macro(ocv_set_module_sources) # use full paths for module to be independent from the module location ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS) + ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module}) + set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}") set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}") endmacro() diff --git a/cmake/OpenCVPCHSupport.cmake b/cmake/OpenCVPCHSupport.cmake index 29f21d8015..8db3cb9522 100644 --- a/cmake/OpenCVPCHSupport.cmake +++ b/cmake/OpenCVPCHSupport.cmake @@ -328,7 +328,10 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input) get_target_property(_sources ${_targetName} SOURCES) foreach(src ${_sources}) - if(NOT "${src}" MATCHES "\\.mm$") + if(NOT "${src}" MATCHES "\\.mm$" + AND NOT "${src}" MATCHES "\\.h$" AND NOT "${src}" MATCHES "\\.hpp$" # header files + AND NOT "${src}" MATCHES "^\$" # CMake generator 
expressions + ) get_source_file_property(oldProps "${src}" COMPILE_FLAGS) if(NOT oldProps) set(newProperties "/Yu\"${_input}\" /FI\"${_input}\"") diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index 474f7db609..935bfc2a01 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -37,7 +37,11 @@ endmacro() macro(ocv_update VAR) if(NOT DEFINED ${VAR}) - set(${VAR} ${ARGN}) + if("x${ARGN}" STREQUAL "x") + set(${VAR} "") + else() + set(${VAR} ${ARGN}) + endif() else() #ocv_debug_message("Preserve old value for ${VAR}: ${${VAR}}") endif() @@ -151,8 +155,15 @@ function(ocv_append_target_property target prop) endif() endfunction() +function(ocv_append_dependant_targets target) + #ocv_debug_message("ocv_append_dependant_targets(${target} ${ARGN})") + _ocv_fix_target(target) + set(OPENCV_DEPENDANT_TARGETS_${target} "${OPENCV_DEPENDANT_TARGETS_${target}};${ARGN}" CACHE INTERNAL "" FORCE) +endfunction() + # adds include directories in such way that directories from the OpenCV source tree go first function(ocv_target_include_directories target) + #ocv_debug_message("ocv_target_include_directories(${target} ${ARGN})") _ocv_fix_target(target) set(__params "") if(CMAKE_COMPILER_IS_GNUCXX AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.0" AND @@ -173,6 +184,11 @@ function(ocv_target_include_directories target) else() if(TARGET ${target}) target_include_directories(${target} PRIVATE ${__params}) + if(OPENCV_DEPENDANT_TARGETS_${target}) + foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}}) + target_include_directories(${t} PRIVATE ${__params}) + endforeach() + endif() else() set(__new_inc "${OCV_TARGET_INCLUDE_DIRS_${target}};${__params}") set(OCV_TARGET_INCLUDE_DIRS_${target} "${__new_inc}" CACHE INTERNAL "") @@ -205,8 +221,11 @@ set(OCV_COMPILER_FAIL_REGEX ) MACRO(ocv_check_compiler_flag LANG FLAG RESULT) + set(_fname "${ARGN}") if(NOT DEFINED ${RESULT}) - if("_${LANG}_" MATCHES "_CXX_") + if(_fname) + # nothing + elseif("_${LANG}_" MATCHES "_CXX_") 
set(_fname "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx") if("${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror " OR "${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror=unknown-pragmas ") FILE(WRITE "${_fname}" "int main() { return 0; }\n") @@ -231,7 +250,13 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT) unset(_fname) endif() if(_fname) - MESSAGE(STATUS "Performing Test ${RESULT}") + if(NOT "x${ARGN}" STREQUAL "x") + file(RELATIVE_PATH __msg "${CMAKE_SOURCE_DIR}" "${ARGN}") + set(__msg " (check file: ${__msg})") + else() + set(__msg "") + endif() + MESSAGE(STATUS "Performing Test ${RESULT}${__msg}") TRY_COMPILE(${RESULT} "${CMAKE_BINARY_DIR}" "${_fname}" @@ -278,7 +303,11 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT) endif() ENDMACRO() -macro(ocv_check_flag_support lang flag varname) +macro(ocv_check_flag_support lang flag varname base_options) + if(CMAKE_BUILD_TYPE) + set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE}) + endif() + if("_${lang}_" MATCHES "_CXX_") set(_lang CXX) elseif("_${lang}_" MATCHES "_C_") @@ -293,7 +322,7 @@ macro(ocv_check_flag_support lang flag varname) string(REGEX REPLACE "^(/|-)" "HAVE_${_lang}_" ${varname} "${${varname}}") string(REGEX REPLACE " -|-|=| |\\." 
"_" ${varname} "${${varname}}") - ocv_check_compiler_flag("${_lang}" "${ARGN} ${flag}" ${${varname}}) + ocv_check_compiler_flag("${_lang}" "${base_options} ${flag}" ${${varname}} ${ARGN}) endmacro() # turns off warnings @@ -327,7 +356,7 @@ macro(ocv_warnings_disable) string(REPLACE "${warning}" "" ${var} "${${var}}") string(REPLACE "-W" "-Wno-" warning "${warning}") endif() - ocv_check_flag_support(${var} "${warning}" _varname) + ocv_check_flag_support(${var} "${warning}" _varname "") if(${_varname}) set(${var} "${${var}} ${warning}") endif() @@ -342,7 +371,7 @@ macro(ocv_warnings_disable) else() string(REPLACE "-wd" "-Qwd" warning "${warning}") endif() - ocv_check_flag_support(${var} "${warning}" _varname) + ocv_check_flag_support(${var} "${warning}" _varname "") if(${_varname}) set(${var} "${${var}} ${warning}") endif() @@ -357,7 +386,7 @@ macro(ocv_warnings_disable) endmacro() macro(add_apple_compiler_options the_module) - ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS) + ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS "") if(HAVE_OBJC_EXCEPTIONS) foreach(source ${OPENCV_MODULE_${the_module}_SOURCES}) if("${source}" MATCHES "\\.mm$") @@ -903,6 +932,11 @@ function(_ocv_append_target_includes target) if (TARGET ${target}_object) target_include_directories(${target}_object PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}}) endif() + if(OPENCV_DEPENDANT_TARGETS_${target}) + foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}}) + target_include_directories(${t} PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}}) + endforeach() + endif() unset(OCV_TARGET_INCLUDE_DIRS_${target} CACHE) endif() endfunction() diff --git a/cmake/checks/cpu_avx.cpp b/cmake/checks/cpu_avx.cpp new file mode 100644 index 0000000000..05536f443f --- /dev/null +++ b/cmake/checks/cpu_avx.cpp @@ -0,0 +1,9 @@ +#if !defined __AVX__ // MSVC supports this flag since MSVS 2013 +#error "__AVX__ define is missing" +#endif +#include +void test() +{ + __m256 a = 
_mm256_set1_ps(0.0f); +} +int main() { return 0; } diff --git a/cmake/checks/cpu_avx2.cpp b/cmake/checks/cpu_avx2.cpp new file mode 100644 index 0000000000..3ab1143b8f --- /dev/null +++ b/cmake/checks/cpu_avx2.cpp @@ -0,0 +1,10 @@ +#if !defined __AVX2__ // MSVC supports this flag since MSVS 2013 +#error "__AVX2__ define is missing" +#endif +#include +void test() +{ + int data[8] = {0,0,0,0, 0,0,0,0}; + __m256i a = _mm256_loadu_si256((const __m256i *)data); +} +int main() { return 0; } diff --git a/cmake/checks/cpu_avx512.cpp b/cmake/checks/cpu_avx512.cpp new file mode 100644 index 0000000000..d0898ab3ee --- /dev/null +++ b/cmake/checks/cpu_avx512.cpp @@ -0,0 +1,10 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include +void test() +{ + __m512i zmm = _mm512_setzero_si512(); +} +#else +#error "AVX512 is not supported" +#endif +int main() { return 0; } diff --git a/cmake/checks/fp16.cpp b/cmake/checks/cpu_fp16.cpp similarity index 86% rename from cmake/checks/fp16.cpp rename to cmake/checks/cpu_fp16.cpp index c77c844834..6951f1c4f7 100644 --- a/cmake/checks/fp16.cpp +++ b/cmake/checks/cpu_fp16.cpp @@ -1,6 +1,6 @@ #include -#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) +#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) || (defined __INTEL_COMPILER && defined __AVX__) #include int test() { diff --git a/cmake/checks/cpu_popcnt.cpp b/cmake/checks/cpu_popcnt.cpp new file mode 100644 index 0000000000..f55c9f3c54 --- /dev/null +++ b/cmake/checks/cpu_popcnt.cpp @@ -0,0 +1,8 @@ +#include +#ifndef _MSC_VER +#include +#endif +int main() { + int i = _mm_popcnt_u64(1); + return 0; +} diff --git a/cmake/checks/cpu_sse.cpp b/cmake/checks/cpu_sse.cpp new file mode 100644 index 0000000000..c6269acdb5 --- /dev/null +++ b/cmake/checks/cpu_sse.cpp @@ -0,0 +1,2 @@ +#include +int main() { return 0; } diff --git a/cmake/checks/cpu_sse2.cpp b/cmake/checks/cpu_sse2.cpp new file mode 100644 index 0000000000..68a69f88cb --- /dev/null +++ 
b/cmake/checks/cpu_sse2.cpp @@ -0,0 +1,2 @@ +#include +int main() { return 0; } diff --git a/cmake/checks/cpu_sse3.cpp b/cmake/checks/cpu_sse3.cpp new file mode 100644 index 0000000000..98ce2191ec --- /dev/null +++ b/cmake/checks/cpu_sse3.cpp @@ -0,0 +1,7 @@ +#include +int main() { + __m128 u, v; + u = _mm_set1_ps(0.0f); + v = _mm_moveldup_ps(u); // SSE3 + return 0; +} diff --git a/cmake/checks/cpu_sse41.cpp b/cmake/checks/cpu_sse41.cpp new file mode 100644 index 0000000000..ddd835b0e7 --- /dev/null +++ b/cmake/checks/cpu_sse41.cpp @@ -0,0 +1,6 @@ +#include +int main() { + __m128i a = _mm_setzero_si128(), b = _mm_setzero_si128(); + __m128i c = _mm_packus_epi32(a, b); + return 0; +} diff --git a/cmake/checks/cpu_sse42.cpp b/cmake/checks/cpu_sse42.cpp new file mode 100644 index 0000000000..56f56658ab --- /dev/null +++ b/cmake/checks/cpu_sse42.cpp @@ -0,0 +1,5 @@ +#include +int main() { + int i = _mm_popcnt_u64(1); + return 0; +} diff --git a/cmake/checks/cpu_ssse3.cpp b/cmake/checks/cpu_ssse3.cpp new file mode 100644 index 0000000000..e583199bcd --- /dev/null +++ b/cmake/checks/cpu_ssse3.cpp @@ -0,0 +1,7 @@ +#include +const double v = 0; +int main() { + __m128i a = _mm_setzero_si128(); + __m128i b = _mm_abs_epi32(a); + return 0; +} diff --git a/cmake/templates/cv_cpu_config.h.in b/cmake/templates/cv_cpu_config.h.in new file mode 100644 index 0000000000..27b27315cf --- /dev/null +++ b/cmake/templates/cv_cpu_config.h.in @@ -0,0 +1,5 @@ +// OpenCV CPU baseline features +@OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE@ + +// OpenCV supported CPU dispatched features +@OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE@ diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in index 05add9e2c5..658d12c14c 100644 --- a/cmake/templates/cvconfig.h.in +++ b/cmake/templates/cvconfig.h.in @@ -1,6 +1,15 @@ +#ifndef OPENCV_CVCONFIG_H_INCLUDED +#define OPENCV_CVCONFIG_H_INCLUDED + /* OpenCV compiled as static or dynamic libs */ #cmakedefine BUILD_SHARED_LIBS +/* OpenCV 
intrinsics optimized code */ +#cmakedefine CV_ENABLE_INTRINSICS + +/* OpenCV additional optimized code */ +#cmakedefine CV_DISABLE_OPTIMIZATION + /* Compile for 'real' NVIDIA GPU architectures */ #define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}" @@ -206,3 +215,7 @@ /* OpenVX */ #cmakedefine HAVE_OPENVX + + + +#endif // OPENCV_CVCONFIG_H_INCLUDED diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h new file mode 100644 index 0000000000..9a8537f909 --- /dev/null +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -0,0 +1,166 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#if defined __OPENCV_BUILD \ + +#include "cv_cpu_config.h" +#include "cv_cpu_helper.h" + +#if defined CV_ENABLE_INTRINSICS \ + && !defined CV_DISABLE_OPTIMIZATION \ + && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \ + +#ifdef CV_CPU_COMPILE_SSE2 +# include +# define CV_MMX 1 +# define CV_SSE 1 +# define CV_SSE2 1 +#endif +#ifdef CV_CPU_COMPILE_SSE3 +# include +# define CV_SSE3 1 +#endif +#ifdef CV_CPU_COMPILE_SSSE3 +# include +# define CV_SSSE3 1 +#endif +#ifdef CV_CPU_COMPILE_SSE4_1 +# include +# define CV_SSE4_1 1 +#endif +#ifdef CV_CPU_COMPILE_SSE4_2 +# include +# define CV_SSE4_2 1 +#endif +#ifdef CV_CPU_COMPILE_POPCNT +# ifdef _MSC_VER +# include +# if defined(_M_X64) +# define CV_POPCNT_U64 _mm_popcnt_u64 +# endif +# define CV_POPCNT_U32 _mm_popcnt_u32 +# else +# include +# if defined(__x86_64__) +# define CV_POPCNT_U64 __builtin_popcountll +# endif +# define CV_POPCNT_U32 __builtin_popcount +# endif +# define CV_POPCNT 1 +#endif +#ifdef CV_CPU_COMPILE_AVX +# include +# define CV_AVX 1 +#endif +#ifdef CV_CPU_COMPILE_AVX2 +# include +# define CV_AVX2 1 +#endif +#ifdef CV_CPU_COMPILE_FMA3 +# define CV_FMA3 1 +#endif + +#if (defined 
WIN32 || defined _WIN32) && defined(_M_ARM) +# include +# include +# define CV_NEON 1 +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) +# include +# define CV_NEON 1 +#endif + +#if defined(__ARM_NEON__) || defined(__aarch64__) +# include +#endif + +#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__ + +#endif // __OPENCV_BUILD + + + +#if !defined __OPENCV_BUILD // Compatibility code + +#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) +# include +# define CV_MMX 1 +# define CV_SSE 1 +# define CV_SSE2 1 +#elif (defined WIN32 || defined _WIN32) && defined(_M_ARM) +# include +# include +# define CV_NEON 1 +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) +# include +# define CV_NEON 1 +#endif + +#endif // !__OPENCV_BUILD (Compatibility code) + + + +#ifndef CV_MMX +# define CV_MMX 0 +#endif +#ifndef CV_SSE +# define CV_SSE 0 +#endif +#ifndef CV_SSE2 +# define CV_SSE2 0 +#endif +#ifndef CV_SSE3 +# define CV_SSE3 0 +#endif +#ifndef CV_SSSE3 +# define CV_SSSE3 0 +#endif +#ifndef CV_SSE4_1 +# define CV_SSE4_1 0 +#endif +#ifndef CV_SSE4_2 +# define CV_SSE4_2 0 +#endif +#ifndef CV_POPCNT +# define CV_POPCNT 0 +#endif +#ifndef CV_AVX +# define CV_AVX 0 +#endif +#ifndef CV_AVX2 +# define CV_AVX2 0 +#endif +#ifndef CV_FMA3 +# define CV_FMA3 0 +#endif +#ifndef CV_AVX_512F +# define CV_AVX_512F 0 +#endif +#ifndef CV_AVX_512BW +# define CV_AVX_512BW 0 +#endif +#ifndef CV_AVX_512CD +# define CV_AVX_512CD 0 +#endif +#ifndef CV_AVX_512DQ +# define CV_AVX_512DQ 0 +#endif +#ifndef CV_AVX_512ER +# define CV_AVX_512ER 0 +#endif +#ifndef CV_AVX_512IFMA512 +# define CV_AVX_512IFMA512 0 +#endif +#ifndef CV_AVX_512PF +# define CV_AVX_512PF 0 +#endif +#ifndef CV_AVX_512VBMI +# define CV_AVX_512VBMI 0 +#endif +#ifndef CV_AVX_512VL +# define CV_AVX_512VL 0 +#endif + +#ifndef CV_NEON +# define CV_NEON 0 +#endif diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h 
b/modules/core/include/opencv2/core/cv_cpu_helper.h new file mode 100644 index 0000000000..cb755d615e --- /dev/null +++ b/modules/core/include/opencv2/core/cv_cpu_helper.h @@ -0,0 +1,133 @@ +// AUTOGENERATED, DO NOT EDIT + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE +# define CV_CPU_HAS_SUPPORT_SSE 1 +# define CV_CPU_CALL_SSE(...) return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE +# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE)) +# define CV_CPU_CALL_SSE(...) if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_SSE 0 +# define CV_CPU_CALL_SSE(...) +#endif + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2 +# define CV_CPU_HAS_SUPPORT_SSE2 1 +# define CV_CPU_CALL_SSE2(...) return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2 +# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2)) +# define CV_CPU_CALL_SSE2(...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_SSE2 0 +# define CV_CPU_CALL_SSE2(...) +#endif + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3 +# define CV_CPU_HAS_SUPPORT_SSE3 1 +# define CV_CPU_CALL_SSE3(...) return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3 +# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3)) +# define CV_CPU_CALL_SSE3(...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_SSE3 0 +# define CV_CPU_CALL_SSE3(...) 
+#endif + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3 +# define CV_CPU_HAS_SUPPORT_SSSE3 1 +# define CV_CPU_CALL_SSSE3(...) return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3 +# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3)) +# define CV_CPU_CALL_SSSE3(...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_SSSE3 0 +# define CV_CPU_CALL_SSSE3(...) +#endif + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1 +# define CV_CPU_HAS_SUPPORT_SSE4_1 1 +# define CV_CPU_CALL_SSE4_1(...) return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1 +# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1)) +# define CV_CPU_CALL_SSE4_1(...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_SSE4_1 0 +# define CV_CPU_CALL_SSE4_1(...) +#endif + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2 +# define CV_CPU_HAS_SUPPORT_SSE4_2 1 +# define CV_CPU_CALL_SSE4_2(...) return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2 +# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2)) +# define CV_CPU_CALL_SSE4_2(...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_SSE4_2 0 +# define CV_CPU_CALL_SSE4_2(...) +#endif + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT +# define CV_CPU_HAS_SUPPORT_POPCNT 1 +# define CV_CPU_CALL_POPCNT(...) 
return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT +# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT)) +# define CV_CPU_CALL_POPCNT(...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_POPCNT 0 +# define CV_CPU_CALL_POPCNT(...) +#endif + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX +# define CV_CPU_HAS_SUPPORT_AVX 1 +# define CV_CPU_CALL_AVX(...) return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX +# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX)) +# define CV_CPU_CALL_AVX(...) if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_AVX 0 +# define CV_CPU_CALL_AVX(...) +#endif + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16 +# define CV_CPU_HAS_SUPPORT_FP16 1 +# define CV_CPU_CALL_FP16(...) return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16 +# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16)) +# define CV_CPU_CALL_FP16(...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_FP16 0 +# define CV_CPU_CALL_FP16(...) +#endif + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2 +# define CV_CPU_HAS_SUPPORT_AVX2 1 +# define CV_CPU_CALL_AVX2(...) return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2 +# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2)) +# define CV_CPU_CALL_AVX2(...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_AVX2 0 +# define CV_CPU_CALL_AVX2(...) 
+#endif + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3 +# define CV_CPU_HAS_SUPPORT_FMA3 1 +# define CV_CPU_CALL_FMA3(...) return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3 +# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3)) +# define CV_CPU_CALL_FMA3(...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_FMA3 0 +# define CV_CPU_CALL_FMA3(...) +#endif + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON +# define CV_CPU_HAS_SUPPORT_NEON 1 +# define CV_CPU_CALL_NEON(...) return __VA_ARGS__ +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON +# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON)) +# define CV_CPU_CALL_NEON(...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__ +#else +# define CV_CPU_HAS_SUPPORT_NEON 0 +# define CV_CPU_CALL_NEON(...) +#endif diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index 9e2b1ed7fd..faa7ef0a6a 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -48,6 +48,10 @@ //! @addtogroup core_utils //! 
@{ +#ifdef __OPENCV_BUILD +#include "cvconfig.h" +#endif + #if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 # define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ #endif @@ -59,10 +63,6 @@ #undef abs #undef Complex -#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 -# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ -#endif - #include #include "opencv2/core/hal/interface.h" @@ -88,7 +88,7 @@ # endif #endif -#if defined CV_ICC && !defined CV_ENABLE_UNROLLED +#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED) # define CV_ENABLE_UNROLLED 0 #else # define CV_ENABLE_UNROLLED 1 @@ -161,150 +161,9 @@ enum CpuFeatures { CPU_NEON = 100 }; -// do not include SSE/AVX/NEON headers for NVCC compiler -#ifndef __CUDACC__ -#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) -# include -# define CV_MMX 1 -# define CV_SSE 1 -# define CV_SSE2 1 -# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE3 1 -# endif -# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSSE3 1 -# endif -# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE4_1 1 -# endif -# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE4_2 1 -# endif -# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500) -# ifdef _MSC_VER -# include -# if defined(_M_X64) -# define CV_POPCNT_U64 _mm_popcnt_u64 -# endif -# define CV_POPCNT_U32 _mm_popcnt_u32 -# else -# include -# if defined(__x86_64__) -# define CV_POPCNT_U64 __builtin_popcountll -# endif -# define CV_POPCNT_U32 __builtin_popcount -# endif -# define CV_POPCNT 1 -# endif -# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0) -// MS Visual Studio 2010 (2012?) 
has no macro pre-defined to identify the use of /arch:AVX -// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32 -# include -# define CV_AVX 1 -# if defined(_XCR_XFEATURE_ENABLED_MASK) -# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) -# else -# define __xgetbv() 0 -# endif -# endif -# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0) -# include -# define CV_AVX2 1 -# if defined __FMA__ -# define CV_FMA3 1 -# endif -# endif -#endif +#include "cv_cpu_dispatch.h" -#if (defined WIN32 || defined _WIN32) && defined(_M_ARM) -# include -# include -# define CV_NEON 1 -# define CPU_HAS_NEON_FEATURE (true) -#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) -# include -# define CV_NEON 1 -#endif - -#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ -# define CV_VFP 1 -#endif - -#endif // __CUDACC__ - -#ifndef CV_POPCNT -#define CV_POPCNT 0 -#endif -#ifndef CV_MMX -# define CV_MMX 0 -#endif -#ifndef CV_SSE -# define CV_SSE 0 -#endif -#ifndef CV_SSE2 -# define CV_SSE2 0 -#endif -#ifndef CV_SSE3 -# define CV_SSE3 0 -#endif -#ifndef CV_SSSE3 -# define CV_SSSE3 0 -#endif -#ifndef CV_SSE4_1 -# define CV_SSE4_1 0 -#endif -#ifndef CV_SSE4_2 -# define CV_SSE4_2 0 -#endif -#ifndef CV_AVX -# define CV_AVX 0 -#endif -#ifndef CV_AVX2 -# define CV_AVX2 0 -#endif -#ifndef CV_FMA3 -# define CV_FMA3 0 -#endif -#ifndef CV_AVX_512F -# define CV_AVX_512F 0 -#endif -#ifndef CV_AVX_512BW -# define CV_AVX_512BW 0 -#endif -#ifndef CV_AVX_512CD -# define CV_AVX_512CD 0 -#endif -#ifndef CV_AVX_512DQ -# define CV_AVX_512DQ 0 -#endif -#ifndef CV_AVX_512ER -# define CV_AVX_512ER 0 -#endif -#ifndef CV_AVX_512IFMA512 -# define CV_AVX_512IFMA512 0 -#endif -#ifndef CV_AVX_512PF -# define CV_AVX_512PF 0 -#endif -#ifndef CV_AVX_512VBMI -# define CV_AVX_512VBMI 
0 -#endif -#ifndef CV_AVX_512VL -# define CV_AVX_512VL 0 -#endif - -#ifndef CV_NEON -# define CV_NEON 0 -#endif - -#ifndef CV_VFP -# define CV_VFP 0 -#endif /* fundamental constants */ #define CV_PI 3.1415926535897932384626433832795 diff --git a/modules/core/include/opencv2/core/fast_math.hpp b/modules/core/include/opencv2/core/fast_math.hpp index c76936afe4..31c1062320 100644 --- a/modules/core/include/opencv2/core/fast_math.hpp +++ b/modules/core/include/opencv2/core/fast_math.hpp @@ -47,6 +47,12 @@ #include "opencv2/core/cvdef.h" +#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ + && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) +#include +#endif + + //! @addtogroup core_utils //! @{ @@ -66,7 +72,7 @@ # include "tegra_round.hpp" #endif -#if CV_VFP +#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ // 1. general scheme #define ARM_ROUND(_value, _asm_string) \ int res; \ @@ -82,7 +88,7 @@ #endif // 3. 
version for float #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") -#endif // CV_VFP +#endif /** @brief Rounds floating-point number to the nearest integer @@ -93,7 +99,7 @@ CV_INLINE int cvRound( double value ) { #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ - && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__) __m128d t = _mm_set_sd( value ); return _mm_cvtsd_si32(t); #elif defined _MSC_VER && defined _M_IX86 @@ -108,7 +114,7 @@ cvRound( double value ) defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION TEGRA_ROUND_DBL(value); #elif defined CV_ICC || defined __GNUC__ -# if CV_VFP +# if defined ARM_ROUND_DBL ARM_ROUND_DBL(value); # else return (int)lrint(value); @@ -130,18 +136,8 @@ cvRound( double value ) */ CV_INLINE int cvFloor( double value ) { -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) - __m128d t = _mm_set_sd( value ); - int i = _mm_cvtsd_si32(t); - return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i))); -#elif defined __GNUC__ int i = (int)value; return i - (i > value); -#else - int i = cvRound(value); - float diff = (float)(value - i); - return i - (diff < 0); -#endif } /** @brief Rounds floating-point number to the nearest integer not smaller than the original. 
@@ -153,18 +149,8 @@ CV_INLINE int cvFloor( double value ) */ CV_INLINE int cvCeil( double value ) { -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) - __m128d t = _mm_set_sd( value ); - int i = _mm_cvtsd_si32(t); - return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t)); -#elif defined __GNUC__ int i = (int)value; return i + (i < value); -#else - int i = cvRound(value); - float diff = (float)(i - value); - return i + (diff < 0); -#endif } /** @brief Determines if the argument is Not A Number. @@ -200,8 +186,8 @@ CV_INLINE int cvIsInf( double value ) /** @overload */ CV_INLINE int cvRound(float value) { -#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \ - defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) +#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__) __m128 t = _mm_set_ss( value ); return _mm_cvtss_si32(t); #elif defined _MSC_VER && defined _M_IX86 @@ -216,7 +202,7 @@ CV_INLINE int cvRound(float value) defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION TEGRA_ROUND_FLT(value); #elif defined CV_ICC || defined __GNUC__ -# if CV_VFP +# if defined ARM_ROUND_FLT ARM_ROUND_FLT(value); # else return (int)lrintf(value); @@ -237,18 +223,8 @@ CV_INLINE int cvRound( int value ) /** @overload */ CV_INLINE int cvFloor( float value ) { -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) - __m128 t = _mm_set_ss( value ); - int i = _mm_cvtss_si32(t); - return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i))); -#elif defined __GNUC__ int i = (int)value; return i - (i > value); -#else - int i = cvRound(value); - float diff = (float)(value - i); - return i - (diff < 0); -#endif } /** @overload */ @@ -260,18 +236,8 @@ CV_INLINE int 
cvFloor( int value ) /** @overload */ CV_INLINE int cvCeil( float value ) { -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) - __m128 t = _mm_set_ss( value ); - int i = _mm_cvtss_si32(t); - return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t)); -#elif defined __GNUC__ int i = (int)value; return i + (i < value); -#else - int i = cvRound(value); - float diff = (float)(i - value); - return i + (diff < 0); -#endif } /** @overload */ diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 3c8f39d74b..a9838382f0 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -237,24 +237,81 @@ void Exception::formatMessage() msg = format("%s:%d: error: (%d) %s\n", file.c_str(), line, code, err.c_str()); } +static const char* g_hwFeatureNames[CV_HARDWARE_MAX_FEATURE] = { NULL }; + +static const char* getHWFeatureName(int id) +{ + return (id < CV_HARDWARE_MAX_FEATURE) ? g_hwFeatureNames[id] : NULL; +} +static const char* getHWFeatureNameSafe(int id) +{ + const char* name = getHWFeatureName(id); + return name ? 
name : "Unknown feature"; +} + struct HWFeatures { enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE }; - HWFeatures(void) + HWFeatures(bool run_initialize = false) { - memset( have, 0, sizeof(have) ); - x86_family = 0; + memset( have, 0, sizeof(have[0]) * MAX_FEATURE ); + if (run_initialize) + initialize(); } - static HWFeatures initialize(void) + static void initializeNames() { - HWFeatures f; + for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++) + { + g_hwFeatureNames[i] = 0; + } + g_hwFeatureNames[CPU_MMX] = "MMX"; + g_hwFeatureNames[CPU_SSE] = "SSE"; + g_hwFeatureNames[CPU_SSE2] = "SSE2"; + g_hwFeatureNames[CPU_SSE3] = "SSE3"; + g_hwFeatureNames[CPU_SSSE3] = "SSSE3"; + g_hwFeatureNames[CPU_SSE4_1] = "SSE4.1"; + g_hwFeatureNames[CPU_SSE4_2] = "SSE4.2"; + g_hwFeatureNames[CPU_POPCNT] = "POPCNT"; + g_hwFeatureNames[CPU_FP16] = "FP16"; + g_hwFeatureNames[CPU_AVX] = "AVX"; + g_hwFeatureNames[CPU_AVX2] = "AVX2"; + g_hwFeatureNames[CPU_FMA3] = "FMA3"; + + g_hwFeatureNames[CPU_AVX_512F] = "AVX512F"; + g_hwFeatureNames[CPU_AVX_512BW] = "AVX512BW"; + g_hwFeatureNames[CPU_AVX_512CD] = "AVX512CD"; + g_hwFeatureNames[CPU_AVX_512DQ] = "AVX512DQ"; + g_hwFeatureNames[CPU_AVX_512ER] = "AVX512ER"; + g_hwFeatureNames[CPU_AVX_512IFMA512] = "AVX512IFMA"; + g_hwFeatureNames[CPU_AVX_512PF] = "AVX512PF"; + g_hwFeatureNames[CPU_AVX_512VBMI] = "AVX512VBMI"; + g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL"; + + g_hwFeatureNames[CPU_NEON] = "NEON"; + } + + void initialize(void) + { +#ifndef WINRT + if (getenv("OPENCV_DUMP_CONFIG")) + { + fprintf(stderr, "\nOpenCV build configuration is:\n%s\n", + cv::getBuildInformation().c_str()); + } +#endif + + initializeNames(); + int cpuid_data[4] = { 0, 0, 0, 0 }; + int cpuid_data_ex[4] = { 0, 0, 0, 0 }; #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64) + #define OPENCV_HAVE_X86_CPUID 1 __cpuid(cpuid_data, 1); #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) + #define OPENCV_HAVE_X86_CPUID 1 #ifdef __x86_64__ asm __volatile__ ( 
@@ -278,33 +335,36 @@ struct HWFeatures #endif #endif - f.x86_family = (cpuid_data[0] >> 8) & 15; - if( f.x86_family >= 6 ) + #ifdef OPENCV_HAVE_X86_CPUID + int x86_family = (cpuid_data[0] >> 8) & 15; + if( x86_family >= 6 ) { - f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0; - f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0; - f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0; - f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0; - f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0; - f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0; - f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0; - f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0; - f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0; - f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX - f.have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0; + have[CV_CPU_MMX] = (cpuid_data[3] & (1<<23)) != 0; + have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0; + have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0; + have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0; + have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0; + have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0; + have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0; + have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0; + have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0; + have[CV_CPU_AVX] = (cpuid_data[2] & (1<<28)) != 0; + have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0; // make the second call to the cpuid command in order to get // information about extended features like AVX2 #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64) - __cpuidex(cpuid_data, 7, 0); + #define OPENCV_HAVE_X86_CPUID_EX 1 + __cpuidex(cpuid_data_ex, 7, 0); #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) + #define OPENCV_HAVE_X86_CPUID_EX 1 #ifdef __x86_64__ asm __volatile__ ( "movl $7, %%eax\n\t" "movl $0, %%ecx\n\t" "cpuid\n\t" - 
:[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3]) + :[eax]"=a"(cpuid_data_ex[0]),[ebx]"=b"(cpuid_data_ex[1]),[ecx]"=c"(cpuid_data_ex[2]),[edx]"=d"(cpuid_data_ex[3]) : : "cc" ); @@ -317,29 +377,76 @@ struct HWFeatures "cpuid\n\t" "movl %%ebx, %0\n\t" "popl %%ebx\n\t" - : "=r"(cpuid_data[1]), "=c"(cpuid_data[2]) + : "=r"(cpuid_data_ex[1]), "=c"(cpuid_data_ex[2]) : : "cc" ); #endif #endif - f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0; - f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0; - f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0; - f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0; - f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0; - f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0; - f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0; - f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0; - f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0; - f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0; + #ifdef OPENCV_HAVE_X86_CPUID_EX + have[CV_CPU_AVX2] = (cpuid_data_ex[1] & (1<<5)) != 0; + + have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0; + have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0; + have[CV_CPU_AVX_512IFMA512] = (cpuid_data_ex[1] & (1<<21)) != 0; + have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0; + have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0; + have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0; + have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0; + have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0; + have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0; + #else + CV_UNUSED(cpuid_data_ex); + #endif + + bool have_AVX_OS_support = true; + bool have_AVX512_OS_support = true; + if (!(cpuid_data[2] & (1<<27))) + have_AVX_OS_support = false; // OS uses XSAVE_XRSTORE and CPU support AVX + else + { + int xcr0 = 0; + #ifdef _XCR_XFEATURE_ENABLED_MASK // 
requires immintrin.h + xcr0 = (int)_xgetbv(_XCR_XFEATURE_ENABLED_MASK); + #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) + __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" ); + #endif + if ((xcr0 & 0x6) != 0x6) + have_AVX_OS_support = false; // YMM registers + if ((xcr0 & 0xe6) != 0xe6) + have_AVX512_OS_support = false; // ZMM registers + } + + if (!have_AVX_OS_support) + { + have[CV_CPU_AVX] = false; + have[CV_CPU_FP16] = false; + have[CV_CPU_AVX2] = false; + have[CV_CPU_FMA3] = false; + } + if (!have_AVX_OS_support || !have_AVX512_OS_support) + { + have[CV_CPU_AVX_512F] = false; + have[CV_CPU_AVX_512BW] = false; + have[CV_CPU_AVX_512CD] = false; + have[CV_CPU_AVX_512DQ] = false; + have[CV_CPU_AVX_512ER] = false; + have[CV_CPU_AVX_512IFMA512] = false; + have[CV_CPU_AVX_512PF] = false; + have[CV_CPU_AVX_512VBMI] = false; + have[CV_CPU_AVX_512VL] = false; + } } + #else + CV_UNUSED(cpuid_data); + CV_UNUSED(cpuid_data_ex); + #endif // OPENCV_HAVE_X86_CPUID #if defined ANDROID || defined __linux__ #ifdef __aarch64__ - f.have[CV_CPU_NEON] = true; - f.have[CV_CPU_FP16] = true; + have[CV_CPU_NEON] = true; + have[CV_CPU_FP16] = true; #elif defined __arm__ int cpufile = open("/proc/self/auxv", O_RDONLY); @@ -352,8 +459,8 @@ struct HWFeatures { if (auxv.a_type == AT_HWCAP) { - f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0; - f.have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0; + have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0; + have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0; break; } } @@ -363,21 +470,133 @@ struct HWFeatures #endif #elif (defined __clang__ || defined __APPLE__) #if (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__)) - f.have[CV_CPU_NEON] = true; + have[CV_CPU_NEON] = true; #endif #if (defined __ARM_FP && (((__ARM_FP & 0x2) != 0) && defined __ARM_NEON__)) - f.have[CV_CPU_FP16] = true; + have[CV_CPU_FP16] = true; #endif #endif - return f; + int baseline_features[] = { CV_CPU_BASELINE_FEATURES }; + if 
(!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]))) + { + fprintf(stderr, "\n" + "******************************************************************\n" + "* FATAL ERROR: *\n" + "* This OpenCV build doesn't support current CPU/HW configuration *\n" + "* *\n" + "* Use OPENCV_DUMP_CONFIG=1 environment variable for details *\n" + "******************************************************************\n"); + fprintf(stderr, "\nRequired baseline features:\n"); + checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]), true); + CV_ErrorNoReturn(cv::Error::StsAssert, "Missing support for required CPU baseline features. Check OpenCV build configuration and required CPU/HW setup."); + } + + readSettings(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0])); + } + + bool checkFeatures(const int* features, int count, bool dump = false) + { + bool result = true; + for (int i = 0; i < count; i++) + { + int feature = features[i]; + if (feature) + { + if (have[feature]) + { + if (dump) fprintf(stderr, "%s - OK\n", getHWFeatureNameSafe(feature)); + } + else + { + result = false; + if (dump) fprintf(stderr, "%s - NOT AVAILABLE\n", getHWFeatureNameSafe(feature)); + } + } + } + return result; + } + + static inline bool isSymbolSeparator(char c) + { + return c == ',' || c == ';' || c == '-'; + } + + void readSettings(const int* baseline_features, int baseline_count) + { + bool dump = true; + const char* disabled_features = +#ifndef WINRT + getenv("OPENCV_CPU_DISABLE"); +#else + NULL; +#endif + if (disabled_features && disabled_features[0] != 0) + { + const char* start = disabled_features; + for (;;) + { + while (start[0] != 0 && isSymbolSeparator(start[0])) + { + start++; + } + if (start[0] == 0) + break; + const char* end = start; + while (end[0] != 0 && !isSymbolSeparator(end[0])) + { + end++; + } + if (end == start) + continue; + cv::String feature(start, end); + start = end; + + 
CV_Assert(feature.size() > 0); + + bool found = false; + for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++) + { + if (!g_hwFeatureNames[i]) continue; + size_t len = strlen(g_hwFeatureNames[i]); + if (len != feature.size()) continue; + if (feature.compare(g_hwFeatureNames[i]) == 0) + { + bool isBaseline = false; + for (int k = 0; k < baseline_count; k++) + { + if (baseline_features[k] == i) + { + isBaseline = true; + break; + } + } + if (isBaseline) + { + if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'. This has very limited effect, because code optimizations for this feature are executed unconditionally in the most cases.\n", getHWFeatureNameSafe(i)); + } + if (!have[i]) + { + if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n", getHWFeatureNameSafe(i)); + } + have[i] = false; + + found = true; + break; + } + } + if (!found) + { + if (dump) fprintf(stderr, "OPENCV: Trying to disable unknown CPU feature: '%s'.\n", feature.c_str()); + } + } + } } - int x86_family; bool have[MAX_FEATURE+1]; }; -static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures(); +static HWFeatures featuresEnabled(true), featuresDisabled = HWFeatures(false); static HWFeatures* currentFeatures = &featuresEnabled; bool checkHardwareSupport(int feature) diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index b0363c4482..60df4ed7b9 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -65,7 +65,7 @@ elseif(HAVE_QT) list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES}) list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES}) - ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag) + ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag "") if(${_have_flag}) set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations) endif() 
diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index 0fa520228e..dcf2e44b5e 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -1649,7 +1649,7 @@ struct VResizeLanczos4 { CastOp castOp; VecOp vecOp; - int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width); + int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width); #if CV_ENABLE_UNROLLED for( ; x <= width - 4; x += 4 ) { @@ -1657,7 +1657,7 @@ struct VResizeLanczos4 const WT* S = src[0]; WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b; - for( k = 1; k < 8; k++ ) + for( int k = 1; k < 8; k++ ) { b = beta[k]; S = src[k]; s0 += S[x]*b; s1 += S[x+1]*b; diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp index 51843fa488..bb37ee91e0 100644 --- a/modules/objdetect/src/haar.cpp +++ b/modules/objdetect/src/haar.cpp @@ -824,10 +824,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, CvPoint pt, double& stage_sum, int start_stage ) { #ifdef CV_HAAR_USE_AVX - bool haveAVX = false; - if(cv::checkHardwareSupport(CV_CPU_AVX)) - if(__xgetbv()&0x6)// Check if the OS will save the YMM registers - haveAVX = true; + bool haveAVX = cv::checkHardwareSupport(CV_CPU_AVX); #else # ifdef CV_HAAR_USE_SSE bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);