select the architecture based on nvcc result

* cache the result
  * DRY
  * brush up based on review
This commit is contained in:
Tomoaki Teshima 2020-06-02 05:07:53 +09:00
parent ea0d1424d8
commit 156406b56c

View File

@ -53,6 +53,12 @@ if(CUDA_FOUND)
message(STATUS "CUDA detected: " ${CUDA_VERSION}) message(STATUS "CUDA detected: " ${CUDA_VERSION})
set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Turing") set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Turing")
# Compute capabilities associated with each named CUDA generation.
# Every variable holds a CMake list (";"-separated) of "major.minor" strings.
set(_arch_fermi   "2.0")
set(_arch_kepler  "3.0" "3.5" "3.7")
set(_arch_maxwell "5.0" "5.2")
set(_arch_pascal  "6.0" "6.1")
set(_arch_volta   "7.0")
set(_arch_turing  "7.5")
if(NOT CMAKE_CROSSCOMPILING) if(NOT CMAKE_CROSSCOMPILING)
list(APPEND _generations "Auto") list(APPEND _generations "Auto")
endif() endif()
@ -70,29 +76,57 @@ if(CUDA_FOUND)
unset(CUDA_ARCH_PTX CACHE) unset(CUDA_ARCH_PTX CACHE)
endif() endif()
set(__cuda_arch_ptx "") macro(ocv_filter_available_architecture result_list)
if(CUDA_GENERATION STREQUAL "Fermi") if(DEFINED CUDA_SUPPORTED_CC)
set(__cuda_arch_bin "2.0") set(${result_list} "${CUDA_SUPPORTED_CC}")
elseif(CUDA_GENERATION STREQUAL "Kepler") else()
set(__cuda_arch_bin "3.0 3.5 3.7") set(CC_LIST ${ARGN})
elseif(CUDA_GENERATION STREQUAL "Maxwell") foreach(target_arch ${CC_LIST})
set(__cuda_arch_bin "5.0 5.2") string(REPLACE "." "" target_arch_short ${target_arch})
elseif(CUDA_GENERATION STREQUAL "Pascal") set(NVCC_OPTION "-gencode;arch=compute_${target_arch_short},code=sm_${target_arch_short}")
set(__cuda_arch_bin "6.0 6.1") execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu"
elseif(CUDA_GENERATION STREQUAL "Volta")
set(__cuda_arch_bin "7.0")
elseif(CUDA_GENERATION STREQUAL "Turing")
set(__cuda_arch_bin "7.5")
elseif(CUDA_GENERATION STREQUAL "Auto")
execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/" WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(_nvcc_res EQUAL 0)
set(${result_list} "${${result_list}} ${target_arch}")
endif()
endforeach()
string(STRIP ${${result_list}} ${result_list})
set(CUDA_SUPPORTED_CC ${${result_list}} CACHE INTERNAL "List of supported compute capability")
endif()
endmacro()
# Build and run the CUDA architecture probe (OpenCVDetectCudaArch.cu) with nvcc.
#   status - NAME of the variable that receives the result of the nvcc invocation
#            (callers compare it against 0 to detect success)
#   output - NAME of the variable that receives the captured standard output,
#            with trailing whitespace stripped
# Standard error is discarded. This is a macro, so both variables are set
# directly in the caller's scope.
macro(ocv_detect_native_cuda_arch status output)
  execute_process(
    COMMAND
      "${CUDA_NVCC_EXECUTABLE}"
      ${CUDA_NVCC_FLAGS}
      "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu"
      "--run"
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
    RESULT_VARIABLE ${status}
    OUTPUT_VARIABLE ${output}
    ERROR_QUIET
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )
endmacro()
# Rewrite deprecated compute capabilities in an architecture list, in place.
#   _arch_bin_list - NAME of the variable holding the architecture list; the
#                    variable is overwritten with the rewritten value.
# Every occurrence of "2.1" is replaced by "2.1(2.0)". All other text,
# including ";" or space separators, is left as is.
macro(ocv_wipeout_deprecated _arch_bin_list)
  # The input MUST be quoted: string(REPLACE) concatenates all of its input
  # arguments, so an unquoted ";"-list such as "3.0;3.5;3.7" would be split
  # into separate arguments and glued back together as "3.03.53.7". An empty
  # value would leave the command with too few arguments and raise an error.
  string(REPLACE "2.1" "2.1(2.0)" ${_arch_bin_list} "${${_arch_bin_list}}")
endmacro()
set(__cuda_arch_ptx "")
if(CUDA_GENERATION STREQUAL "Fermi")
set(__cuda_arch_bin ${_arch_fermi})
elseif(CUDA_GENERATION STREQUAL "Kepler")
set(__cuda_arch_bin ${_arch_kepler})
elseif(CUDA_GENERATION STREQUAL "Maxwell")
set(__cuda_arch_bin ${_arch_maxwell})
elseif(CUDA_GENERATION STREQUAL "Pascal")
set(__cuda_arch_bin ${_arch_pascal})
elseif(CUDA_GENERATION STREQUAL "Volta")
set(__cuda_arch_bin ${_arch_volta})
elseif(CUDA_GENERATION STREQUAL "Turing")
set(__cuda_arch_bin ${_arch_turing})
elseif(CUDA_GENERATION STREQUAL "Auto")
ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
if(NOT _nvcc_res EQUAL 0) if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.") message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
else() else()
set(__cuda_arch_bin "${_nvcc_out}") string(REGEX MATCHALL "[0-9]+\\.[0-9]" __cuda_arch_bin "${_nvcc_out}")
string(REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin}")
endif() endif()
endif() endif()
@ -101,28 +135,26 @@ if(CUDA_FOUND)
set(__cuda_arch_bin "3.2") set(__cuda_arch_bin "3.2")
set(__cuda_arch_ptx "") set(__cuda_arch_ptx "")
elseif(AARCH64) elseif(AARCH64)
execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run" ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _nvcc_res EQUAL 0) if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.") message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
set(__cuda_arch_bin "5.3 6.2 7.2") set(__cuda_arch_bin "5.3 6.2 7.2")
else() else()
set(__cuda_arch_bin "${_nvcc_out}") set(__cuda_arch_bin "${_nvcc_out}")
string(REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin}")
endif() endif()
set(__cuda_arch_ptx "") set(__cuda_arch_ptx "")
else() else()
if(CUDA_VERSION VERSION_LESS "9.0") ocv_filter_available_architecture(__cuda_arch_bin
set(__cuda_arch_bin "2.0 3.0 3.5 3.7 5.0 5.2 6.0 6.1") ${_arch_fermi}
elseif(CUDA_VERSION VERSION_LESS "10.0") ${_arch_kepler}
set(__cuda_arch_bin "3.0 3.5 3.7 5.0 5.2 6.0 6.1 7.0") ${_arch_maxwell}
else() ${_arch_pascal}
set(__cuda_arch_bin "3.0 3.5 3.7 5.0 5.2 6.0 6.1 7.0 7.5") ${_arch_volta}
endif() ${_arch_turing}
)
endif() endif()
endif() endif()
ocv_wipeout_deprecated(__cuda_arch_bin)
set(CUDA_ARCH_BIN ${__cuda_arch_bin} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") set(CUDA_ARCH_BIN ${__cuda_arch_bin} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
set(CUDA_ARCH_PTX ${__cuda_arch_ptx} CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") set(CUDA_ARCH_PTX ${__cuda_arch_ptx} CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")