cmake: backport CUDA scripts

This commit is contained in:
Alexander Alekhin 2020-07-08 07:28:40 +00:00
parent 950a916952
commit e0f9eac521
3 changed files with 143 additions and 30 deletions

View File

@ -1,13 +1,14 @@
# Early exits for toolchains on which CUDA compilation is not supported.
# Setting OPENCV_CMAKE_FORCE_CUDA bypasses both guards and lets the CUDA
# detection below run anyway (previously the flag was OR-ed into the
# conditions, so "forcing" CUDA actually disabled it with a misleading message).
if((WIN32 AND NOT MSVC) AND NOT OPENCV_CMAKE_FORCE_CUDA)
  message(STATUS "CUDA compilation is disabled (due to only Visual Studio compiler supported on your platform).")
  return()
endif()

if((NOT UNIX AND CV_CLANG) AND NOT OPENCV_CMAKE_FORCE_CUDA)
  message(STATUS "CUDA compilation is disabled (due to Clang unsupported on your platform).")
  return()
endif()
#set(OPENCV_CMAKE_CUDA_DEBUG 1)
if(((NOT CMAKE_VERSION VERSION_LESS "3.9.0") # requires https://gitlab.kitware.com/cmake/cmake/merge_requests/663 if(((NOT CMAKE_VERSION VERSION_LESS "3.9.0") # requires https://gitlab.kitware.com/cmake/cmake/merge_requests/663
OR OPENCV_CUDA_FORCE_EXTERNAL_CMAKE_MODULE) OR OPENCV_CUDA_FORCE_EXTERNAL_CMAKE_MODULE)
@ -43,7 +44,7 @@ if(CUDA_FOUND)
endif() endif()
if(WITH_NVCUVID) if(WITH_NVCUVID)
macro(SEARCH_NVCUVID_HEADER _filename _result) macro(ocv_cuda_SEARCH_NVCUVID_HEADER _filename _result)
# place header file under CUDA_TOOLKIT_TARGET_DIR or CUDA_TOOLKIT_ROOT_DIR # place header file under CUDA_TOOLKIT_TARGET_DIR or CUDA_TOOLKIT_ROOT_DIR
find_path(_header_result find_path(_header_result
${_filename} ${_filename}
@ -60,8 +61,8 @@ if(CUDA_FOUND)
endif() endif()
unset(_header_result CACHE) unset(_header_result CACHE)
endmacro() endmacro()
SEARCH_NVCUVID_HEADER("nvcuvid.h" HAVE_NVCUVID_HEADER) ocv_cuda_SEARCH_NVCUVID_HEADER("nvcuvid.h" HAVE_NVCUVID_HEADER)
SEARCH_NVCUVID_HEADER("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER) ocv_cuda_SEARCH_NVCUVID_HEADER("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER)
find_cuda_helper_libs(nvcuvid) find_cuda_helper_libs(nvcuvid)
if(WIN32) if(WIN32)
find_cuda_helper_libs(nvcuvenc) find_cuda_helper_libs(nvcuvenc)
@ -102,32 +103,89 @@ if(CUDA_FOUND)
unset(CUDA_ARCH_PTX CACHE) unset(CUDA_ARCH_PTX CACHE)
endif() endif()
# Choose the host compiler that nvcc uses while probing architectures.
# A "-ccbin" already present in OPENCV_CUDA_DETECTION_NVCC_FLAGS is left alone.
if(NOT OPENCV_CUDA_DETECTION_NVCC_FLAGS MATCHES "-ccbin")
  if(CUDA_HOST_COMPILER AND EXISTS "${CUDA_HOST_COMPILER}")
    # A usable host compiler was configured: hand it to nvcc directly.
    list(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${CUDA_HOST_COMPILER}")
  elseif(WIN32 AND CMAKE_LINKER)
    # Workaround for VS cl.exe not being in the env. path:
    # point nvcc at the directory that contains the linker.
    get_filename_component(host_compiler_bindir "${CMAKE_LINKER}" DIRECTORY)
    list(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${host_compiler_bindir}")
  elseif(CUDA_HOST_COMPILER)
    # CUDA_HOST_COMPILER is set but does not exist on disk.
    message(STATUS "CUDA: CUDA_HOST_COMPILER='${CUDA_HOST_COMPILER}' is not valid, autodetection may not work. Specify OPENCV_CUDA_DETECTION_NVCC_FLAGS with -ccbin option for fix that")
  endif()
endif()
# ocv_filter_available_architecture(<result_list> <cc> [<cc>...])
#
# Probes which of the given compute capabilities (e.g. 3.0 5.2 7.5) the
# configured nvcc can compile for, by building
# cmake/checks/OpenCVDetectCudaArch.cu once per capability with the matching
# "-gencode" option plus OPENCV_CUDA_DETECTION_NVCC_FLAGS.
#
#   result_list - name of the variable receiving the accepted capabilities.
#                 On a cache hit it is overwritten with the cached list;
#                 otherwise accepted capabilities are appended to it.
#
# The result is cached in OPENCV_CACHE_CUDA_SUPPORTED_CC, keyed by the
# requested list, the nvcc executable and the detection flags. Only non-empty
# results are cached (and an empty cached value is ignored), so a failed
# detection is retried on the next CMake run - e.g. after the user enables
# OPENCV_CMAKE_CUDA_DEBUG=1 as suggested by the warning below.
#
# This is a macro: __cache_key_check, CC_LIST, NVCC_OPTION, _cmd, _nvcc_res,
# _nvcc_out and _nvcc_err are set in the caller's scope.
macro(ocv_filter_available_architecture result_list)
  set(__cache_key_check "${ARGN} : ${CUDA_NVCC_EXECUTABLE} ${OPENCV_CUDA_DETECTION_NVCC_FLAGS}")
  if(DEFINED OPENCV_CACHE_CUDA_SUPPORTED_CC
      AND NOT "${OPENCV_CACHE_CUDA_SUPPORTED_CC}" STREQUAL ""
      AND OPENCV_CACHE_CUDA_SUPPORTED_CC_check STREQUAL __cache_key_check)
    set(${result_list} "${OPENCV_CACHE_CUDA_SUPPORTED_CC}")
  else()
    set(CC_LIST ${ARGN})
    foreach(target_arch ${CC_LIST})
      # "7.5" -> "75", as used in compute_XX / sm_XX
      string(REPLACE "." "" target_arch_short "${target_arch}")
      set(NVCC_OPTION "-gencode;arch=compute_${target_arch_short},code=sm_${target_arch_short}")
      set(_cmd "${CUDA_NVCC_EXECUTABLE}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" --compile)
      # stderr is captured (not discarded) so that it can be shown in debug mode
      execute_process(
          COMMAND ${_cmd}
          WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
          RESULT_VARIABLE _nvcc_res
          OUTPUT_VARIABLE _nvcc_out
          ERROR_VARIABLE _nvcc_err
          OUTPUT_STRIP_TRAILING_WHITESPACE
      )
      if(OPENCV_CMAKE_CUDA_DEBUG)
        message(WARNING "COMMAND: ${_cmd}")
        message(STATUS "Result: ${_nvcc_res}")
        message(STATUS "Out: ${_nvcc_out}")
        message(STATUS "Err: ${_nvcc_err}")
      endif()
      # exit code 0 means nvcc accepted this architecture
      if(_nvcc_res EQUAL 0)
        list(APPEND ${result_list} "${target_arch}")
      endif()
    endforeach()
    string(STRIP "${${result_list}}" ${result_list})
    if(" ${${result_list}}" STREQUAL " ")
      # Do not cache a failed detection: it must be re-run on the next configure.
      message(WARNING "CUDA: Autodetection arch list is empty. Please enable OPENCV_CMAKE_CUDA_DEBUG=1 and check/specify OPENCV_CUDA_DETECTION_NVCC_FLAGS variable")
    else()
      # cache detected values
      set(OPENCV_CACHE_CUDA_SUPPORTED_CC ${${result_list}} CACHE INTERNAL "")
      set(OPENCV_CACHE_CUDA_SUPPORTED_CC_check "${__cache_key_check}" CACHE INTERNAL "")
    endif()
  endif()
endmacro()
# ocv_detect_native_cuda_arch(<status> <output>)
#
# Compiles and runs cmake/checks/OpenCVDetectCudaArch.cu through nvcc ("--run")
# to query the compute capabilities reported on the build machine.
#
#   status - name of the variable receiving the nvcc exit code (0 on success;
#            also 0 when the value is served from the cache).
#   output - name of the variable receiving the last line of the program's
#            standard output (earlier lines, e.g. warnings, are stripped).
#
# A successful result is cached in OPENCV_CACHE_CUDA_ACTIVE_CC, keyed by the
# full detection command line. An empty cached value is ignored so that the
# detection is re-run instead of returning nothing.
#
# This is a macro: OPENCV_CUDA_DETECT_ARCHS_COMMAND, __cache_key_check,
# _nvcc_out and _nvcc_err are set in the caller's scope.
macro(ocv_detect_native_cuda_arch status output)
  set(OPENCV_CUDA_DETECT_ARCHS_COMMAND "${CUDA_NVCC_EXECUTABLE}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run")
  set(__cache_key_check "${OPENCV_CUDA_DETECT_ARCHS_COMMAND}")
  if(DEFINED OPENCV_CACHE_CUDA_ACTIVE_CC
      AND NOT "${OPENCV_CACHE_CUDA_ACTIVE_CC}" STREQUAL ""
      AND OPENCV_CACHE_CUDA_ACTIVE_CC_check STREQUAL __cache_key_check)
    set(${output} "${OPENCV_CACHE_CUDA_ACTIVE_CC}")
    set(${status} 0)
  else()
    # stderr is captured into _nvcc_err rather than silenced with ERROR_QUIET,
    # otherwise the "Err:" debug line below would always be empty.
    execute_process(
        COMMAND ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}
        WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
        RESULT_VARIABLE ${status}
        OUTPUT_VARIABLE _nvcc_out
        ERROR_VARIABLE _nvcc_err
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )
    if(OPENCV_CMAKE_CUDA_DEBUG)
      message(WARNING "COMMAND: ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}")
      message(STATUS "Result: ${${status}}")
      message(STATUS "Out: ${_nvcc_out}")
      message(STATUS "Err: ${_nvcc_err}")
    endif()
    string(REGEX REPLACE ".*\n" "" ${output} "${_nvcc_out}") #Strip leading warning messages, if any
    if(${status} EQUAL 0)
      # cache detected values (the value just written to <output>)
      set(OPENCV_CACHE_CUDA_ACTIVE_CC ${${output}} CACHE INTERNAL "")
      set(OPENCV_CACHE_CUDA_ACTIVE_CC_check "${__cache_key_check}" CACHE INTERNAL "")
    endif()
  endif()
endmacro()
macro(ocv_wipeout_deprecated _arch_bin_list) macro(ocv_wipeout_deprecated _arch_bin_list)
@ -156,6 +214,9 @@ if(CUDA_FOUND)
else() else()
string(REGEX MATCHALL "[0-9]+\\.[0-9]" __cuda_arch_bin "${_nvcc_out}") string(REGEX MATCHALL "[0-9]+\\.[0-9]" __cuda_arch_bin "${_nvcc_out}")
endif() endif()
elseif(CUDA_ARCH_BIN)
message(STATUS "CUDA: Using CUDA_ARCH_BIN=${CUDA_ARCH_BIN}")
set(__cuda_arch_bin ${CUDA_ARCH_BIN})
endif() endif()
if(NOT DEFINED __cuda_arch_bin) if(NOT DEFINED __cuda_arch_bin)
@ -163,7 +224,11 @@ if(CUDA_FOUND)
set(__cuda_arch_bin "3.2") set(__cuda_arch_bin "3.2")
set(__cuda_arch_ptx "") set(__cuda_arch_ptx "")
elseif(AARCH64) elseif(AARCH64)
if(NOT CMAKE_CROSSCOMPILING)
ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out) ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
else()
set(_nvcc_res -1) # emulate error, see below
endif()
if(NOT _nvcc_res EQUAL 0) if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.") message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
# TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0)
@ -197,11 +262,9 @@ if(CUDA_FOUND)
string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}") string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}")
string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}") string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")
# Check if user specified 1.0 compute capability: we don't support it.
# The list is padded with spaces and the dot is escaped so that only a
# stand-alone "1.0" entry matches, whatever separator is used (space, ';',
# ',' or parentheses), while entries such as "11.0" do not.
if(" ${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX} " MATCHES "[^0-9.]1\\.0[^0-9]")
  message(SEND_ERROR "CUDA: 1.0 compute capability is not supported - exclude it from ARCH/PTX list and re-run CMake")
endif()
# NVCC flags to be set # NVCC flags to be set
@ -312,6 +375,16 @@ if(CUDA_FOUND)
if(UNIX OR APPLE)
  # Forward -fPIC to the host compiler through nvcc.
  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC)

  # When C++11 is enabled and no "-std=" option is present in any of the C++
  # or nvcc flag sets, pick a language standard for nvcc based on the CUDA version.
  if(ENABLE_CXX11)
    if(NOT " ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_DEBUG} ${CUDA_NVCC_FLAGS}" MATCHES "-std=")
      if(NOT CUDA_VERSION VERSION_LESS "11.0")
        # CUDA 11.0 and newer
        list(APPEND CUDA_NVCC_FLAGS "--std=c++14")
      else()
        # CUDA older than 11.0
        list(APPEND CUDA_NVCC_FLAGS "--std=c++11")
      endif()
    endif()
  endif()
endif()
if(APPLE) if(APPLE)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only)
@ -379,7 +452,53 @@ if(HAVE_CUDA)
# With a Visual Studio generator, turn off CMake's automatic re-run checks
# unless the user opted out via OPENCV_SKIP_CUDA_CMAKE_SUPPRESS_REGENERATION.
if(NOT OPENCV_SKIP_CUDA_CMAKE_SUPPRESS_REGENERATION
    AND CMAKE_GENERATOR MATCHES "Visual Studio")
  message(STATUS "CUDA: MSVS generator is detected. Disabling CMake re-run checks (CMAKE_SUPPRESS_REGENERATION=ON). You need to run CMake manually if updates are required.")
  set(CMAKE_SUPPRESS_REGENERATION ON)
endif()
endif() endif()
# ----------------------------------------------------------------------------
# Add CUDA libraries (needed for apps/tools, samples)
# ----------------------------------------------------------------------------
# When CUDA is enabled: apply the libcuda stub link workaround (if needed) and
# append the CUDA libraries and library search paths to OPENCV_LINKER_LIBS.
if(HAVE_CUDA)
# details: https://github.com/NVIDIA/nvidia-docker/issues/775
# The workaround is considered only when CUDA_CUDA_LIBRARY points at
# ".../stubs/libcuda.so" and OPENCV_SKIP_CUDA_STUB_WORKAROUND is not set.
if(" ${CUDA_CUDA_LIBRARY}" MATCHES "/stubs/libcuda.so" AND NOT OPENCV_SKIP_CUDA_STUB_WORKAROUND)
# Assume success; reset to 0 below if neither symlink nor copy works.
set(CUDA_STUB_ENABLED_LINK_WORKAROUND 1)
if(EXISTS "${CUDA_CUDA_LIBRARY}" AND NOT OPENCV_SKIP_CUDA_STUB_WORKAROUND_RPATH_LINK)
# Expose the stub under the name "libcuda.so.1" inside the build tree:
# try a symlink first, then fall back to copying the file.
set(CUDA_STUB_TARGET_PATH "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/")
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink "${CUDA_CUDA_LIBRARY}" "${CUDA_STUB_TARGET_PATH}/libcuda.so.1"
RESULT_VARIABLE CUDA_STUB_SYMLINK_RESULT)
if(NOT CUDA_STUB_SYMLINK_RESULT EQUAL 0)
execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_CUDA_LIBRARY}" "${CUDA_STUB_TARGET_PATH}/libcuda.so.1"
RESULT_VARIABLE CUDA_STUB_COPY_RESULT)
if(NOT CUDA_STUB_COPY_RESULT EQUAL 0)
set(CUDA_STUB_ENABLED_LINK_WORKAROUND 0)
endif()
endif()
if(CUDA_STUB_ENABLED_LINK_WORKAROUND)
# Let the linker resolve libcuda.so.1 from the directory prepared above
# when linking executables.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath-link,\"${CUDA_STUB_TARGET_PATH}\"")
endif()
else()
# Stub file is missing or the rpath-link variant was disabled:
# allow undefined symbols in shared libraries when linking executables.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--allow-shlib-undefined")
endif()
if(NOT CUDA_STUB_ENABLED_LINK_WORKAROUND)
message(WARNING "CUDA: workaround for stubs/libcuda.so.1 is not applied")
endif()
endif()
# CUDA runtime and NPP libraries are always linked; cuBLAS / cuFFT only
# when the corresponding HAVE_* flag is set.
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
if(HAVE_CUBLAS)
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CUDA_cublas_LIBRARY})
endif()
if(HAVE_CUFFT)
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CUDA_cufft_LIBRARY})
endif()
# Add every directory from CUDA_LIBS_PATH as a library search path option.
foreach(p ${CUDA_LIBS_PATH})
if(MSVC AND CMAKE_GENERATOR MATCHES "Ninja|JOM")
# The path is wrapped in literal double quotes for these generators
# (presumably to survive spaces in the path - TODO confirm).
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CMAKE_LIBRARY_PATH_FLAG}"${p}")
else()
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CMAKE_LIBRARY_PATH_FLAG}${p})
endif()
endforeach()
endif()

View File

@ -13,9 +13,6 @@
/* Compile for 'real' NVIDIA GPU architectures */ /* Compile for 'real' NVIDIA GPU architectures */
#define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}" #define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}"
/* Create PTX or BIN for 1.0 compute capability */
#cmakedefine CUDA_ARCH_BIN_OR_PTX_10
/* NVIDIA GPU features are used */ /* NVIDIA GPU features are used */
#define CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES}" #define CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES}"

View File

@ -82,9 +82,6 @@
# error "Insufficient Cuda Runtime library version, please update it." # error "Insufficient Cuda Runtime library version, please update it."
# endif # endif
# if defined(CUDA_ARCH_BIN_OR_PTX_10)
# error "OpenCV CUDA module doesn't support NVIDIA compute capability 1.0"
# endif
#endif #endif
//! @cond IGNORED //! @cond IGNORED