Merge pull request #12224 from cv3d:improvments/CUDA_detection

This commit is contained in:
Alexander Alekhin 2018-08-16 09:51:13 +00:00
commit 4e59d6fd12
2 changed files with 26 additions and 7 deletions

View File

@ -70,6 +70,12 @@ if(CUDA_FOUND)
unset(CUDA_ARCH_PTX CACHE) unset(CUDA_ARCH_PTX CACHE)
endif() endif()
SET(DETECT_ARCHS_COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run")
if(WIN32 AND CMAKE_LINKER) #Workaround for VS cl.exe not being in the env. path
get_filename_component(host_compiler_bindir ${CMAKE_LINKER} DIRECTORY)
SET(DETECT_ARCHS_COMMAND ${DETECT_ARCHS_COMMAND} "-ccbin" "${host_compiler_bindir}")
endif()
set(__cuda_arch_ptx "") set(__cuda_arch_ptx "")
if(CUDA_GENERATION STREQUAL "Fermi") if(CUDA_GENERATION STREQUAL "Fermi")
set(__cuda_arch_bin "2.0") set(__cuda_arch_bin "2.0")
@ -82,10 +88,11 @@ if(CUDA_FOUND)
elseif(CUDA_GENERATION STREQUAL "Volta") elseif(CUDA_GENERATION STREQUAL "Volta")
set(__cuda_arch_bin "7.0") set(__cuda_arch_bin "7.0")
elseif(CUDA_GENERATION STREQUAL "Auto") elseif(CUDA_GENERATION STREQUAL "Auto")
execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run" execute_process( COMMAND ${DETECT_ARCHS_COMMAND}
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/" WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REGEX REPLACE ".*\n" "" _nvcc_out "${_nvcc_out}") #Strip leading warning messages, if any
if(NOT _nvcc_res EQUAL 0) if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.") message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
else() else()
@ -99,10 +106,11 @@ if(CUDA_FOUND)
set(__cuda_arch_bin "3.2") set(__cuda_arch_bin "3.2")
set(__cuda_arch_ptx "") set(__cuda_arch_ptx "")
elseif(AARCH64) elseif(AARCH64)
execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run" execute_process( COMMAND ${DETECT_ARCHS_COMMAND}
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/" WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REGEX REPLACE ".*\n" "" _nvcc_out "${_nvcc_out}") #Strip leading warning messages, if any
if(NOT _nvcc_res EQUAL 0) if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.") message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
set(__cuda_arch_bin "5.3 6.2 7.0") set(__cuda_arch_bin "5.3 6.2 7.0")

View File

@ -1,14 +1,25 @@
#include <stdio.h> #include <iostream>
#include <sstream>
#include <list>
int main() int main()
{ {
std::ostringstream arch;
std::list<std::string> archs;
int count = 0; int count = 0;
if (cudaSuccess != cudaGetDeviceCount(&count)){return -1;} if (cudaSuccess != cudaGetDeviceCount(&count)){ return -1; }
if (count == 0) {return -1;} if (count == 0) { return -1; }
for (int device = 0; device < count; ++device) for (int device = 0; device < count; ++device)
{ {
cudaDeviceProp prop; cudaDeviceProp prop;
if (cudaSuccess != cudaGetDeviceProperties(&prop, device)){ continue;} if (cudaSuccess != cudaGetDeviceProperties(&prop, device)){ continue; }
printf("%d.%d ", prop.major, prop.minor); arch << prop.major << "." << prop.minor;
archs.push_back(arch.str());
arch.str("");
} }
archs.unique(); #Some devices might have the same arch
for (std::list<std::string>::iterator it=archs.begin(); it!=archs.end(); ++it)
std::cout << *it << " ";
return 0; return 0;
} }