mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge pull request #12224 from cv3d:improvments/CUDA_detection
This commit is contained in:
commit
4e59d6fd12
@ -70,6 +70,12 @@ if(CUDA_FOUND)
|
||||
unset(CUDA_ARCH_PTX CACHE)
|
||||
endif()
|
||||
|
||||
SET(DETECT_ARCHS_COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run")
|
||||
if(WIN32 AND CMAKE_LINKER) #Workaround for VS cl.exe not being in the env. path
|
||||
get_filename_component(host_compiler_bindir ${CMAKE_LINKER} DIRECTORY)
|
||||
SET(DETECT_ARCHS_COMMAND ${DETECT_ARCHS_COMMAND} "-ccbin" "${host_compiler_bindir}")
|
||||
endif()
|
||||
|
||||
set(__cuda_arch_ptx "")
|
||||
if(CUDA_GENERATION STREQUAL "Fermi")
|
||||
set(__cuda_arch_bin "2.0")
|
||||
@ -82,10 +88,11 @@ if(CUDA_FOUND)
|
||||
elseif(CUDA_GENERATION STREQUAL "Volta")
|
||||
set(__cuda_arch_bin "7.0")
|
||||
elseif(CUDA_GENERATION STREQUAL "Auto")
|
||||
execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
|
||||
execute_process( COMMAND ${DETECT_ARCHS_COMMAND}
|
||||
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
|
||||
RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
string(REGEX REPLACE ".*\n" "" _nvcc_out "${_nvcc_out}") #Strip leading warning messages, if any
|
||||
if(NOT _nvcc_res EQUAL 0)
|
||||
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
|
||||
else()
|
||||
@ -99,10 +106,11 @@ if(CUDA_FOUND)
|
||||
set(__cuda_arch_bin "3.2")
|
||||
set(__cuda_arch_ptx "")
|
||||
elseif(AARCH64)
|
||||
execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
|
||||
execute_process( COMMAND ${DETECT_ARCHS_COMMAND}
|
||||
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
|
||||
RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
string(REGEX REPLACE ".*\n" "" _nvcc_out "${_nvcc_out}") #Strip leading warning messages, if any
|
||||
if(NOT _nvcc_res EQUAL 0)
|
||||
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
|
||||
set(__cuda_arch_bin "5.3 6.2 7.0")
|
||||
|
@ -1,14 +1,25 @@
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <list>
|
||||
|
||||
int main()
|
||||
{
|
||||
std::ostringstream arch;
|
||||
std::list<std::string> archs;
|
||||
|
||||
int count = 0;
|
||||
if (cudaSuccess != cudaGetDeviceCount(&count)){return -1;}
|
||||
if (count == 0) {return -1;}
|
||||
if (cudaSuccess != cudaGetDeviceCount(&count)){ return -1; }
|
||||
if (count == 0) { return -1; }
|
||||
for (int device = 0; device < count; ++device)
|
||||
{
|
||||
cudaDeviceProp prop;
|
||||
if (cudaSuccess != cudaGetDeviceProperties(&prop, device)){ continue;}
|
||||
printf("%d.%d ", prop.major, prop.minor);
|
||||
if (cudaSuccess != cudaGetDeviceProperties(&prop, device)){ continue; }
|
||||
arch << prop.major << "." << prop.minor;
|
||||
archs.push_back(arch.str());
|
||||
arch.str("");
|
||||
}
|
||||
archs.unique(); #Some devices might have the same arch
|
||||
for (std::list<std::string>::iterator it=archs.begin(); it!=archs.end(); ++it)
|
||||
std::cout << *it << " ";
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user