From 4b2a33a5c64a9925c210fc287278f0e659b4c5fb Mon Sep 17 00:00:00 2001 From: Johnny Date: Sat, 25 Jan 2025 07:51:27 +0100 Subject: [PATCH] Merge pull request #26820 from johnnynunez:patch-1 Initial support for the Blackwell GPU architecture #26820 10.0: Blackwell B100/B200; 12.0: Blackwell RTX 50 series ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on code under the GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake --- cmake/OpenCVDetectCUDAUtils.cmake | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cmake/OpenCVDetectCUDAUtils.cmake b/cmake/OpenCVDetectCUDAUtils.cmake index 97676628ab..187740a664 100644 --- a/cmake/OpenCVDetectCUDAUtils.cmake +++ b/cmake/OpenCVDetectCUDAUtils.cmake @@ -95,7 +95,7 @@ endfunction() macro(ocv_initialize_nvidia_device_generations) OCV_OPTION(CUDA_ENABLE_DEPRECATED_GENERATION "Enable deprecated generations in the list" OFF) - set(_generations "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "Lovelace" "Hopper") + set(_generations "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "Lovelace" "Hopper" "Blackwell") if(CUDA_ENABLE_DEPRECATED_GENERATION) set(_generations "Fermi" "${_generations}") set(_generations "Kepler" "${_generations}") @@ -109,6 +109,7 @@ macro(ocv_initialize_nvidia_device_generations) set(_arch_ampere "8.0;8.6") set(_arch_lovelace "8.9") set(_arch_hopper "9.0") + set(_arch_blackwell "10.0;12.0") if(NOT CMAKE_CROSSCOMPILING) list(APPEND _generations 
"Auto") endif() @@ -246,6 +247,8 @@ macro(ocv_set_cuda_arch_bin_and_ptx nvcc_executable) set(__cuda_arch_bin ${_arch_lovelace}) elseif(CUDA_GENERATION STREQUAL "Hopper") set(__cuda_arch_bin ${_arch_hopper}) + elseif(CUDA_GENERATION STREQUAL "Blackwell") + set(__cuda_arch_bin ${_arch_blackwell}) elseif(CUDA_GENERATION STREQUAL "Auto") ocv_detect_native_cuda_arch(${nvcc_executable} _nvcc_res _nvcc_out) if(NOT _nvcc_res EQUAL 0) @@ -270,13 +273,14 @@ macro(ocv_set_cuda_arch_bin_and_ptx nvcc_executable) endif() if(NOT _nvcc_res EQUAL 0) message(STATUS "CUDA: Automatic detection of CUDA generation failed. Going to build for all known architectures") - # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) Orin (8.7) + # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) Orin (8.7) Thor (10.1) ocv_filter_available_architecture(${nvcc_executable} __cuda_arch_bin 5.3 6.2 7.2 7.0 8.7 + 10.1 ) else() set(__cuda_arch_bin "${_nvcc_out}") @@ -293,6 +297,7 @@ macro(ocv_set_cuda_arch_bin_and_ptx nvcc_executable) ${_arch_ampere} ${_arch_lovelace} ${_arch_hopper} + ${_arch_blackwell} ) list(GET __cuda_arch_bin -1 __cuda_arch_ptx) endif()