Merge pull request #26820 from johnnynunez:patch-1

Initial support Blackwell GPU arch #26820 
 
10.0 blackwell b100/b200
12.0 blackwell rtx50

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Johnny 2025-01-25 07:51:27 +01:00 committed by GitHub
parent 6bffa64af4
commit 4b2a33a5c6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -95,7 +95,7 @@ endfunction()
macro(ocv_initialize_nvidia_device_generations)
OCV_OPTION(CUDA_ENABLE_DEPRECATED_GENERATION "Enable deprecated generations in the list" OFF)
set(_generations "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "Lovelace" "Hopper")
set(_generations "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "Lovelace" "Hopper" "Blackwell")
if(CUDA_ENABLE_DEPRECATED_GENERATION)
set(_generations "Fermi" "${_generations}")
set(_generations "Kepler" "${_generations}")
@ -109,6 +109,7 @@ macro(ocv_initialize_nvidia_device_generations)
set(_arch_ampere "8.0;8.6")
set(_arch_lovelace "8.9")
set(_arch_hopper "9.0")
set(_arch_blackwell "10.0;12.0")
if(NOT CMAKE_CROSSCOMPILING)
list(APPEND _generations "Auto")
endif()
@ -246,6 +247,8 @@ macro(ocv_set_cuda_arch_bin_and_ptx nvcc_executable)
set(__cuda_arch_bin ${_arch_lovelace})
elseif(CUDA_GENERATION STREQUAL "Hopper")
set(__cuda_arch_bin ${_arch_hopper})
elseif(CUDA_GENERATION STREQUAL "Blackwell")
set(__cuda_arch_bin ${_arch_blackwell})
elseif(CUDA_GENERATION STREQUAL "Auto")
ocv_detect_native_cuda_arch(${nvcc_executable} _nvcc_res _nvcc_out)
if(NOT _nvcc_res EQUAL 0)
@ -270,13 +273,14 @@ macro(ocv_set_cuda_arch_bin_and_ptx nvcc_executable)
endif()
if(NOT _nvcc_res EQUAL 0)
message(STATUS "CUDA: Automatic detection of CUDA generation failed. Going to build for all known architectures")
# TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) Orin (8.7)
# TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) Orin (8.7) Thor (10.1)
ocv_filter_available_architecture(${nvcc_executable} __cuda_arch_bin
5.3
6.2
7.2
7.0
8.7
10.1
)
else()
set(__cuda_arch_bin "${_nvcc_out}")
@ -293,6 +297,7 @@ macro(ocv_set_cuda_arch_bin_and_ptx nvcc_executable)
${_arch_ampere}
${_arch_lovelace}
${_arch_hopper}
${_arch_blackwell}
)
list(GET __cuda_arch_bin -1 __cuda_arch_ptx)
endif()