diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake
index d7056eb0cd..6740fdcf1a 100644
--- a/cmake/OpenCVDetectCUDA.cmake
+++ b/cmake/OpenCVDetectCUDA.cmake
@@ -82,13 +82,14 @@ if(CUDA_FOUND)
 
   message(STATUS "CUDA detected: " ${CUDA_VERSION})
 
-  set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Turing")
+  set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "Ampere")
   set(_arch_fermi "2.0")
   set(_arch_kepler "3.0;3.5;3.7")
   set(_arch_maxwell "5.0;5.2")
   set(_arch_pascal "6.0;6.1")
   set(_arch_volta "7.0")
   set(_arch_turing "7.5")
+  set(_arch_ampere "8.0")
   if(NOT CMAKE_CROSSCOMPILING)
     list(APPEND _generations "Auto")
   endif()
@@ -163,6 +164,8 @@ if(CUDA_FOUND)
       set(__cuda_arch_bin ${_arch_volta})
     elseif(CUDA_GENERATION STREQUAL "Turing")
       set(__cuda_arch_bin ${_arch_turing})
+    elseif(CUDA_GENERATION STREQUAL "Ampere")
+      set(__cuda_arch_bin ${_arch_ampere})
     elseif(CUDA_GENERATION STREQUAL "Auto")
       ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
       if(NOT _nvcc_res EQUAL 0)
@@ -180,7 +183,13 @@ if(CUDA_FOUND)
       ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
       if(NOT _nvcc_res EQUAL 0)
         message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
-        set(__cuda_arch_bin "5.3 6.2 7.2")
+        # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0)
+        ocv_filter_available_architecture(__cuda_arch_bin
+            5.3
+            6.2
+            7.2
+            7.0
+        )
       else()
         set(__cuda_arch_bin "${_nvcc_out}")
       endif()
@@ -193,6 +202,7 @@ if(CUDA_FOUND)
           ${_arch_pascal}
           ${_arch_volta}
           ${_arch_turing}
+          ${_arch_ampere}
       )
     endif()
   endif()
diff --git a/doc/opencv.bib b/doc/opencv.bib
index 901fbe86ee..73c4668ff8 100644
--- a/doc/opencv.bib
+++ b/doc/opencv.bib
@@ -346,7 +346,8 @@
     year = {2003},
     pages = {363--370},
     publisher = {Springer},
-    url = {https://arxiv.org/pdf/1808.01752}
+    url = {https://doi.org/10.1007/3-540-45103-X_50},
+    doi = {10.1007/3-540-45103-X_50}
 }
 @inproceedings{Farsiu03,
     author = {Farsiu, Sina and Robinson, Dirk and Elad, Michael and Milanfar, Peyman},
diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp
index 699b7447ef..664a1d617b 100644
--- a/modules/dnn/src/darknet/darknet_io.cpp
+++ b/modules/dnn/src/darknet/darknet_io.cpp
@@ -797,7 +797,7 @@ namespace cv {
                     int classes = getParam(layer_params, "classes", -1);
                     int num_of_anchors = getParam(layer_params, "num", -1);
                     float thresh = getParam(layer_params, "thresh", 0.2);
-                    float nms_threshold = getParam(layer_params, "nms_threshold", 0.4);
+                    float nms_threshold = getParam(layer_params, "nms_threshold", 0.0);
                     float scale_x_y = getParam(layer_params, "scale_x_y", 1.0);
 
                     std::string anchors_values = getParam(layer_params, "anchors", std::string());
diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp
index 85b39ddfea..3765472d80 100644
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -763,8 +763,11 @@ struct MishFunctor : public BaseFunctor
         {
             for( int i = 0; i < len; i++ )
             {
+                // Use fast approximation introduced in https://github.com/opencv/opencv/pull/17200
                 float x = srcptr[i];
-                dstptr[i] = x * tanh(log(1.0f + exp(x)));
+                float eX = exp(std::min(x, 20.f));
+                float n = (eX + 2) * eX;
+                dstptr[i] = (x * n) / (n + 2);
             }
         }
     }