Merge remote-tracking branch 'upstream/3.4' into merge-3.4

This commit is contained in:
Alexander Alekhin 2020-06-23 19:15:47 +00:00
commit c81d785ada
4 changed files with 19 additions and 5 deletions

View File

@ -82,13 +82,14 @@ if(CUDA_FOUND)
message(STATUS "CUDA detected: " ${CUDA_VERSION}) message(STATUS "CUDA detected: " ${CUDA_VERSION})
set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Turing") set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "Ampere")
set(_arch_fermi "2.0") set(_arch_fermi "2.0")
set(_arch_kepler "3.0;3.5;3.7") set(_arch_kepler "3.0;3.5;3.7")
set(_arch_maxwell "5.0;5.2") set(_arch_maxwell "5.0;5.2")
set(_arch_pascal "6.0;6.1") set(_arch_pascal "6.0;6.1")
set(_arch_volta "7.0") set(_arch_volta "7.0")
set(_arch_turing "7.5") set(_arch_turing "7.5")
set(_arch_ampere "8.0")
if(NOT CMAKE_CROSSCOMPILING) if(NOT CMAKE_CROSSCOMPILING)
list(APPEND _generations "Auto") list(APPEND _generations "Auto")
endif() endif()
@ -163,6 +164,8 @@ if(CUDA_FOUND)
set(__cuda_arch_bin ${_arch_volta}) set(__cuda_arch_bin ${_arch_volta})
elseif(CUDA_GENERATION STREQUAL "Turing") elseif(CUDA_GENERATION STREQUAL "Turing")
set(__cuda_arch_bin ${_arch_turing}) set(__cuda_arch_bin ${_arch_turing})
elseif(CUDA_GENERATION STREQUAL "Ampere")
set(__cuda_arch_bin ${_arch_ampere})
elseif(CUDA_GENERATION STREQUAL "Auto") elseif(CUDA_GENERATION STREQUAL "Auto")
ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out) ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
if(NOT _nvcc_res EQUAL 0) if(NOT _nvcc_res EQUAL 0)
@ -180,7 +183,13 @@ if(CUDA_FOUND)
ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out) ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
if(NOT _nvcc_res EQUAL 0) if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.") message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
set(__cuda_arch_bin "5.3 6.2 7.2") # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0)
ocv_filter_available_architecture(__cuda_arch_bin
5.3
6.2
7.2
7.0
)
else() else()
set(__cuda_arch_bin "${_nvcc_out}") set(__cuda_arch_bin "${_nvcc_out}")
endif() endif()
@ -193,6 +202,7 @@ if(CUDA_FOUND)
${_arch_pascal} ${_arch_pascal}
${_arch_volta} ${_arch_volta}
${_arch_turing} ${_arch_turing}
${_arch_ampere}
) )
endif() endif()
endif() endif()

View File

@ -346,7 +346,8 @@
year = {2003}, year = {2003},
pages = {363--370}, pages = {363--370},
publisher = {Springer}, publisher = {Springer},
url = {https://arxiv.org/pdf/1808.01752} url = {https://doi.org/10.1007/3-540-45103-X_50},
doi = {10.1007/3-540-45103-X_50}
} }
@inproceedings{Farsiu03, @inproceedings{Farsiu03,
author = {Farsiu, Sina and Robinson, Dirk and Elad, Michael and Milanfar, Peyman}, author = {Farsiu, Sina and Robinson, Dirk and Elad, Michael and Milanfar, Peyman},

View File

@ -797,7 +797,7 @@ namespace cv {
int classes = getParam<int>(layer_params, "classes", -1); int classes = getParam<int>(layer_params, "classes", -1);
int num_of_anchors = getParam<int>(layer_params, "num", -1); int num_of_anchors = getParam<int>(layer_params, "num", -1);
float thresh = getParam<float>(layer_params, "thresh", 0.2); float thresh = getParam<float>(layer_params, "thresh", 0.2);
float nms_threshold = getParam<float>(layer_params, "nms_threshold", 0.4); float nms_threshold = getParam<float>(layer_params, "nms_threshold", 0.0);
float scale_x_y = getParam<float>(layer_params, "scale_x_y", 1.0); float scale_x_y = getParam<float>(layer_params, "scale_x_y", 1.0);
std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string()); std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());

View File

@ -763,8 +763,11 @@ struct MishFunctor : public BaseFunctor
{ {
for( int i = 0; i < len; i++ ) for( int i = 0; i < len; i++ )
{ {
// Use fast approximation introduced in https://github.com/opencv/opencv/pull/17200
float x = srcptr[i]; float x = srcptr[i];
dstptr[i] = x * tanh(log(1.0f + exp(x))); float eX = exp(std::min(x, 20.f));
float n = (eX + 2) * eX;
dstptr[i] = (x * n) / (n + 2);
} }
} }
} }