enable tensor cores for fp16 convolutions

2025-08-06 14:36:36 +08:00 · 2019-12-16 15:38:12 +05:30 · 2019-12-16 15:38:12 +05:30 · cf93df41fc
commit cf93df41fc
parent c2b6c67431
1 changed files with 2 additions and 0 deletions
--- a/modules/dnn/src/cuda4dnn/csl/cudnn/convolution.hpp
+++ b/modules/dnn/src/cuda4dnn/csl/cudnn/convolution.hpp
@@ -224,6 +224,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cu
                    );
                }
                CUDA4DNN_CHECK_CUDNN(cudnnSetConvolutionGroupCount(descriptor, group_count));
+                if (std::is_same<T, half>::value)
+                    CUDA4DNN_CHECK_CUDNN(cudnnSetConvolutionMathType(descriptor, CUDNN_TENSOR_OP_MATH));
            } catch (...) {
                /* cudnnDestroyConvolutionDescriptor will not fail for a valid desriptor object */
                CUDA4DNN_CHECK_CUDNN(cudnnDestroyConvolutionDescriptor(descriptor));