From 82616eec41f6a6989a3b507822c17fc81a10e296 Mon Sep 17 00:00:00 2001 From: zihaomu Date: Mon, 9 Jan 2023 13:40:04 +0800 Subject: [PATCH] fix possible segmentation fault error in winograd on x86 --- .../src/layers/fast_convolution/fast_convolution.avx2.cpp | 2 +- .../dnn/src/layers/fast_convolution/fast_convolution.cpp | 8 +++++++- .../src/layers/fast_convolution/winograd_3x3s1_f63.cpp | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp b/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp index 0d3c144762..c98fbe72bd 100644 --- a/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp +++ b/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp @@ -119,7 +119,7 @@ void convBlock_AVX2(int np, const float* a, const float* b, float* c, int ldc, b void _fx_winograd_accum_f32(const float* inwptr, const float* wptr, float* outbuf, int Cg, int iblock) { - CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4);// && _FX_WINO_ATOM_F32 == 8); + CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4 && _FX_WINO_ATOM_F32 == 8); if (iblock > 3) { for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++, diff --git a/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp b/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp index 1cde7b324f..946980bebe 100644 --- a/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp +++ b/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp @@ -105,6 +105,12 @@ Ptr initFastConv( conv->conv_type = _FX_CONV_TYPE_GENERIC; #endif +#if CV_TRY_AVX2 + // Disable Winograd when CV_TRY_AVX2 is true, but conv->useAVX2 is false. 
+ if (conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3 && !conv->useAVX2) + conv->conv_type = _FX_CONV_TYPE_GENERIC; +#endif + Mat weightsMat = _weightsMat.getMat(); auto wShape = shape(weightsMat); const size_t wstep = weightsMat.step1(); @@ -257,7 +263,7 @@ Ptr initFastConv( // we can always read MR elements starting from any valid index { int k = 0, nbias = K + VEC_ALIGN; - conv->biasBuf.reserve(nbias); + conv->biasBuf.resize(nbias); float* biasBufPtr = conv->biasBuf.data(); for(; k < K; k++) biasBufPtr[k] = srcBias ? srcBias[k] : 0.f; diff --git a/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp b/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp index e3b8088410..b0ccfd0cd2 100644 --- a/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp +++ b/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp @@ -22,7 +22,7 @@ _fx_winograd_accum_f32(const float* inwptr, const float* wptr, float* outbuf, int Cg, int iblock) { #if CV_NEON && CV_NEON_AARCH64 - CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4); + CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4 && _FX_WINO_ATOM_F32 == 4); if (iblock > 3) { for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++, @@ -144,7 +144,7 @@ _fx_winograd_accum_f32(const float* inwptr, const float* wptr, } } #elif CV_SIMD128 - CV_Assert(_FX_WINO_IBLOCK == 3 && _FX_WINO_KBLOCK == 4); + CV_Assert(_FX_WINO_IBLOCK == 3 && _FX_WINO_KBLOCK == 4 && _FX_WINO_ATOM_F32 == 4); for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++, outbuf += _FX_WINO_ATOM_F32) {