mirror of
https://github.com/opencv/opencv.git
synced 2025-06-10 02:53:07 +08:00
Fixed a bug in the SIMD128 Winograd path and made the code more robust.
This commit is contained in:
parent
5d292826b2
commit
cee8c86b6e
@ -20,6 +20,7 @@ static void depthWiseBlock(const float *inptr, float *outptr, const float *weigh
|
|||||||
int inner_ybottom, bool ifMinMaxAct, bool useSIMD, bool is3x3)
|
int inner_ybottom, bool ifMinMaxAct, bool useSIMD, bool is3x3)
|
||||||
{
|
{
|
||||||
#if CV_SIMD128
|
#if CV_SIMD128
|
||||||
|
const int VEC_NLANES = 4;
|
||||||
v_float32x4 vminval = v_setall_f32(minval), vmaxval = v_setall_f32(maxval);
|
v_float32x4 vminval = v_setall_f32(minval), vmaxval = v_setall_f32(maxval);
|
||||||
|
|
||||||
v_float32x4 w0 = v_setall_f32(
|
v_float32x4 w0 = v_setall_f32(
|
||||||
@ -110,7 +111,7 @@ static void depthWiseBlock(const float *inptr, float *outptr, const float *weigh
|
|||||||
{
|
{
|
||||||
if (dy0 == 3)
|
if (dy0 == 3)
|
||||||
{
|
{
|
||||||
for (; x0 <= x1 - FAST_VEC_NLANES; x0 += FAST_VEC_NLANES)
|
for (; x0 <= x1 - VEC_NLANES; x0 += VEC_NLANES)
|
||||||
{
|
{
|
||||||
int xi_ = x0 * stride_x - pad_left;
|
int xi_ = x0 * stride_x - pad_left;
|
||||||
const float *inptr_xi = inptr + Wi * yi_ + xi_;
|
const float *inptr_xi = inptr + Wi * yi_ + xi_;
|
||||||
@ -186,7 +187,7 @@ static void depthWiseBlock(const float *inptr, float *outptr, const float *weigh
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (; x0 <= x1 - FAST_VEC_NLANES; x0 += FAST_VEC_NLANES)
|
for (; x0 <= x1 - VEC_NLANES; x0 += VEC_NLANES)
|
||||||
{
|
{
|
||||||
int xi_ = x0 * stride_x - pad_left;
|
int xi_ = x0 * stride_x - pad_left;
|
||||||
const float *inptr_xi = inptr + Wi * yi_ + xi_;
|
const float *inptr_xi = inptr + Wi * yi_ + xi_;
|
||||||
@ -211,7 +212,7 @@ static void depthWiseBlock(const float *inptr, float *outptr, const float *weigh
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (; x0 <= x1 - FAST_VEC_NLANES; x0 += FAST_VEC_NLANES)
|
for (; x0 <= x1 - VEC_NLANES; x0 += VEC_NLANES)
|
||||||
{
|
{
|
||||||
int xi_ = x0 * stride_x - pad_left, k = 0;
|
int xi_ = x0 * stride_x - pad_left, k = 0;
|
||||||
const float *inptr_xi = inptr + Wi * yi_ + xi_;
|
const float *inptr_xi = inptr + Wi * yi_ + xi_;
|
||||||
@ -314,7 +315,12 @@ void runDepthwise(InputArray _input, OutputArray _output, const Ptr<FastConv2d>&
|
|||||||
int pad_top = conv->pad_top, pad_bottom = conv->pad_bottom;
|
int pad_top = conv->pad_top, pad_bottom = conv->pad_bottom;
|
||||||
int pad_left = conv->pad_left, pad_right = conv->pad_right;
|
int pad_left = conv->pad_left, pad_right = conv->pad_right;
|
||||||
|
|
||||||
int ksize = Hk * Wk, padded_ksize = ((ksize + FAST_VEC_NLANES - 1) / FAST_VEC_NLANES) * FAST_VEC_NLANES;
|
int VEC_NLANES = 4;
|
||||||
|
#if CV_TRY_AVX2
|
||||||
|
if (conv->useAVX2)
|
||||||
|
VEC_NLANES = 8;
|
||||||
|
#endif
|
||||||
|
int ksize = Hk * Wk, padded_ksize = ((ksize + VEC_NLANES - 1) / VEC_NLANES) * VEC_NLANES;
|
||||||
|
|
||||||
const float *inp = input.ptr<float>();
|
const float *inp = input.ptr<float>();
|
||||||
float *out = output.ptr<float>();
|
float *out = output.ptr<float>();
|
||||||
|
@ -78,6 +78,7 @@ void depthWiseBlock_AVX2(const float *inptr, float *outptr, const float *weights
|
|||||||
int dilation_y, int stride_x, int stride_y, int inner_xleft, int inner_xright, int inner_ytop,
|
int dilation_y, int stride_x, int stride_y, int inner_xleft, int inner_xright, int inner_ytop,
|
||||||
int inner_ybottom, bool ifMinMaxAct, bool useSIMD, bool is3x3)
|
int inner_ybottom, bool ifMinMaxAct, bool useSIMD, bool is3x3)
|
||||||
{
|
{
|
||||||
|
const int VEC_NLANES = 8;
|
||||||
__m256 vminval = _mm256_set1_ps(minval);
|
__m256 vminval = _mm256_set1_ps(minval);
|
||||||
__m256 vmaxval = _mm256_set1_ps(maxval);
|
__m256 vmaxval = _mm256_set1_ps(maxval);
|
||||||
|
|
||||||
@ -174,7 +175,7 @@ void depthWiseBlock_AVX2(const float *inptr, float *outptr, const float *weights
|
|||||||
{
|
{
|
||||||
if (dy0 == 3)
|
if (dy0 == 3)
|
||||||
{
|
{
|
||||||
for (; x0 <= x1 - FAST_VEC_NLANES; x0 += FAST_VEC_NLANES)
|
for (; x0 <= x1 - VEC_NLANES; x0 += VEC_NLANES)
|
||||||
{
|
{
|
||||||
int xi_ = x0 * stride_x - pad_left;
|
int xi_ = x0 * stride_x - pad_left;
|
||||||
const float *inptr_xi = inptr + Wi * yi_ + xi_;
|
const float *inptr_xi = inptr + Wi * yi_ + xi_;
|
||||||
@ -250,7 +251,7 @@ void depthWiseBlock_AVX2(const float *inptr, float *outptr, const float *weights
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (; x0 <= x1 - FAST_VEC_NLANES; x0 += FAST_VEC_NLANES)
|
for (; x0 <= x1 - VEC_NLANES; x0 += VEC_NLANES)
|
||||||
{
|
{
|
||||||
int xi_ = x0 * stride_x - pad_left;
|
int xi_ = x0 * stride_x - pad_left;
|
||||||
const float *inptr_xi = inptr + Wi * yi_ + xi_;
|
const float *inptr_xi = inptr + Wi * yi_ + xi_;
|
||||||
@ -276,7 +277,7 @@ void depthWiseBlock_AVX2(const float *inptr, float *outptr, const float *weights
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (; x0 <= x1 - FAST_VEC_NLANES; x0 += FAST_VEC_NLANES)
|
for (; x0 <= x1 - VEC_NLANES; x0 += VEC_NLANES)
|
||||||
{
|
{
|
||||||
int xi_ = x0 * stride_x - pad_left, k = 0;
|
int xi_ = x0 * stride_x - pad_left, k = 0;
|
||||||
const float *inptr_xi = inptr + Wi * yi_ + xi_;
|
const float *inptr_xi = inptr + Wi * yi_ + xi_;
|
||||||
@ -701,7 +702,6 @@ void _fx_winograd_AtXA_8x8_f32(const float* inptr, int inpstep,
|
|||||||
z50 = _mm256_add_ps(vbias, z50);
|
z50 = _mm256_add_ps(vbias, z50);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO make sure the lenght of bpptr is 8.
|
|
||||||
if (bpptr)
|
if (bpptr)
|
||||||
{
|
{
|
||||||
z00 = _mm256_add_ps(z00, _mm256_loadu_ps(bpptr));
|
z00 = _mm256_add_ps(z00, _mm256_loadu_ps(bpptr));
|
||||||
|
@ -49,6 +49,15 @@ Ptr<FastConv2d> initFastConv2d(
|
|||||||
useWinograd && ((conv->useSIMD128 || conv->useAVX2 || conv->useNEON) && Hk == 3 && Wk == 3 &&
|
useWinograd && ((conv->useSIMD128 || conv->useAVX2 || conv->useNEON) && Hk == 3 && Wk == 3 &&
|
||||||
dilation_y == 1 && dilation_x == 1 && stride_y == 1 && stride_x == 1) ? _FX_CONV_TYPE_WINOGRAD3X3 :
|
dilation_y == 1 && dilation_x == 1 && stride_y == 1 && stride_x == 1) ? _FX_CONV_TYPE_WINOGRAD3X3 :
|
||||||
_FX_CONV_TYPE_GENERIC;
|
_FX_CONV_TYPE_GENERIC;
|
||||||
|
|
||||||
|
int VEC_NLANES = 4;
|
||||||
|
#if CV_TRY_AVX2
|
||||||
|
if (!conv->useAVX2 && conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3) // convert Winograd to generic conv.
|
||||||
|
conv->conv_type = _FX_CONV_TYPE_GENERIC;
|
||||||
|
if (conv->useAVX2)
|
||||||
|
VEC_NLANES = 8;
|
||||||
|
#endif
|
||||||
|
|
||||||
Mat weightsMat = _weightsMat.getMat();
|
Mat weightsMat = _weightsMat.getMat();
|
||||||
auto wShape = shape(weightsMat);
|
auto wShape = shape(weightsMat);
|
||||||
const size_t wstep = weightsMat.step1();
|
const size_t wstep = weightsMat.step1();
|
||||||
@ -61,7 +70,7 @@ Ptr<FastConv2d> initFastConv2d(
|
|||||||
int ksize = Hk*Wk;
|
int ksize = Hk*Wk;
|
||||||
|
|
||||||
// this code aims to let memory fit with vector size.
|
// this code aims to let memory fit with vector size.
|
||||||
int padded_ksize = ((ksize + FAST_VEC_NLANES-1) / FAST_VEC_NLANES) * FAST_VEC_NLANES;
|
int padded_ksize = ((ksize + VEC_NLANES-1) / VEC_NLANES) * VEC_NLANES;
|
||||||
int nweights = C*padded_ksize;
|
int nweights = C*padded_ksize;
|
||||||
conv->weightsBuf.reserve(nweights + VEC_ALIGN);
|
conv->weightsBuf.reserve(nweights + VEC_ALIGN);
|
||||||
conv->weightsBufPtr = alignPtr(conv->weightsBuf.data(), VEC_ALIGN);
|
conv->weightsBufPtr = alignPtr(conv->weightsBuf.data(), VEC_ALIGN);
|
||||||
@ -265,7 +274,8 @@ void runFastConv2d(InputArray _input, OutputArray _output, const Ptr<FastConv2d>
|
|||||||
else if (conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3 && inputShape[2] >= 12 && inputShape[3] >= 12) // winograd
|
else if (conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3 && inputShape[2] >= 12 && inputShape[3] >= 12) // winograd
|
||||||
{
|
{
|
||||||
CV_Assert(conv->weightsWinoBufPtr);
|
CV_Assert(conv->weightsWinoBufPtr);
|
||||||
return runWinograd63(input, fusedAddMat, output, conv, ntasks, minval, maxval, activ, ifMinMaxAct);
|
if (runWinograd63(input, fusedAddMat, output, conv, ntasks, minval, maxval, activ, ifMinMaxAct))
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int N = inputShape[0], C = inputShape[1], Hi = inputShape[2], Wi = inputShape[3]; // [N, C, H, W]
|
int N = inputShape[0], C = inputShape[1], Hi = inputShape[2], Wi = inputShape[3]; // [N, C, H, W]
|
||||||
|
@ -12,35 +12,25 @@
|
|||||||
#if CV_NEON && CV_NEON_AARCH64 // 32 registers.
|
#if CV_NEON && CV_NEON_AARCH64 // 32 registers.
|
||||||
#define CONV_MR 4
|
#define CONV_MR 4
|
||||||
#define CONV_NR 28
|
#define CONV_NR 28
|
||||||
enum { FAST_VEC_NLANES=4 };
|
|
||||||
#elif CV_NEON // 16 registers.
|
#elif CV_NEON // 16 registers.
|
||||||
#define CONV_MR 4
|
#define CONV_MR 4
|
||||||
#define CONV_NR 12
|
#define CONV_NR 12
|
||||||
enum { FAST_VEC_NLANES=4 };
|
|
||||||
#else // SIMD 128, AVX or AVX2
|
#else // SIMD 128, AVX or AVX2
|
||||||
#define CONV_MR 4
|
#define CONV_MR 4
|
||||||
#define CONV_NR 24
|
#define CONV_NR 24
|
||||||
|
|
||||||
#if CV_TRY_AVX2
|
|
||||||
enum { FAST_VEC_NLANES=8 }; // AVX2
|
|
||||||
#else
|
|
||||||
enum { FAST_VEC_NLANES=4 }; // SIMD 128
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Winograd Params
|
||||||
enum {
|
enum {
|
||||||
_FX_WINO_STEP=6,
|
_FX_WINO_STEP=6,
|
||||||
_FX_WINO_KSIZE=3,
|
_FX_WINO_KSIZE=3,
|
||||||
_FX_WINO_SIZE=_FX_WINO_STEP+_FX_WINO_KSIZE-1,
|
_FX_WINO_SIZE=_FX_WINO_STEP+_FX_WINO_KSIZE-1,
|
||||||
_FX_WINO_AREA=_FX_WINO_SIZE*_FX_WINO_SIZE,
|
_FX_WINO_AREA=_FX_WINO_SIZE*_FX_WINO_SIZE,
|
||||||
|
|
||||||
#if CV_TRY_AVX2 || (CV_NEON && CV_NEON_AARCH64)
|
|
||||||
_FX_WINO_KBLOCK = 4,
|
_FX_WINO_KBLOCK = 4,
|
||||||
|
#if (CV_NEON && CV_NEON_AARCH64) || CV_TRY_AVX2
|
||||||
_FX_WINO_IBLOCK = 6,
|
_FX_WINO_IBLOCK = 6,
|
||||||
#else
|
#else
|
||||||
_FX_WINO_KBLOCK = 4,
|
|
||||||
_FX_WINO_IBLOCK = 3,
|
_FX_WINO_IBLOCK = 3,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -52,8 +42,8 @@ enum {
|
|||||||
|
|
||||||
_FX_WINO_NATOMS_F32 = _FX_WINO_AREA / _FX_WINO_ATOM_F32, // for AVX2, it is 8, otherwise, it's 16.
|
_FX_WINO_NATOMS_F32 = _FX_WINO_AREA / _FX_WINO_ATOM_F32, // for AVX2, it is 8, otherwise, it's 16.
|
||||||
};
|
};
|
||||||
|
|
||||||
enum { _FX_CONV_TYPE_GENERIC=0, _FX_CONV_TYPE_DEPTHWISE=1, _FX_CONV_TYPE_WINOGRAD3X3=2 };
|
enum { _FX_CONV_TYPE_GENERIC=0, _FX_CONV_TYPE_DEPTHWISE=1, _FX_CONV_TYPE_WINOGRAD3X3=2 };
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace cv {
|
namespace cv {
|
||||||
namespace dnn {
|
namespace dnn {
|
||||||
@ -77,8 +67,18 @@ struct FastConv2d
|
|||||||
#else
|
#else
|
||||||
bool useSIMD128 = false;
|
bool useSIMD128 = false;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if CV_TRY_AVX2
|
||||||
bool useAVX2 = checkHardwareSupport(CPU_AVX2);
|
bool useAVX2 = checkHardwareSupport(CPU_AVX2);
|
||||||
|
#else
|
||||||
|
bool useAVX2 = false;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CV_NEON
|
||||||
bool useNEON = checkHardwareSupport(CPU_NEON);
|
bool useNEON = checkHardwareSupport(CPU_NEON);
|
||||||
|
#else
|
||||||
|
bool useNEON = false;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
// return a FastConv2d instance.
|
// return a FastConv2d instance.
|
||||||
@ -99,7 +99,7 @@ void runFastConv2d(InputArray _input, OutputArray _output, const Ptr<FastConv2d>
|
|||||||
void runDepthwise(InputArray _input, OutputArray _output, const Ptr<FastConv2d>& conv, float minval, float maxval,
|
void runDepthwise(InputArray _input, OutputArray _output, const Ptr<FastConv2d>& conv, float minval, float maxval,
|
||||||
ActivationLayer* activ, bool ifMinMaxAct);
|
ActivationLayer* activ, bool ifMinMaxAct);
|
||||||
|
|
||||||
void runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _output, const Ptr<FastConv2d>& conv, int ntasks,
|
int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _output, const Ptr<FastConv2d>& conv, int ntasks,
|
||||||
float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct);
|
float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct);
|
||||||
|
|
||||||
} // namespace dnn
|
} // namespace dnn
|
||||||
|
@ -13,6 +13,8 @@
|
|||||||
#include "fast_convolution.hpp"
|
#include "fast_convolution.hpp"
|
||||||
|
|
||||||
namespace cv { namespace dnn {
|
namespace cv { namespace dnn {
|
||||||
|
|
||||||
|
#if CV_NEON || CV_SIMD128 || CV_TRY_AVX2
|
||||||
enum { VEC_ALIGN = 32, DFT_TYPE = CV_32F }; // Memory alignment.
|
enum { VEC_ALIGN = 32, DFT_TYPE = CV_32F }; // Memory alignment.
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -141,7 +143,7 @@ _fx_winograd_accum_f32(const float* inwptr, const float* wptr,
|
|||||||
vst1q_f32(outbuf + 20*64, s32);
|
vst1q_f32(outbuf + 20*64, s32);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_SIMD
|
#elif CV_SIMD128
|
||||||
CV_Assert(_FX_WINO_IBLOCK == 3 && _FX_WINO_KBLOCK == 4);
|
CV_Assert(_FX_WINO_IBLOCK == 3 && _FX_WINO_KBLOCK == 4);
|
||||||
for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++,
|
for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++,
|
||||||
outbuf += _FX_WINO_ATOM_F32)
|
outbuf += _FX_WINO_ATOM_F32)
|
||||||
@ -183,15 +185,15 @@ _fx_winograd_accum_f32(const float* inwptr, const float* wptr,
|
|||||||
v_store(outbuf, s00);
|
v_store(outbuf, s00);
|
||||||
v_store(outbuf + 1*64, s01);
|
v_store(outbuf + 1*64, s01);
|
||||||
v_store(outbuf + 2*64, s02);
|
v_store(outbuf + 2*64, s02);
|
||||||
v_store(outbuf + 6*64, s10);
|
v_store(outbuf + 3*64, s10);
|
||||||
v_store(outbuf + 7*64, s11);
|
v_store(outbuf + 4*64, s11);
|
||||||
v_store(outbuf + 8*64, s12);
|
v_store(outbuf + 5*64, s12);
|
||||||
v_store(outbuf + 12*64, s20);
|
v_store(outbuf + 6*64, s20);
|
||||||
v_store(outbuf + 13*64, s21);
|
v_store(outbuf + 7*64, s21);
|
||||||
v_store(outbuf + 14*64, s22);
|
v_store(outbuf + 8*64, s22);
|
||||||
v_store(outbuf + 18*64, s30);
|
v_store(outbuf + 9*64, s30);
|
||||||
v_store(outbuf + 19*64, s31);
|
v_store(outbuf + 10*64, s31);
|
||||||
v_store(outbuf + 20*64, s32);
|
v_store(outbuf + 11*64, s32);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32;
|
for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32;
|
||||||
@ -406,7 +408,7 @@ _fx_winograd_BtXB_8x8_f32(const float* inptr, int inpstep,
|
|||||||
vst1q_f32(outptr + outstep*13, z61);
|
vst1q_f32(outptr + outstep*13, z61);
|
||||||
vst1q_f32(outptr + outstep*14, z70);
|
vst1q_f32(outptr + outstep*14, z70);
|
||||||
vst1q_f32(outptr + outstep*15, z71);
|
vst1q_f32(outptr + outstep*15, z71);
|
||||||
#elif CV_SIMD
|
#elif CV_SIMD128
|
||||||
v_float32x4 x00 = v_load(inptr), x01 = v_load(inptr + 4);
|
v_float32x4 x00 = v_load(inptr), x01 = v_load(inptr + 4);
|
||||||
v_float32x4 x10 = v_load(inptr + inpstep), x11 = v_load(inptr + inpstep + 4);
|
v_float32x4 x10 = v_load(inptr + inpstep), x11 = v_load(inptr + inpstep + 4);
|
||||||
v_float32x4 x20 = v_load(inptr + inpstep*2), x21 = v_load(inptr + inpstep*2 + 4);
|
v_float32x4 x20 = v_load(inptr + inpstep*2), x21 = v_load(inptr + inpstep*2 + 4);
|
||||||
@ -750,8 +752,7 @@ _fx_winograd_AtXA_8x8_f32(const float* inptr, int inpstep,
|
|||||||
vst1_f32(outptr + outstep*4 + 4, vget_low_f32(z41));
|
vst1_f32(outptr + outstep*4 + 4, vget_low_f32(z41));
|
||||||
vst1q_f32(outptr + outstep*5, z50);
|
vst1q_f32(outptr + outstep*5, z50);
|
||||||
vst1_f32(outptr + outstep*5 + 4, vget_low_f32(z51));
|
vst1_f32(outptr + outstep*5 + 4, vget_low_f32(z51));
|
||||||
//#elif CV_AVX2
|
#elif CV_SIMD128
|
||||||
#elif CV_SIMD
|
|
||||||
v_float32x4 x00 = v_load(inptr), x01 = v_load(inptr + 4);
|
v_float32x4 x00 = v_load(inptr), x01 = v_load(inptr + 4);
|
||||||
v_float32x4 x10 = v_load(inptr + inpstep), x11 = v_load(inptr + inpstep + 4);
|
v_float32x4 x10 = v_load(inptr + inpstep), x11 = v_load(inptr + inpstep + 4);
|
||||||
v_float32x4 x20 = v_load(inptr + inpstep*2), x21 = v_load(inptr + inpstep*2 + 4);
|
v_float32x4 x20 = v_load(inptr + inpstep*2), x21 = v_load(inptr + inpstep*2 + 4);
|
||||||
@ -919,7 +920,7 @@ _fx_winograd_AtXA_8x8_f32(const float* inptr, int inpstep,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _output, const Ptr<FastConv2d>& conv,
|
int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _output, const Ptr<FastConv2d>& conv,
|
||||||
int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct)
|
int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct)
|
||||||
{
|
{
|
||||||
Mat input = _input.getMat();
|
Mat input = _input.getMat();
|
||||||
@ -1138,5 +1139,15 @@ void runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _outp
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}});
|
}});
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _output, const Ptr<FastConv2d>& conv,
|
||||||
|
int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct)
|
||||||
|
{ // Fallback stub in the #else branch; presumably pairs with the `#if CV_NEON || CV_SIMD128 || CV_TRY_AVX2` guard shown earlier in this file - confirm. All arguments are ignored.
|
||||||
|
return 0; // 0 = Winograd was not executed; the caller in runFastConv2d returns early only on a nonzero result.
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}} // namespace cv::dnn
|
}} // namespace cv::dnn
|
||||||
|
Loading…
Reference in New Issue
Block a user