add enableWinograd API for Net.

Commit 1e2ceca4df, parent 347246901e.

This commit adds a `Net::enableWinograd` API to OpenCV's DNN module: a net-wide switch (plus a per-layer `use_winograd` parameter) that enables or disables the Winograd compute branch for 3x3 convolutions, which speeds them up at a small loss of accuracy.
```diff
@@ -259,6 +259,7 @@ CV__DNN_INLINE_NS_BEGIN
     bool fusedActivation = false;
     bool fusedAdd = false;
     bool isConv2D = false; // Should be deleted after fastconv branch support Conv1D and Conv3D.
+    bool useWinograd = false; // Flag whether to use Winograd to speed up 3x3 convolution.
 };

 class CV_EXPORTS ConvolutionLayerInt8 : public BaseConvolutionLayer
```
```diff
@@ -270,6 +271,7 @@ CV__DNN_INLINE_NS_BEGIN
     // quantization type flag. The perChannel default is true, that means it contains the parameters
     // of per-Channel quantization. Otherwise, that means this layer contains per-Tensor quantized parameters.
     bool per_channel;
+    bool useWinograd = true; // Flag whether to use Winograd to speed up 3x3 convolution.
     static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
 };

```
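A note on the defaults above: the FP32 `BaseConvolutionLayer` member is declared `useWinograd = false`, while the Int8 layer declares `useWinograd = true`. In practice the effective default for FP32 convolutions also ends up being true, because the parameter-parsing change later in this diff reads the per-layer `use_winograd` key with a default of `true` and overwrites the member.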
```diff
@@ -837,6 +837,12 @@ CV__DNN_INLINE_NS_BEGIN
      */
     CV_WRAP void enableFusion(bool fusion);

+    /** @brief Enables or disables the Winograd compute branch. The Winograd compute branch can speed up
+     * 3x3 Convolution at a small loss of accuracy.
+     * @param useWinograd true to enable the Winograd compute branch. The default is true.
+     */
+    CV_WRAP void enableWinograd(bool useWinograd);
+
     /** @brief Returns overall time for inference and timings (in ticks) for layers.
      *
      * Indexes in returned vector correspond to layers ids. Some layers can be fused with others,
```
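To show how the new public API is meant to be used, here is a minimal sketch; the model filename and input size are placeholder assumptions, not part of this commit:

```cpp
#include <opencv2/dnn.hpp>
#include <opencv2/core.hpp>

int main()
{
    // Load some network (hypothetical model file).
    cv::dnn::Net net = cv::dnn::readNet("model.onnx");

    // Disable the Winograd branch, e.g. when exact convolution output
    // matters more than the 3x3 speedup.
    net.enableWinograd(false);

    // Run inference as usual; placeholder 224x224 input.
    cv::Mat img = cv::Mat::zeros(224, 224, CV_8UC3);
    net.setInput(cv::dnn::blobFromImage(img));
    cv::Mat out = net.forward();
    return 0;
}
```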
```diff
@@ -41,7 +41,7 @@ public:
     BaseConvolutionLayerInt8Impl(const LayerParams &params)
     {
         setParamsFrom(params);
-        getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads);
+        getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads, useWinograd);

         numOutput = params.get<int>("num_output");
         int ngroups = params.get<int>("group", 1);
```
```diff
@@ -23,7 +23,7 @@ namespace dnn
 {
 void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
                                 std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
-                                cv::String &padMode, std::vector<size_t>& adjust_pads);
+                                cv::String &padMode, std::vector<size_t>& adjust_pads, bool& useWinograd);

 void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
                             std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);
```
```diff
@@ -89,7 +89,8 @@ public:
     BaseConvolutionLayerImpl(const LayerParams &params)
     {
         setParamsFrom(params);
-        getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads);
+        getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations,
+                                   padMode, adjust_pads, useWinograd);

         numOutput = params.get<int>("num_output");
         int ngroups = params.get<int>("group", 1);
```
```diff
@@ -2112,7 +2113,7 @@ public:
         int dilation_w = dilations.back();

         fastConv2dImpl = initFastConv2d(ngroups, K, C, Hk, Wk, stride_w, stride_h, dilation_w,
-                                        dilation_h, pads_begin, pads_end, weightsMat, &biasvec[0]);
+                                        dilation_h, pads_begin, pads_end, weightsMat, &biasvec[0], useWinograd);
     }

     if (fastConv2dImpl)
```
```diff
@@ -23,7 +23,8 @@ Ptr<FastConv2d> initFastConv2d(
         const std::vector<size_t>& pads_begin,
         const std::vector<size_t>& pads_end,
         InputArray _weightsMat,
-        float* srcBias)
+        float* srcBias,
+        bool useWinograd)
 {
     Ptr<FastConv2d> conv = makePtr<FastConv2d>();

```
```diff
@@ -48,11 +49,11 @@ Ptr<FastConv2d> initFastConv2d(
     const size_t wstep = weightsMat.step1();

 #if CV_NEON // For now, winograd is ARM platform only.
-    if (ngroups == 1 && Hk ==3 && Wk == 3 && stride_x == 1 && stride_y == 1 &&
+    if (useWinograd && ngroups == 1 && Hk ==3 && Wk == 3 && stride_x == 1 && stride_y == 1 &&
         dilation_x == 1 && dilation_y ==1 && K >= 16 && C >= 16)
-        conv->ifWinograd63 = true;
+        conv->useWinograd63 = true;
 #else
-    conv->ifWinograd63 = false;
+    conv->useWinograd63 = false;
 #endif

     float *srcWeights = (float *)weightsMat.data;
```
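The gate above is fairly narrow: Winograd F(6x6, 3x3) only kicks in for single-group 3x3 convolutions with unit stride and dilation and at least 16 input and output channels, and only on NEON-capable ARM. Restated as a standalone predicate (the function name and factoring are illustrative, not part of the patch):

```cpp
// Sketch of the Winograd-eligibility test from initFastConv2d.
static bool winograd63Eligible(bool useWinograd, int ngroups,
                               int Hk, int Wk, int stride_x, int stride_y,
                               int dilation_x, int dilation_y, int K, int C)
{
#if CV_NEON  // for now, the Winograd branch is ARM/NEON only
    return useWinograd && ngroups == 1 && Hk == 3 && Wk == 3 &&
           stride_x == 1 && stride_y == 1 &&
           dilation_x == 1 && dilation_y == 1 &&
           K >= 16 && C >= 16;  // K: output channels, C: input channels
#else
    (void)useWinograd; (void)ngroups; (void)Hk; (void)Wk; (void)stride_x;
    (void)stride_y; (void)dilation_x; (void)dilation_y; (void)K; (void)C;
    return false;
#endif
}
```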
```diff
@@ -115,7 +116,7 @@ Ptr<FastConv2d> initFastConv2d(
     }});

     // Prepare Weight for Winograd F(6x6, 3x3)
-    if (conv->ifWinograd63)
+    if (conv->useWinograd63)
     {
         initWinograd63(conv, weightsMat, K, C);
     }
```
```diff
@@ -191,10 +192,7 @@ void runFastConv2d(InputArray _input, OutputArray _output, const Ptr<FastConv2d>
     }

 #if CV_NEON
-    if (conv->ifWinograd63
-            && inputShape[2] > 12 && inputShape[3] > 12
-            && inputShape[2] < 120 && inputShape[3] < 120
-            )
+    if (conv->useWinograd63 && inputShape[2] > 12 && inputShape[3] > 12)
     {
         if (runWinograd63(input, fusedAddMat, output, conv, ntasks, minval, maxval, activ, ifMinMaxAct))
             return;
```
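Besides the rename from `ifWinograd63` to `useWinograd63`, this hunk relaxes the runtime gate: the old upper bound of 120 on input height and width is dropped, so the Winograd path now serves any input larger than 12x12. The new `enableWinograd` API presumably gives callers a way to opt out where that wider applicability costs too much accuracy.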
```diff
@@ -44,7 +44,7 @@ struct FastConv2d
     std::vector<float> weightsBuf;       // For generic Conv 2D
     std::vector<float> weightsWino63Buf; // For Winograd F(6x6, 3x3).
     std::vector<float> biasBuf;
-    bool ifWinograd63 = false;
+    bool useWinograd63 = false;
     bool useAVX2 = checkHardwareSupport(CPU_AVX2);
     bool useNEON = checkHardwareSupport(CPU_NEON);
 };
```
```diff
@@ -58,7 +58,7 @@ Ptr<FastConv2d> initFastConv2d(
         const std::vector<size_t>& pads_begin,
         const std::vector<size_t>& pads_end,
         InputArray weightsMat,
-        float* srcBias);
+        float* srcBias, bool useWinograd);

 // It contains different computing branches, like winograd, 1x1 conv.
 void runFastConv2d(InputArray _input, OutputArray _output, const Ptr<FastConv2d>& conv, int ntasks,
```
```diff
@@ -1689,7 +1689,7 @@ int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _outpu

 void initWinograd63(Ptr<FastConv2d>& conv, InputArray _weightsMat, int K, int C)
 {
-    conv->ifWinograd63 = false;
+    conv->useWinograd63 = false;
 }

 int runWinograd63(InputArray _input, OutputArray _output, const Ptr<FastConv2d>& conv, int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct)
```
```diff
@@ -187,12 +187,14 @@ void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kern

 void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
                                 std::vector<size_t>& pads_end, std::vector<size_t>& strides,
-                                std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads)
+                                std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads,
+                                bool& useWinograd)
 {
     util::getKernelSize(params, kernel);
     util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
     util::getParameter(params, "dilation", "dilation", dilations, true, std::vector<size_t>(kernel.size(), 1));
     util::getParameter(params, "adj", "adj", adjust_pads, true, std::vector<size_t>(kernel.size(), 0));
+    useWinograd = params.get<bool>("use_winograd", true);

     for (int i = 0; i < dilations.size(); i++)
         CV_Assert(dilations[i] > 0);
```
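The `use_winograd` key gives per-layer control in addition to the net-wide switch. A minimal sketch of how the flag flows through `LayerParams` (the surrounding layer setup is assumed, not shown in this patch):

```cpp
#include <opencv2/dnn.hpp>

void sketch()
{
    cv::dnn::LayerParams lp;
    lp.set("use_winograd", false);  // per-layer opt-out
    // getConvolutionKernelParams(...) reads the key back with a default of
    // true, so layers that never set it keep the Winograd branch enabled.
    bool useWinograd = lp.get<bool>("use_winograd", true);
    CV_Assert(useWinograd == false);
}
```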
```diff
@@ -61,7 +61,7 @@ namespace dnn
 {
 void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
                                 std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
-                                cv::String &padMode, std::vector<size_t>& adjust_pads);
+                                cv::String &padMode, std::vector<size_t>& adjust_pads, bool& useWinograd);

 void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
                             std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);
```
```diff
@@ -395,6 +395,13 @@ void Net::enableFusion(bool fusion)
     return impl->enableFusion(fusion);
 }

+void Net::enableWinograd(bool useWinograd)
+{
+    CV_TRACE_FUNCTION();
+    CV_Assert(impl);
+    return impl->enableWinograd(useWinograd);
+}
+
 void Net::setHalideScheduler(const String& scheduler)
 {
     CV_TRACE_FUNCTION();
```
```diff
@@ -55,6 +55,7 @@ Net::Impl::Impl()
     preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT();
     preferableTarget = DNN_TARGET_CPU;
     hasDynamicShapes = false;
+    useWinograd = true;
 }


```
```diff
@@ -2038,6 +2039,37 @@ void Net::Impl::getMemoryConsumption(
     }
 }

+void Net::Impl::enableWinograd(bool useWinograd_)
+{
+    if (useWinograd != useWinograd_)
+    {
+        useWinograd = useWinograd_;
+
+        for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
+        {
+            int lid = it->first;
+            LayerData &ld = layers[lid];
+            Ptr<Layer>& currLayer = ld.layerInstance;
+
+            if (ld.type == "Convolution")
+            {
+                ld.params.set("use_winograd", useWinograd_);
+                Ptr<ConvolutionLayer> convLayer = ld.layerInstance.dynamicCast<ConvolutionLayer>();
+                if (!convLayer.empty())
+                    convLayer->useWinograd = useWinograd_;
+            }
+
+            if (ld.type == "ConvolutionInt8")
+            {
+                Ptr<ConvolutionLayerInt8> convLayer = currLayer.dynamicCast<ConvolutionLayerInt8>();
+                ld.params.set("use_winograd", useWinograd_);
+                if (!convLayer.empty())
+                    convLayer->useWinograd = useWinograd_;
+            }
+        }
+    }
+}
+

 // TODO drop?
 void Net::Impl::getLayerTypes(std::vector<String>& layersTypes) const
```
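Because `Net::Impl::enableWinograd` updates both the cached `LayerParams` and the live layer instances, the switch can be flipped between inferences. A small usage sketch (`net` and `blob` are assumed to be set up already; not part of this commit):

```cpp
// Compare the two compute branches on the same input.
net.setInput(blob);
net.enableWinograd(true);   // default: fast Winograd 3x3 path
cv::Mat fast = net.forward().clone();

net.setInput(blob);
net.enableWinograd(false);  // exact reference path
cv::Mat exact = net.forward().clone();

double maxDiff = cv::norm(fast, exact, cv::NORM_INF);  // expected to be small
```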
```diff
@@ -64,6 +64,7 @@ struct Net::Impl : public detail::NetImplBase
     bool netWasQuantized;
     bool fusion;
     bool isAsync; // FIXIT: drop
+    bool useWinograd;
     std::vector<int64> layersTimings;


```
```diff
@@ -211,6 +212,7 @@ struct Net::Impl : public detail::NetImplBase
     void enableFusion(bool fusion_);

     virtual void fuseLayers(const std::vector<LayerPin>& blobsToKeep_);
+    void enableWinograd(bool useWinograd_);

     void allocateLayers(const std::vector<LayerPin>& blobsToKeep_);

```
```diff
@@ -51,6 +51,7 @@ Net Net::Impl::quantize(Net& net, InputArrayOfArrays calibData, int inputsDtype,
     setPreferableBackend(net, DNN_BACKEND_OPENCV);
     setPreferableTarget(DNN_TARGET_CPU);
     enableFusion(false);
+    enableWinograd(false);

     if (calibData.isMat())
     {
```
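Disabling Winograd (alongside fusion) during quantization means the calibration pass collects activation statistics from the exact reference convolution path, so the small Winograd accuracy loss cannot skew the computed quantization parameters; that is the likely rationale, though the commit itself does not state it.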
```diff
@@ -476,6 +476,7 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
         ASSERT_TRUE(!net.empty());
     }

+    net.enableWinograd(false);
     net.setPreferableBackend(backend);
     net.setPreferableTarget(target);

```