Merge pull request #16436 from YashasSamaga:feature-enetb0-yolo
dnn(darknet-importer): add grouped convolutions, sigmoid, swish, scale_channels

* update darknet importer to support enetb0-yolo
* remove dropout (pr16438) and fix formatting
* add test for scale_channels
* disable batch testing for scale channels
* do not set LayerParams::name
* merge all activations into setActivation
This commit is contained in:
parent 0bcdf7d03e
commit 490908f0ff
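In practice, the importer changes below are what let an EfficientNet-B0-YOLO style network load through the standard darknet entry point. A minimal usage sketch, assuming placeholder cfg/weights file names (illustrative, not files shipped with this PR):

#include <opencv2/dnn.hpp>

int main()
{
    // Placeholder file names: any darknet model using grouped convolutions,
    // swish/logistic activations, or [scale_channels] should import after this change.
    cv::dnn::Net net = cv::dnn::readNetFromDarknet("enetb0-yolo.cfg", "enetb0-yolo.weights");
    CV_Assert(!net.empty());
    return 0;
}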
modules/dnn/src/darknet/darknet_io.cpp
@@ -149,7 +149,7 @@ namespace cv {
         void setConvolution(int kernel, int pad, int stride,
-            int filters_num, int channels_num, int use_batch_normalize)
+            int filters_num, int channels_num, int groups, int use_batch_normalize)
         {
             cv::dnn::LayerParams conv_param =
                 getParamConvolution(kernel, pad, stride, filters_num);
@@ -162,6 +162,8 @@ namespace cv {
                 conv_param.set<bool>("bias_term", true);
             }

+            conv_param.set<int>("group", groups);
+
             lp.layer_name = layer_name;
             lp.layer_type = conv_param.type;
             lp.layerParams = conv_param;
@@ -215,15 +217,30 @@ namespace cv {
             fused_layer_names.push_back(last_layer);
         }

-        void setReLU()
+        void setActivation(String type)
         {
             cv::dnn::LayerParams activation_param;
-            activation_param.set<float>("negative_slope", 0.1f);
-            activation_param.name = "ReLU-name";
-            activation_param.type = "ReLU";
+            if (type == "relu")
+            {
+                activation_param.set<float>("negative_slope", 0.1f);
+                activation_param.type = "ReLU";
+            }
+            else if (type == "swish")
+            {
+                activation_param.type = "Swish";
+            }
+            else if (type == "logistic")
+            {
+                activation_param.type = "Sigmoid";
+            }
+            else
+            {
+                CV_Error(cv::Error::StsParseError, "Unsupported activation: " + type);
+            }
+
+            std::string layer_name = cv::format("%s_%d", type.c_str(), layer_id);

             darknet::LayerParameter lp;
-            std::string layer_name = cv::format("relu_%d", layer_id);
             lp.layer_name = layer_name;
             lp.layer_type = activation_param.type;
             lp.layerParams = activation_param;
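For reference, the branch above maps darknet activation names onto OpenCV layer types: "relu" becomes a ReLU layer with negative_slope 0.1 (darknet's leaky ReLU), "swish" becomes Swish, and "logistic" becomes Sigmoid. A standalone sketch of the element-wise functions those layer types compute (illustrative, not OpenCV internals):

#include <cmath>

static float leaky_relu(float x) { return x > 0.f ? x : 0.1f * x; }     // "ReLU" with negative_slope = 0.1
static float sigmoid(float x)    { return 1.f / (1.f + std::exp(-x)); } // "Sigmoid"
static float swish(float x)      { return x * sigmoid(x); }             // "Swish": swish(x) = x * sigmoid(x)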
@@ -487,6 +504,25 @@ namespace cv {
             fused_layer_names.push_back(last_layer);
         }

+        void setScaleChannels(int from)
+        {
+            cv::dnn::LayerParams shortcut_param;
+            shortcut_param.type = "Scale";
+
+            darknet::LayerParameter lp;
+            std::string layer_name = cv::format("scale_channels_%d", layer_id);
+            lp.layer_name = layer_name;
+            lp.layer_type = shortcut_param.type;
+            lp.layerParams = shortcut_param;
+            lp.bottom_indexes.push_back(fused_layer_names.at(from));
+            lp.bottom_indexes.push_back(last_layer);
+            last_layer = layer_name;
+            net->layers.push_back(lp);
+
+            layer_id++;
+            fused_layer_names.push_back(last_layer);
+        }
+
         void setUpsample(int scaleFactor)
         {
             cv::dnn::LayerParams param;
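The generated "Scale" layer takes two bottoms: the layer named by `from` supplies the feature map to be scaled, and `last_layer` (typically the 1x1xC output of a squeeze-excitation branch in EfficientNet) supplies the per-channel multipliers. The intended semantics as an illustrative loop (a sketch of the operation, not the ScaleLayer implementation):

#include <cstddef>
#include <vector>

// out[n][c][y][x] = data[n][c][y][x] * scales[n][c], with NCHW `data` from the
// `from` bottom and 1x1-spatial `scales` from last_layer.
void scale_channels(const std::vector<float>& data, const std::vector<float>& scales,
                    std::vector<float>& out, int N, int C, int H, int W)
{
    out.resize(data.size());
    for (int n = 0; n < N; ++n)
        for (int c = 0; c < C; ++c)
            for (int i = 0; i < H * W; ++i)
            {
                const std::size_t idx = (std::size_t(n) * C + c) * H * W + i;
                out[idx] = data[idx] * scales[n * C + c];
            }
}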
@@ -608,6 +644,7 @@ namespace cv {
                 int padding = getParam<int>(layer_params, "padding", 0);
                 int stride = getParam<int>(layer_params, "stride", 1);
                 int filters = getParam<int>(layer_params, "filters", -1);
+                int groups = getParam<int>(layer_params, "groups", 1);
                 bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
                 int flipped = getParam<int>(layer_params, "flipped", 0);
                 if (flipped == 1)
@@ -618,9 +655,10 @@ namespace cv {

                 CV_Assert(kernel_size > 0 && filters > 0);
                 CV_Assert(tensor_shape[0] > 0);
+                CV_Assert(tensor_shape[0] % groups == 0);

                 setParams.setConvolution(kernel_size, padding, stride, filters, tensor_shape[0],
-                    batch_normalize);
+                    groups, batch_normalize);

                 tensor_shape[0] = filters;
                 tensor_shape[1] = (tensor_shape[1] - kernel_size + 2 * padding) / stride + 1;
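The new assert encodes the grouped-convolution contract: the input channels must split evenly across groups, with each filter seeing only channels/groups of the input channels. In the extreme case groups == channels (e.g. filters=32, channels=32, groups=32), this is a depthwise convolution, which EfficientNet-B0 uses throughout, and the check reduces to 32 % 32 == 0.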
@@ -727,6 +765,14 @@ namespace cv {
                 from = from < 0 ? from + layers_counter : from;
                 setParams.setShortcut(from, alpha);
             }
+            else if (layer_type == "scale_channels")
+            {
+                std::string bottom_layer = getParam<std::string>(layer_params, "from", "");
+                CV_Assert(!bottom_layer.empty());
+                int from = std::atoi(bottom_layer.c_str());
+                from = from < 0 ? from + layers_counter : from;
+                setParams.setScaleChannels(from);
+            }
             else if (layer_type == "upsample")
             {
                 int scaleFactor = getParam<int>(layer_params, "stride", 1);
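As with [shortcut], a negative `from` in the cfg is resolved relative to the current layer index: for example, from=-3 while parsing layer 87 resolves to 87 - 3 = 84, whose fused layer name setScaleChannels then looks up.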
@@ -761,7 +807,15 @@ namespace cv {
             std::string activation = getParam<std::string>(layer_params, "activation", "linear");
             if (activation == "leaky")
             {
-                setParams.setReLU();
+                setParams.setActivation("relu");
             }
+            else if (activation == "swish")
+            {
+                setParams.setActivation("swish");
+            }
+            else if (activation == "logistic")
+            {
+                setParams.setActivation("logistic");
+            }
             else if (activation != "linear")
                 CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation);
@@ -818,13 +872,15 @@ namespace cv {
                 {
                     int kernel_size = getParam<int>(layer_params, "size", -1);
                     filters = getParam<int>(layer_params, "filters", -1);
+                    int groups = getParam<int>(layer_params, "groups", 1);
                     use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;

                     CV_Assert(kernel_size > 0 && filters > 0);
                     CV_Assert(tensor_shape[0] > 0);
+                    CV_Assert(tensor_shape[0] % groups == 0);

-                    weights_size = filters * tensor_shape[0] * kernel_size * kernel_size;
-                    int sizes_weights[] = { filters, tensor_shape[0], kernel_size, kernel_size };
+                    weights_size = filters * (tensor_shape[0] / groups) * kernel_size * kernel_size;
+                    int sizes_weights[] = { filters, tensor_shape[0] / groups, kernel_size, kernel_size };
                     weightsBlob.create(4, sizes_weights, CV_32F);
                 }
                 else
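Dividing by groups here matches darknet's on-disk weight layout, where each filter stores only channels/groups input planes. For a 3x3 depthwise layer with filters=192, channels=192, groups=192, the blob holds 192 * 1 * 3 * 3 = 1,728 floats rather than 192 * 192 * 3 * 3 = 331,776; reading the old, larger size would desynchronize every subsequent layer in the weights stream.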
@@ -879,8 +935,8 @@ namespace cv {
                 }

                 std::string activation = getParam<std::string>(layer_params, "activation", "linear");
-                if(activation == "leaky")
-                    ++cv_layers_counter; // For ReLU
+                if(activation == "leaky" || activation == "swish" || activation == "logistic")
+                    ++cv_layers_counter; // For ReLU, Swish, Sigmoid

                 if(!darknet_layers_counter)
                     tensor_shape.resize(1);
modules/dnn/test/test_darknet.cpp
@@ -97,7 +97,7 @@ TEST(Test_Darknet, read_yolo_voc_stream)
 class Test_Darknet_layers : public DNNTestLayer
 {
 public:
-    void testDarknetLayer(const std::string& name, bool hasWeights = false)
+    void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true)
     {
         SCOPED_TRACE(name);
         Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy"));
@@ -117,7 +117,7 @@ public:
         Mat out = net.forward();
         normAssert(out, ref, "", default_l1, default_lInf);

-        if (inp.size[0] == 1)  // test handling of batch size
+        if (inp.size[0] == 1 && testBatchProcessing)  // test handling of batch size
         {
             SCOPED_TRACE("batch size 2");
@@ -552,6 +552,12 @@ TEST_P(Test_Darknet_layers, convolutional)
     testDarknetLayer("convolutional", true);
 }

+TEST_P(Test_Darknet_layers, scale_channels)
+{
+    // TODO: test fails for batches due to a bug/missing feature in ScaleLayer
+    testDarknetLayer("scale_channels", false, false);
+}
+
 TEST_P(Test_Darknet_layers, connected)
 {
     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
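With test data in place (OPENCV_TEST_DATA_PATH pointing at an opencv_extra testdata checkout that contains the new scale_channels .npy fixtures), the new case can be run in isolation with a standard gtest filter, for example: opencv_test_dnn --gtest_filter=*scale_channels*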