/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//                        (3-clause BSD License)
//
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * Neither the names of the copyright holders nor the names of the contributors
//     may be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*M///////////////////////////////////////////////////////////////////////////////////////
//MIT License
//
//Copyright (c) 2017 Joseph Redmon
//
//Permission is hereby granted, free of charge, to any person obtaining a copy
//of this software and associated documentation files (the "Software"), to deal
//in the Software without restriction, including without limitation the rights
//to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
//copies of the Software, and to permit persons to whom the Software is
//furnished to do so, subject to the following conditions:
//
//The above copyright notice and this permission notice shall be included in all
//copies or substantial portions of the Software.
//
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
//FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
//AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
//LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
//OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
//SOFTWARE.
//
//M*/
#include "../precomp.hpp"
|
|
#include <opencv2/dnn/shape_utils.hpp>
|
|
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
|
|
#include "darknet_io.hpp"
|
|
|
|
namespace cv {
|
|
namespace dnn {
|
|
namespace darknet {
|
|
|
|
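
// Parse the value stored under param_name in params into T via std::stringstream;
// init_val is returned unchanged when the key is missing or fails to parse, e.g.
// getParam<int>(layer_params, "stride", 1) yields 1 for a cfg without "stride".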
template<typename T>
T getParam(const std::map<std::string, std::string> &params, const std::string param_name, T init_val)
{
    std::map<std::string, std::string>::const_iterator it = params.find(param_name);
    if (it != params.end()) {
        std::stringstream ss(it->second);
        ss >> init_val;
    }
    return init_val;
}

static const std::string kFirstLayerName = "data";
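
// Builds the cv::dnn layer list while the Darknet cfg is walked. last_layer holds
// the name of the most recently added output; fused_layer_names maps each Darknet
// layer index to the OpenCV layer that produces its final output (a convolutional
// section's conv, batch-norm and activation collapse into a single entry).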
class setLayersParams {

    NetParameter *net;
    int layer_id;
    std::string last_layer;
    std::vector<std::string> fused_layer_names;

public:
    setLayersParams(NetParameter *_net) :
        net(_net), layer_id(0), last_layer(kFirstLayerName)
    {}

    void setLayerBlobs(int i, std::vector<cv::Mat> blobs)
    {
        cv::dnn::LayerParams &params = net->layers[i].layerParams;
        params.blobs = blobs;
    }

    void setBatchNorm()
    {
        cv::dnn::LayerParams bn_param;

        bn_param.name = "BatchNorm-name";
        bn_param.type = "BatchNorm";
        bn_param.set<bool>("has_weight", true);
        bn_param.set<bool>("has_bias", true);
        bn_param.set<float>("eps", 1E-6); // .000001f in Darknet Yolo

        darknet::LayerParameter lp;
        std::string layer_name = cv::format("bn_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = bn_param.type;
        lp.layerParams = bn_param;
        lp.bottom_indexes.push_back(last_layer);
        last_layer = layer_name;
        net->layers.push_back(lp);
    }

    cv::dnn::LayerParams getParamConvolution(int kernel, int pad,
        int stride, int filters_num)
    {
        cv::dnn::LayerParams params;
        params.name = "Convolution-name";
        params.type = "Convolution";

        params.set<int>("kernel_size", kernel);
        params.set<int>("pad", pad);
        params.set<int>("stride", stride);

        params.set<bool>("bias_term", false); // true only if(BatchNorm == false)
        params.set<int>("num_output", filters_num);

        return params;
    }

    void setConvolution(int kernel, int pad, int stride,
        int filters_num, int channels_num, int groups, int use_batch_normalize)
    {
        cv::dnn::LayerParams conv_param =
            getParamConvolution(kernel, pad, stride, filters_num);

        darknet::LayerParameter lp;
        std::string layer_name = cv::format("conv_%d", layer_id);

        // use BIAS in any case
        if (!use_batch_normalize) {
            conv_param.set<bool>("bias_term", true);
        }

        conv_param.set<int>("group", groups);

        lp.layer_name = layer_name;
        lp.layer_type = conv_param.type;
        lp.layerParams = conv_param;
        lp.bottom_indexes.push_back(last_layer);
        last_layer = layer_name;
        net->layers.push_back(lp);

        if (use_batch_normalize)
            setBatchNorm();

        layer_id++;
        fused_layer_names.push_back(last_layer);
    }

    cv::dnn::LayerParams getParamFullyConnected(int output)
    {
        cv::dnn::LayerParams params;
        params.name = "FullyConnected-name";
        params.type = "InnerProduct";

        params.set<bool>("bias_term", false); // true only if(BatchNorm == false)
        params.set<int>("num_output", output);

        return params;
    }

    void setFullyConnected(int output, int use_batch_normalize)
    {
        cv::dnn::LayerParams fullyconnected_param =
            getParamFullyConnected(output);

        darknet::LayerParameter lp;
        std::string layer_name = cv::format("fullyConnected_%d", layer_id);

        // use BIAS in any case
        if (!use_batch_normalize) {
            fullyconnected_param.set<bool>("bias_term", true);
        }

        lp.layer_name = layer_name;
        lp.layer_type = fullyconnected_param.type;
        lp.layerParams = fullyconnected_param;
        lp.bottom_indexes.push_back(last_layer);
        last_layer = layer_name;
        net->layers.push_back(lp);

        if (use_batch_normalize)
            setBatchNorm();

        layer_id++;
        fused_layer_names.push_back(last_layer);
    }

    void setActivation(String type)
    {
        cv::dnn::LayerParams activation_param;
        if (type == "relu")
        {
            activation_param.type = "ReLU";
        }
        else if (type == "leaky")
        {
            activation_param.set<float>("negative_slope", 0.1f);
            activation_param.type = "ReLU";
        }
        else if (type == "swish")
        {
            activation_param.type = "Swish";
        }
        else if (type == "mish")
        {
            activation_param.type = "Mish";
        }
        else if (type == "logistic")
        {
            activation_param.type = "Sigmoid";
        }
        else if (type == "tanh")
        {
            activation_param.type = "TanH";
        }
        else
        {
            CV_Error(cv::Error::StsParseError, "Unsupported activation: " + type);
        }

        std::string layer_name = cv::format("%s_%d", type.c_str(), layer_id);

        darknet::LayerParameter lp;
        lp.layer_name = layer_name;
        lp.layer_type = activation_param.type;
        lp.layerParams = activation_param;
        lp.bottom_indexes.push_back(last_layer);
        last_layer = layer_name;
        net->layers.push_back(lp);

        fused_layer_names.back() = last_layer;
    }

    void setMaxpool(int kernel, int pad, int stride)
    {
        cv::dnn::LayerParams maxpool_param;
        maxpool_param.set<cv::String>("pool", "max");
        maxpool_param.set<int>("kernel_size", kernel);
        maxpool_param.set<int>("pad_l", floor((float)pad / 2));
        maxpool_param.set<int>("pad_r", ceil((float)pad / 2));
        maxpool_param.set<int>("pad_t", floor((float)pad / 2));
        maxpool_param.set<int>("pad_b", ceil((float)pad / 2));
        maxpool_param.set<bool>("ceil_mode", false);
        maxpool_param.set<int>("stride", stride);
        maxpool_param.name = "Pooling-name";
        maxpool_param.type = "Pooling";

        darknet::LayerParameter lp;
        std::string layer_name = cv::format("pool_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = maxpool_param.type;
        lp.layerParams = maxpool_param;
        lp.bottom_indexes.push_back(last_layer);
        last_layer = layer_name;
        net->layers.push_back(lp);
        layer_id++;
        fused_layer_names.push_back(last_layer);
    }

    void setAvgpool()
    {
        cv::dnn::LayerParams avgpool_param;
        avgpool_param.set<cv::String>("pool", "ave");
        avgpool_param.set<bool>("global_pooling", true);
        avgpool_param.name = "Pooling-name";
        avgpool_param.type = "Pooling";
        darknet::LayerParameter lp;

        std::string layer_name = cv::format("avgpool_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = avgpool_param.type;
        lp.layerParams = avgpool_param;
        lp.bottom_indexes.push_back(last_layer);
        last_layer = layer_name;
        net->layers.push_back(lp);
        layer_id++;
        fused_layer_names.push_back(last_layer);
    }

    void setSoftmax()
    {
        cv::dnn::LayerParams softmax_param;
        softmax_param.name = "Softmax-name";
        softmax_param.type = "Softmax";
        darknet::LayerParameter lp;

        std::string layer_name = cv::format("softmax_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = softmax_param.type;
        lp.layerParams = softmax_param;
        lp.bottom_indexes.push_back(last_layer);
        last_layer = layer_name;
        net->layers.push_back(lp);
        layer_id++;
        fused_layer_names.push_back(last_layer);
    }

    void setConcat(int number_of_inputs, int *input_indexes)
    {
        cv::dnn::LayerParams concat_param;
        concat_param.name = "Concat-name";
        concat_param.type = "Concat";
        concat_param.set<int>("axis", 1); // channels are in axis = 1

        darknet::LayerParameter lp;

        std::string layer_name = cv::format("concat_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = concat_param.type;
        lp.layerParams = concat_param;
        for (int i = 0; i < number_of_inputs; ++i)
            lp.bottom_indexes.push_back(fused_layer_names.at(input_indexes[i]));

        last_layer = layer_name;
        net->layers.push_back(lp);

        layer_id++;
        fused_layer_names.push_back(last_layer);
    }

    void setIdentity(int bottom_index)
    {
        cv::dnn::LayerParams identity_param;
        identity_param.name = "Identity-name";
        identity_param.type = "Identity";

        darknet::LayerParameter lp;

        std::string layer_name = cv::format("identity_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = identity_param.type;
        lp.layerParams = identity_param;
        lp.bottom_indexes.push_back(fused_layer_names.at(bottom_index));

        last_layer = layer_name;
        net->layers.push_back(lp);

        layer_id++;
        fused_layer_names.push_back(last_layer);
    }
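
    // Darknet "route" with groups > 1 reads only a channel slice of each input:
    // channels [split_size * group_id, split_size * (group_id + 1)) along axis 1.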
    void setSlice(int input_index, int split_size, int group_id)
    {
        int begin[] = {0, split_size * group_id, 0, 0};
        cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin, 4);

        int end[] = {-1, begin[1] + split_size, -1, -1};
        cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end, 4);

        darknet::LayerParameter lp;
        lp.layer_name = cv::format("slice_%d", layer_id);
        lp.layer_type = "Slice";
        lp.layerParams.set("begin", paramBegin);
        lp.layerParams.set("end", paramEnd);

        lp.bottom_indexes.push_back(fused_layer_names.at(input_index));
        net->layers.push_back(lp);

        layer_id++;
        last_layer = lp.layer_name;
        fused_layer_names.push_back(last_layer);
    }

    void setReorg(int stride)
    {
        cv::dnn::LayerParams reorg_params;
        reorg_params.name = "Reorg-name";
        reorg_params.type = "Reorg";
        reorg_params.set<int>("reorg_stride", stride);

        darknet::LayerParameter lp;
        std::string layer_name = cv::format("reorg_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = reorg_params.type;
        lp.layerParams = reorg_params;
        lp.bottom_indexes.push_back(last_layer);
        last_layer = layer_name;

        net->layers.push_back(lp);

        layer_id++;
        fused_layer_names.push_back(last_layer);
    }
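
    // Region/Yolo layers consume NHWC-ordered data, so a Permute to (0, 2, 3, 1)
    // is inserted right before them; with isDarknetLayer == false the permute is
    // invisible to Darknet layer indexing (layer_id is not advanced).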
    void setPermute(bool isDarknetLayer = true)
    {
        cv::dnn::LayerParams permute_params;
        permute_params.name = "Permute-name";
        permute_params.type = "Permute";
        int permute[] = { 0, 2, 3, 1 };
        cv::dnn::DictValue paramOrder = cv::dnn::DictValue::arrayInt(permute, 4);

        permute_params.set("order", paramOrder);

        darknet::LayerParameter lp;
        std::string layer_name = cv::format("permute_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = permute_params.type;
        lp.layerParams = permute_params;
        lp.bottom_indexes.push_back(last_layer);
        last_layer = layer_name;
        net->layers.push_back(lp);

        if (isDarknetLayer)
        {
            layer_id++;
            fused_layer_names.push_back(last_layer);
        }
    }

    void setRegion(float thresh, int coords, int classes, int anchors, int classfix, int softmax, int softmax_tree, float *biasData)
    {
        cv::dnn::LayerParams region_param;
        region_param.name = "Region-name";
        region_param.type = "Region";

        region_param.set<float>("thresh", thresh);
        region_param.set<int>("coords", coords);
        region_param.set<int>("classes", classes);
        region_param.set<int>("anchors", anchors);
        region_param.set<int>("classfix", classfix);
        region_param.set<bool>("softmax_tree", softmax_tree);
        region_param.set<bool>("softmax", softmax);

        cv::Mat biasData_mat = cv::Mat(1, anchors * 2, CV_32F, biasData).clone();
        region_param.blobs.push_back(biasData_mat);

        darknet::LayerParameter lp;
        std::string layer_name = "detection_out";
        lp.layer_name = layer_name;
        lp.layer_type = region_param.type;
        lp.layerParams = region_param;
        lp.bottom_indexes.push_back(last_layer);
        last_layer = layer_name;
        net->layers.push_back(lp);

        layer_id++;
        fused_layer_names.push_back(last_layer);
    }
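
    // A [yolo] head's "mask" picks which of the globally listed anchor pairs it
    // uses; only the selected (width, height) pairs are stored in the biases
    // blob. The "data" layer is attached as a second input, which the Region
    // layer uses to obtain the network input size.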
    void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold, float scale_x_y, int new_coords)
    {
        cv::dnn::LayerParams region_param;
        region_param.name = "Region-name";
        region_param.type = "Region";

        const int numAnchors = mask.size();

        region_param.set<int>("classes", classes);
        region_param.set<int>("anchors", numAnchors);
        region_param.set<bool>("logistic", true);
        region_param.set<float>("thresh", thresh);
        region_param.set<float>("nms_threshold", nms_threshold);
        region_param.set<float>("scale_x_y", scale_x_y);
        region_param.set<int>("new_coords", new_coords);

        std::vector<float> usedAnchors(numAnchors * 2);
        for (int i = 0; i < numAnchors; ++i)
        {
            usedAnchors[i * 2] = anchors[mask[i] * 2];
            usedAnchors[i * 2 + 1] = anchors[mask[i] * 2 + 1];
        }

        cv::Mat biasData_mat = cv::Mat(1, numAnchors * 2, CV_32F, &usedAnchors[0]).clone();
        region_param.blobs.push_back(biasData_mat);

        darknet::LayerParameter lp;
        std::string layer_name = cv::format("yolo_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = region_param.type;
        lp.layerParams = region_param;
        lp.bottom_indexes.push_back(last_layer);
        lp.bottom_indexes.push_back(kFirstLayerName);
        last_layer = layer_name;
        net->layers.push_back(lp);

        layer_id++;
        fused_layer_names.push_back(last_layer);
    }
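
    // Darknet "shortcut": element-wise sum of the previous output and layer
    // `from`; alpha != 1 scales the first input via Eltwise coefficients.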
    void setShortcut(int from, float alpha)
    {
        cv::dnn::LayerParams shortcut_param;
        shortcut_param.name = "Shortcut-name";
        shortcut_param.type = "Eltwise";

        if (alpha != 1)
        {
            std::vector<float> coeffs(2, 1);
            coeffs[0] = alpha;
            shortcut_param.set("coeff", DictValue::arrayReal<float*>(&coeffs[0], coeffs.size()));
        }

        shortcut_param.set<std::string>("op", "sum");
        shortcut_param.set<std::string>("output_channels_mode", "input_0_truncate");

        darknet::LayerParameter lp;
        std::string layer_name = cv::format("shortcut_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = shortcut_param.type;
        lp.layerParams = shortcut_param;
        lp.bottom_indexes.push_back(last_layer);
        lp.bottom_indexes.push_back(fused_layer_names.at(from));
        last_layer = layer_name;
        net->layers.push_back(lp);

        layer_id++;
        fused_layer_names.push_back(last_layer);
    }

    void setScaleChannels(int from)
    {
        cv::dnn::LayerParams shortcut_param;
        shortcut_param.type = "Scale";

        darknet::LayerParameter lp;
        std::string layer_name = cv::format("scale_channels_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = shortcut_param.type;
        lp.layerParams = shortcut_param;
        lp.bottom_indexes.push_back(fused_layer_names.at(from));
        lp.bottom_indexes.push_back(last_layer);
        last_layer = layer_name;
        net->layers.push_back(lp);

        layer_id++;
        fused_layer_names.push_back(last_layer);
    }
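
    // SAM (spatial attention module): element-wise product of the previous
    // output with the output of layer `from`.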
    void setSAM(int from)
    {
        cv::dnn::LayerParams eltwise_param;
        eltwise_param.name = "SAM-name";
        eltwise_param.type = "Eltwise";

        eltwise_param.set<std::string>("operation", "prod");
        eltwise_param.set<std::string>("output_channels_mode", "same");

        darknet::LayerParameter lp;
        std::string layer_name = cv::format("sam_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = eltwise_param.type;
        lp.layerParams = eltwise_param;
        lp.bottom_indexes.push_back(last_layer);
        lp.bottom_indexes.push_back(fused_layer_names.at(from));
        last_layer = layer_name;
        net->layers.push_back(lp);

        layer_id++;
        fused_layer_names.push_back(last_layer);
    }

    void setUpsample(int scaleFactor)
    {
        cv::dnn::LayerParams param;
        param.name = "Upsample-name";
        param.type = "Resize";

        param.set<int>("zoom_factor", scaleFactor);
        param.set<String>("interpolation", "nearest");

        darknet::LayerParameter lp;
        std::string layer_name = cv::format("upsample_%d", layer_id);
        lp.layer_name = layer_name;
        lp.layer_type = param.type;
        lp.layerParams = param;
        lp.bottom_indexes.push_back(last_layer);
        last_layer = layer_name;
        net->layers.push_back(lp);

        layer_id++;
        fused_layer_names.push_back(last_layer);
    }
};
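
// Keep only printable characters in ('!' .. 'z'); this strips spaces, tabs and
// the '\r' left by CRLF cfg files.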
std::string escapeString(const std::string &src)
{
    std::string dst;
    for (size_t i = 0; i < src.size(); ++i)
        if (src[i] > ' ' && src[i] <= 'z')
            dst += src[i];
    return dst;
}
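
// Split a comma-separated list such as "10,13,16,30" into numbers of type T.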
template<typename T>
std::vector<T> getNumbers(const std::string &src)
{
    std::vector<T> dst;
    std::stringstream ss(src);

    for (std::string str; std::getline(ss, str, ',');) {
        std::stringstream line(str);
        T val;
        line >> val;
        dst.push_back(val);
    }
    return dst;
}
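
// Parses a Darknet .cfg stream. For illustration, a minimal cfg looks like:
//   [net]
//   width=416
//   height=416
//   channels=3
//   [convolutional]
//   batch_normalize=1
//   filters=32
//   size=3
//   stride=1
//   pad=1
//   activation=leaky
// "[net]" options land in net->net_cfg; every following section becomes one
// entry of net->layers_cfg, keyed by its zero-based layer index.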
bool ReadDarknetFromCfgStream(std::istream &ifile, NetParameter *net)
{
    bool read_net = false;
    int layers_counter = -1;
    for (std::string line; std::getline(ifile, line);) {
        line = escapeString(line);
        if (line.empty()) continue;
        switch (line[0]) {
        case '\0': break;
        case '#': break;
        case ';': break;
        case '[':
            if (line == "[net]") {
                read_net = true;
            }
            else {
                // read section
                read_net = false;
                ++layers_counter;
                const size_t layer_type_size = line.find(']') - 1;
                CV_Assert(layer_type_size < line.size());
                std::string layer_type = line.substr(1, layer_type_size);
                net->layers_cfg[layers_counter]["layer_type"] = layer_type;
            }
            break;
        default:
            // read entry
            const size_t separator_index = line.find('=');
            CV_Assert(separator_index < line.size());
            if (separator_index != std::string::npos) {
                std::string name = line.substr(0, separator_index);
                std::string value = line.substr(separator_index + 1, line.size() - (separator_index + 1));
                name = escapeString(name);
                value = escapeString(value);
                if (name.empty() || value.empty()) continue;
                if (read_net)
                    net->net_cfg[name] = value;
                else
                    net->layers_cfg[layers_counter][name] = value;
            }
        }
    }

    std::string anchors = net->layers_cfg[net->layers_cfg.size() - 1]["anchors"];
    std::vector<float> vec = getNumbers<float>(anchors);
    std::map<std::string, std::string> &net_params = net->net_cfg;
    net->width = getParam(net_params, "width", 416);
    net->height = getParam(net_params, "height", 416);
    net->channels = getParam(net_params, "channels", 3);
    CV_Assert(net->width > 0 && net->height > 0 && net->channels > 0);

    MatShape tensor_shape(3);
    tensor_shape[0] = net->channels;
    tensor_shape[1] = net->width;
    tensor_shape[2] = net->height;
    net->out_channels_vec.resize(net->layers_cfg.size());

    layers_counter = -1;

    setLayersParams setParams(net);

    typedef std::map<int, std::map<std::string, std::string> >::iterator it_type;
    for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) {
        ++layers_counter;
        std::map<std::string, std::string> &layer_params = i->second;
        std::string layer_type = layer_params["layer_type"];

        if (layer_type == "convolutional")
        {
            int kernel_size = getParam<int>(layer_params, "size", -1);
            int pad = getParam<int>(layer_params, "pad", 0);
            int padding = getParam<int>(layer_params, "padding", 0);
            int stride = getParam<int>(layer_params, "stride", 1);
            int filters = getParam<int>(layer_params, "filters", -1);
            int groups = getParam<int>(layer_params, "groups", 1);
            bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
            int flipped = getParam<int>(layer_params, "flipped", 0);
            if (flipped == 1)
                CV_Error(cv::Error::StsNotImplemented, "Transposing the convolutional weights is not implemented");

            if (pad)
                padding = kernel_size / 2;

            // Cannot divide 0
            CV_Assert(stride > 0);
            CV_Assert(kernel_size > 0 && filters > 0);
            CV_Assert(tensor_shape[0] > 0);
            CV_Assert(tensor_shape[0] % groups == 0);

            setParams.setConvolution(kernel_size, padding, stride, filters, tensor_shape[0],
                groups, batch_normalize);

            tensor_shape[0] = filters;
            tensor_shape[1] = (tensor_shape[1] - kernel_size + 2 * padding) / stride + 1;
            tensor_shape[2] = (tensor_shape[2] - kernel_size + 2 * padding) / stride + 1;
        }
        else if (layer_type == "connected")
        {
            int output = getParam<int>(layer_params, "output", 1);
            bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;

            CV_Assert(output > 0);

            setParams.setFullyConnected(output, batch_normalize);

            if(layers_counter && tensor_shape[1] > 1)
                net->out_channels_vec[layers_counter-1] = total(tensor_shape);

            tensor_shape[0] = output;
            tensor_shape[1] = 1;
            tensor_shape[2] = 1;
        }
        else if (layer_type == "maxpool")
        {
            int kernel_size = getParam<int>(layer_params, "size", 2);
            int stride = getParam<int>(layer_params, "stride", 2);
            int padding = getParam<int>(layer_params, "padding", kernel_size - 1);
            // Cannot divide 0
            CV_Assert(stride > 0);

            setParams.setMaxpool(kernel_size, padding, stride);

            tensor_shape[1] = (tensor_shape[1] - kernel_size + padding) / stride + 1;
            tensor_shape[2] = (tensor_shape[2] - kernel_size + padding) / stride + 1;
        }
        else if (layer_type == "avgpool")
        {
            setParams.setAvgpool();
            tensor_shape[1] = 1;
            tensor_shape[2] = 1;
        }
        else if (layer_type == "softmax")
        {
            int groups = getParam<int>(layer_params, "groups", 1);
            if (groups != 1)
                CV_Error(Error::StsNotImplemented, "Softmax from Darknet with groups != 1");
            setParams.setSoftmax();
        }
        else if (layer_type == "route")
        {
            std::string bottom_layers = getParam<std::string>(layer_params, "layers", "");
            CV_Assert(!bottom_layers.empty());
            int groups = getParam<int>(layer_params, "groups", 1);
            std::vector<int> layers_vec = getNumbers<int>(bottom_layers);

            tensor_shape[0] = 0;
            for (size_t k = 0; k < layers_vec.size(); ++k) {
                layers_vec[k] = layers_vec[k] >= 0 ? layers_vec[k] : (layers_vec[k] + layers_counter);
                tensor_shape[0] += net->out_channels_vec[layers_vec[k]];
            }

            if (groups > 1)
            {
                int group_id = getParam<int>(layer_params, "group_id", 0);
                tensor_shape[0] /= groups;
                int split_size = tensor_shape[0] / layers_vec.size();
                for (size_t k = 0; k < layers_vec.size(); ++k)
                    setParams.setSlice(layers_vec[k], split_size, group_id);

                if (layers_vec.size() > 1)
                {
                    // layer ids in layers_vec - inputs of Slice layers
                    // after adding offset to layers_vec: layer ids - outputs of Slice layers
                    for (size_t k = 0; k < layers_vec.size(); ++k)
                        layers_vec[k] += layers_vec.size();

                    setParams.setConcat(layers_vec.size(), layers_vec.data());
                }
            }
            else
            {
                if (layers_vec.size() == 1)
                    setParams.setIdentity(layers_vec.at(0));
                else
                    setParams.setConcat(layers_vec.size(), layers_vec.data());
            }
        }
        else if (layer_type == "dropout" || layer_type == "cost")
        {
            setParams.setIdentity(layers_counter-1);
        }
        else if (layer_type == "reorg")
        {
            int stride = getParam<int>(layer_params, "stride", 2);
            // Cannot divide 0
            CV_Assert(stride > 0);
            tensor_shape[0] = tensor_shape[0] * (stride * stride);
            tensor_shape[1] = tensor_shape[1] / stride;
            tensor_shape[2] = tensor_shape[2] / stride;

            setParams.setReorg(stride);
        }
        else if (layer_type == "region")
        {
            float thresh = getParam<float>(layer_params, "thresh", 0.001);
            int coords = getParam<int>(layer_params, "coords", 4);
            int classes = getParam<int>(layer_params, "classes", -1);
            int num_of_anchors = getParam<int>(layer_params, "num", -1);
            int classfix = getParam<int>(layer_params, "classfix", 0);
            bool softmax = (getParam<int>(layer_params, "softmax", 0) == 1);
            bool softmax_tree = (getParam<std::string>(layer_params, "tree", "").size() > 0);

            std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
            CV_Assert(!anchors_values.empty());
            std::vector<float> anchors_vec = getNumbers<float>(anchors_values);

            CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());

            setParams.setPermute(false);
            setParams.setRegion(thresh, coords, classes, num_of_anchors, classfix, softmax, softmax_tree, anchors_vec.data());
        }
        else if (layer_type == "shortcut")
        {
            std::string bottom_layer = getParam<std::string>(layer_params, "from", "");
            float alpha = getParam<float>(layer_params, "alpha", 1);
            float beta = getParam<float>(layer_params, "beta", 0);
            if (beta != 0)
                CV_Error(Error::StsNotImplemented, "Non-zero beta");
            CV_Assert(!bottom_layer.empty());
            int from = std::atoi(bottom_layer.c_str());

            from = from < 0 ? from + layers_counter : from;
            setParams.setShortcut(from, alpha);
        }
        else if (layer_type == "scale_channels")
        {
            std::string bottom_layer = getParam<std::string>(layer_params, "from", "");
            CV_Assert(!bottom_layer.empty());
            int from = std::atoi(bottom_layer.c_str());
            from = from < 0 ? from + layers_counter : from;
            setParams.setScaleChannels(from);
        }
        else if (layer_type == "sam")
        {
            std::string bottom_layer = getParam<std::string>(layer_params, "from", "");
            CV_Assert(!bottom_layer.empty());
            int from = std::atoi(bottom_layer.c_str());
            from = from < 0 ? from + layers_counter : from;
            setParams.setSAM(from);
        }
        else if (layer_type == "upsample")
        {
            int scaleFactor = getParam<int>(layer_params, "stride", 1);
            setParams.setUpsample(scaleFactor);
            tensor_shape[1] = tensor_shape[1] * scaleFactor;
            tensor_shape[2] = tensor_shape[2] * scaleFactor;
        }
        else if (layer_type == "yolo")
        {
            int classes = getParam<int>(layer_params, "classes", -1);
            int num_of_anchors = getParam<int>(layer_params, "num", -1);
            float thresh = getParam<float>(layer_params, "thresh", 0.2);
            float nms_threshold = getParam<float>(layer_params, "nms_threshold", 0.0);
            float scale_x_y = getParam<float>(layer_params, "scale_x_y", 1.0);
            int new_coords = getParam<int>(layer_params, "new_coords", 0);

            std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
            CV_Assert(!anchors_values.empty());
            std::vector<float> anchors_vec = getNumbers<float>(anchors_values);

            std::string mask_values = getParam<std::string>(layer_params, "mask", std::string());
            CV_Assert(!mask_values.empty());
            std::vector<int> mask_vec = getNumbers<int>(mask_values);

            CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());

            setParams.setPermute(false);
            setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold, scale_x_y, new_coords);
        }
        else {
            CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
        }

        std::string activation = getParam<std::string>(layer_params, "activation", "linear");
        if (activation != "linear")
            setParams.setActivation(activation);

        net->out_channels_vec[layers_counter] = tensor_shape[0];
    }

    return true;
}
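
// Binary .weights layout: int32 major/minor/revision, then a "seen" counter
// (uint64 for version >= 0.2, int32 before that); after the header, every
// convolutional/connected layer stores biases[filters], then, when
// batch_normalize is set, scales/means/variances[filters], then the raw
// float32 weights.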
bool ReadDarknetFromWeightsStream(std::istream &ifile, NetParameter *net)
{
    int32_t major_ver, minor_ver, revision;
    ifile.read(reinterpret_cast<char *>(&major_ver), sizeof(int32_t));
    ifile.read(reinterpret_cast<char *>(&minor_ver), sizeof(int32_t));
    ifile.read(reinterpret_cast<char *>(&revision), sizeof(int32_t));

    uint64_t seen;
    if ((major_ver * 10 + minor_ver) >= 2) {
        ifile.read(reinterpret_cast<char *>(&seen), sizeof(uint64_t));
    }
    else {
        int32_t iseen = 0;
        ifile.read(reinterpret_cast<char *>(&iseen), sizeof(int32_t));
        seen = iseen;
    }
    bool transpose = (major_ver > 1000) || (minor_ver > 1000);
    if(transpose)
        CV_Error(cv::Error::StsNotImplemented, "Transposing the weights (except for convolutional) is not implemented");

    MatShape tensor_shape(3);
    tensor_shape[0] = net->channels;
    tensor_shape[1] = net->width;
    tensor_shape[2] = net->height;
    int cv_layers_counter = -1;
    int darknet_layers_counter = -1;

    setLayersParams setParams(net);

    typedef std::map<int, std::map<std::string, std::string> >::iterator it_type;
    for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) {
        ++darknet_layers_counter;
        ++cv_layers_counter;
        std::map<std::string, std::string> &layer_params = i->second;
        std::string layer_type = layer_params["layer_type"];

        if (layer_type == "convolutional" || layer_type == "connected")
        {
            size_t weights_size;
            int filters;
            bool use_batch_normalize;
            cv::Mat weightsBlob;
            if(layer_type == "convolutional")
            {
                int kernel_size = getParam<int>(layer_params, "size", -1);
                filters = getParam<int>(layer_params, "filters", -1);
                int groups = getParam<int>(layer_params, "groups", 1);
                use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;

                CV_Assert(kernel_size > 0 && filters > 0);
                CV_Assert(tensor_shape[0] > 0);
                CV_Assert(tensor_shape[0] % groups == 0);

                weights_size = filters * (tensor_shape[0] / groups) * kernel_size * kernel_size;
                int sizes_weights[] = { filters, tensor_shape[0] / groups, kernel_size, kernel_size };
                weightsBlob.create(4, sizes_weights, CV_32F);
            }
            else
            {
                filters = getParam<int>(layer_params, "output", 1);
                use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;

                CV_Assert(filters > 0);

                weights_size = total(tensor_shape) * filters;
                int sizes_weights[] = { filters, total(tensor_shape) };
                weightsBlob.create(2, sizes_weights, CV_32F);
            }
            CV_Assert(weightsBlob.isContinuous());

            cv::Mat meanData_mat(1, filters, CV_32F);    // mean
            cv::Mat stdData_mat(1, filters, CV_32F);     // variance
            cv::Mat weightsData_mat(1, filters, CV_32F); // scale
            cv::Mat biasData_mat(1, filters, CV_32F);    // bias

            ifile.read(reinterpret_cast<char *>(biasData_mat.ptr<float>()), sizeof(float)*filters);
            if (use_batch_normalize) {
                ifile.read(reinterpret_cast<char *>(weightsData_mat.ptr<float>()), sizeof(float)*filters);
                ifile.read(reinterpret_cast<char *>(meanData_mat.ptr<float>()), sizeof(float)*filters);
                ifile.read(reinterpret_cast<char *>(stdData_mat.ptr<float>()), sizeof(float)*filters);
            }
            ifile.read(reinterpret_cast<char *>(weightsBlob.ptr<float>()), sizeof(float)*weights_size);

            // set conv/connected weights
            std::vector<cv::Mat> layer_blobs;
            layer_blobs.push_back(weightsBlob);
            if (!use_batch_normalize) {
                // use BIAS in any case
                layer_blobs.push_back(biasData_mat);
            }
            setParams.setLayerBlobs(cv_layers_counter, layer_blobs);

            // set batch normalize (mean, variance, scale, bias)
            if (use_batch_normalize) {
                ++cv_layers_counter;
                std::vector<cv::Mat> bn_blobs;
                bn_blobs.push_back(meanData_mat);
                bn_blobs.push_back(stdData_mat);
                bn_blobs.push_back(weightsData_mat);
                bn_blobs.push_back(biasData_mat);
                setParams.setLayerBlobs(cv_layers_counter, bn_blobs);
            }
        }
        if (layer_type == "region" || layer_type == "yolo")
        {
            ++cv_layers_counter;  // For permute.
        }

        std::string activation = getParam<std::string>(layer_params, "activation", "linear");
        if (activation != "linear")
            ++cv_layers_counter;  // For ReLU, Swish, Mish, Sigmoid, etc.

        if(!darknet_layers_counter)
            tensor_shape.resize(1);

        tensor_shape[0] = net->out_channels_vec[darknet_layers_counter];
    }
    return true;
}

}  // namespace darknet
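
// Entry points called by the Darknet importer: parse the .cfg first, then read
// the matching .weights. Typical use from the public API (a sketch; see
// dnn.hpp for the actual declarations):
//   cv::dnn::Net net = cv::dnn::readNetFromDarknet("yolov4x-mish.cfg",
//                                                  "yolov4x-mish.weights");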
void ReadNetParamsFromCfgStreamOrDie(std::istream &ifile, darknet::NetParameter *net)
{
    if (!darknet::ReadDarknetFromCfgStream(ifile, net)) {
        CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter stream");
    }
}

void ReadNetParamsFromBinaryStreamOrDie(std::istream &ifile, darknet::NetParameter *net)
{
    if (!darknet::ReadDarknetFromWeightsStream(ifile, net)) {
        CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter stream");
    }
}

}  // namespace dnn
}  // namespace cv