mirror of
https://github.com/opencv/opencv.git
synced 2025-07-24 14:06:27 +08:00
Merge pull request #20671 from rogday:yolov4x-mish
Add support for YOLOv4x-mish
* backport to 3.4 for supporting yolov4x-mish
* add YOLOv4x-mish test
* address review comments

Co-authored-by: Guo Xu <guoxu@1school.com.cn>
This commit is contained in:
parent
6fa63dcc0c
commit
c410d7a97d
@ -470,7 +470,7 @@ namespace cv {
|
||||
fused_layer_names.push_back(last_layer);
|
||||
}
|
||||
|
||||
void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold, float scale_x_y)
|
||||
void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold, float scale_x_y, int new_coords)
|
||||
{
|
||||
cv::dnn::LayerParams region_param;
|
||||
region_param.name = "Region-name";
|
||||
@ -484,6 +484,7 @@ namespace cv {
|
||||
region_param.set<float>("thresh", thresh);
|
||||
region_param.set<float>("nms_threshold", nms_threshold);
|
||||
region_param.set<float>("scale_x_y", scale_x_y);
|
||||
region_param.set<int>("new_coords", new_coords);
|
||||
|
||||
std::vector<float> usedAnchors(numAnchors * 2);
|
||||
for (int i = 0; i < numAnchors; ++i)
|
||||
@ -882,6 +883,7 @@ namespace cv {
|
||||
float thresh = getParam<float>(layer_params, "thresh", 0.2);
|
||||
float nms_threshold = getParam<float>(layer_params, "nms_threshold", 0.0);
|
||||
float scale_x_y = getParam<float>(layer_params, "scale_x_y", 1.0);
|
||||
int new_coords = getParam<int>(layer_params, "new_coords", 0);
|
||||
|
||||
std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
|
||||
CV_Assert(!anchors_values.empty());
|
||||
@ -894,7 +896,7 @@ namespace cv {
|
||||
CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
|
||||
|
||||
setParams.setPermute(false);
|
||||
setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold, scale_x_y);
|
||||
setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold, scale_x_y, new_coords);
|
||||
}
|
||||
else {
|
||||
CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
|
||||
|
@ -64,6 +64,7 @@ class RegionLayerImpl CV_FINAL : public RegionLayer
|
||||
public:
|
||||
int coords, classes, anchors, classfix;
|
||||
float thresh, nmsThreshold, scale_x_y;
|
||||
int new_coords;
|
||||
bool useSoftmax, useLogistic;
|
||||
#ifdef HAVE_OPENCL
|
||||
UMat blob_umat;
|
||||
@ -83,6 +84,7 @@ public:
|
||||
useLogistic = params.get<bool>("logistic", false);
|
||||
nmsThreshold = params.get<float>("nms_threshold", 0.4);
|
||||
scale_x_y = params.get<float>("scale_x_y", 1.0); // Yolov4
|
||||
new_coords = params.get<int>("new_coords", 0); // Yolov4x-mish
|
||||
|
||||
CV_Assert(nmsThreshold >= 0.);
|
||||
CV_Assert(coords == 4);
|
||||
@ -113,7 +115,7 @@ public:
|
||||
{
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_2) && preferableTarget != DNN_TARGET_MYRIAD;
|
||||
return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_2) && preferableTarget != DNN_TARGET_MYRIAD && new_coords == 0;
|
||||
#endif
|
||||
return backendId == DNN_BACKEND_OPENCV;
|
||||
}
|
||||
@ -259,26 +261,28 @@ public:
|
||||
const float *srcData = inpBlob.ptr<float>();
|
||||
float *dstData = outBlob.ptr<float>();
|
||||
|
||||
// logistic activation for t0, for each grid cell (X x Y x Anchor-index)
|
||||
for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
|
||||
int index = cell_size*i;
|
||||
float x = srcData[index + 4];
|
||||
dstData[index + 4] = logistic_activate(x); // logistic activation
|
||||
}
|
||||
|
||||
if (useSoftmax) { // Yolo v2
|
||||
if (new_coords == 0) {
|
||||
// logistic activation for t0, for each grid cell (X x Y x Anchor-index)
|
||||
for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
|
||||
int index = cell_size*i;
|
||||
softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
|
||||
float x = srcData[index + 4];
|
||||
dstData[index + 4] = logistic_activate(x); // logistic activation
|
||||
}
|
||||
}
|
||||
else if (useLogistic) { // Yolo v3
|
||||
for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
|
||||
int index = cell_size*i;
|
||||
const float* input = srcData + index + 5;
|
||||
float* output = dstData + index + 5;
|
||||
for (int c = 0; c < classes; ++c)
|
||||
output[c] = logistic_activate(input[c]);
|
||||
|
||||
if (useSoftmax) { // Yolo v2
|
||||
for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
|
||||
int index = cell_size*i;
|
||||
softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
|
||||
}
|
||||
}
|
||||
else if (useLogistic) { // Yolo v3
|
||||
for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
|
||||
int index = cell_size*i;
|
||||
const float* input = srcData + index + 5;
|
||||
float* output = dstData + index + 5;
|
||||
for (int c = 0; c < classes; ++c)
|
||||
output[c] = logistic_activate(input[c]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int b = 0; b < batch_size; ++b)
|
||||
@ -290,20 +294,46 @@ public:
|
||||
int index = (y*cols + x)*anchors + a; // index for each grid-cell & anchor
|
||||
int p_index = index_sample_offset + index * cell_size + 4;
|
||||
float scale = dstData[p_index];
|
||||
if (classfix == -1 && scale < .5) scale = 0; // if(t0 < 0.5) t0 = 0;
|
||||
if (classfix == -1 && scale < .5)
|
||||
{
|
||||
scale = 0; // if(t0 < 0.5) t0 = 0;
|
||||
}
|
||||
int box_index = index_sample_offset + index * cell_size;
|
||||
|
||||
float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f;
|
||||
float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f;
|
||||
dstData[box_index + 0] = (x + x_tmp) / cols;
|
||||
dstData[box_index + 1] = (y + y_tmp) / rows;
|
||||
dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
|
||||
dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
|
||||
if (new_coords == 1) {
|
||||
float x_tmp = (srcData[box_index + 0] - 0.5f) * scale_x_y + 0.5f;
|
||||
float y_tmp = (srcData[box_index + 1] - 0.5f) * scale_x_y + 0.5f;
|
||||
dstData[box_index + 0] = (x + x_tmp) / cols;
|
||||
dstData[box_index + 1] = (y + y_tmp) / rows;
|
||||
dstData[box_index + 2] = (srcData[box_index + 2]) * (srcData[box_index + 2]) * 4 * biasData[2 * a] / wNorm;
|
||||
dstData[box_index + 3] = (srcData[box_index + 3]) * (srcData[box_index + 3]) * 4 * biasData[2 * a + 1] / hNorm;
|
||||
|
||||
int class_index = index_sample_offset + index * cell_size + 5;
|
||||
for (int j = 0; j < classes; ++j) {
|
||||
float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
|
||||
dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
|
||||
scale = srcData[p_index];
|
||||
if (classfix == -1 && scale < thresh)
|
||||
{
|
||||
scale = 0; // if(t0 < 0.5) t0 = 0;
|
||||
}
|
||||
|
||||
int class_index = index_sample_offset + index * cell_size + 5;
|
||||
for (int j = 0; j < classes; ++j) {
|
||||
float prob = scale*srcData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
|
||||
dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f;
|
||||
float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f;
|
||||
dstData[box_index + 0] = (x + x_tmp) / cols;
|
||||
dstData[box_index + 1] = (y + y_tmp) / rows;
|
||||
dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
|
||||
dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
|
||||
|
||||
int class_index = index_sample_offset + index * cell_size + 5;
|
||||
for (int j = 0; j < classes; ++j) {
|
||||
float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
|
||||
dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (nmsThreshold > 0) {
|
||||
|
@ -681,6 +681,78 @@ TEST_P(Test_Darknet_nets, YOLOv4_tiny)
|
||||
#endif
|
||||
}
|
||||
|
||||
// Test for the YOLOv4x-mish Darknet model: feeds "yolov4x-mish.cfg" /
// "yolov4x-mish.weights" to testDarknetModel() twice (traced as "batch size 1"
// and "batch size 2") and checks against the hard-coded reference
// detections in ref_ using the scoreDiff / iouDiff tolerances chosen below.
TEST_P(Test_Darknet_nets, YOLOv4x_mish)
|
||||
{
|
||||
applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB));
|
||||
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
|
||||
#endif
|
||||
#if defined(INF_ENGINE_RELEASE)
|
||||
if (target == DNN_TARGET_MYRIAD) // NC_OUT_OF_MEMORY
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
|
||||
#endif
|
||||
|
||||
// batchId, classId, confidence, left, top, right, bottom
|
||||
// The first N0 rows of ref_ carry batchId 0, the following N1 rows carry batchId 1.
const int N0 = 3;
|
||||
const int N1 = 5;
|
||||
static const float ref_[/* (N0 + N1) * 7 */] = {
|
||||
0, 16, 0.925536f, 0.17188f, 0.386832f, 0.406138f, 0.941696f,
|
||||
0, 1, 0.912028f, 0.162125f, 0.208863f, 0.741316f, 0.729332f,
|
||||
0, 7, 0.841018f, 0.608953f, 0.128653f, 0.900692f, 0.295657f,
|
||||
|
||||
1, 2, 0.925697f, 0.650438f, 0.458118f, 0.813927f, 0.661775f,
|
||||
1, 0, 0.882156f, 0.203644f, 0.365763f, 0.265473f, 0.632195f,
|
||||
1, 2, 0.848857f, 0.451044f, 0.462997f, 0.496629f, 0.522719f,
|
||||
1, 9, 0.736015f, 0.374503f, 0.316029f, 0.399358f, 0.392883f,
|
||||
1, 9, 0.727129f, 0.662469f, 0.373687f, 0.687877f, 0.441335f,
|
||||
};
|
||||
// Wrap the static array as an (N0 + N1) x 7 single-channel float matrix.
Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_);
|
||||
|
||||
// Looser tolerances are used on the OPENCL_FP16 and MYRIAD targets.
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : 8e-5;
|
||||
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.042 : 3e-4;
|
||||
|
||||
std::string config_file = "yolov4x-mish.cfg";
|
||||
std::string weights_file = "yolov4x-mish.weights";
|
||||
|
||||
// On a Myriad X VPU with an Inference Engine backend the tolerances are relaxed further.
#if defined(INF_ENGINE_RELEASE)
|
||||
if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD &&
|
||||
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
|
||||
{
|
||||
scoreDiff = 0.04;
|
||||
iouDiff = 0.2;
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
SCOPED_TRACE("batch size 1");
|
||||
// Only the first N0 reference rows (batchId 0) are passed here.
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
|
||||
}
|
||||
|
||||
{
|
||||
SCOPED_TRACE("batch size 2");
|
||||
|
||||
#if defined(INF_ENGINE_RELEASE)
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
|
||||
{
|
||||
if (target == DNN_TARGET_OPENCL)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
|
||||
else if (target == DNN_TARGET_OPENCL_FP16 && INF_ENGINE_VER_MAJOR_LE(202010000))
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
|
||||
else if (target == DNN_TARGET_MYRIAD &&
|
||||
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
|
||||
}
|
||||
#endif
|
||||
|
||||
// The full reference (rows for batchId 0 and batchId 1) is passed here.
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets());
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user