From 83d70b0f36904e8906dc3f446fc093dac9e6a590 Mon Sep 17 00:00:00 2001 From: Chia-Hsiang Tsai <84863554+Tsai-chia-hsiang@users.noreply.github.com> Date: Thu, 16 Nov 2023 18:40:00 +0800 Subject: [PATCH] Merge pull request #24396 from Tsai-chia-hsiang:yolov8cv Using cv2 dnn interface to run yolov8 model #24396 This is sample code that uses the OpenCV dnn interface to run the Ultralytics YOLOv8 model for object detection. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [X] I agree to contribute to the project under Apache 2 License. - [X] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV - [X] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake --- samples/dnn/common.py | 4 ++++ samples/dnn/models.yml | 18 +++++++++++++++++ samples/dnn/object_detection.py | 36 ++++++++++++++++++++++----------- 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/samples/dnn/common.py b/samples/dnn/common.py index db9283b5d8..4765506eac 100644 --- a/samples/dnn/common.py +++ b/samples/dnn/common.py @@ -79,6 +79,10 @@ def add_preproc_args(zoo, parser, sample): help='Indicate that model works with RGB input images instead BGR ones.') add_argument(zoo, parser, 'classes', help='Optional path to a text file with names of classes to label detected objects.') + add_argument(zoo, parser, 'postprocessing', type=str, + help='Post-processing kind depends on model topology.') + add_argument(zoo, parser, 'background_label_id', type=int, default=-1, + help='An index of background class in predictions. 
If not negative, exclude such class from list of classes.') def findFile(filename): diff --git a/samples/dnn/models.yml b/samples/dnn/models.yml index 53d8b8048f..4d2774c71e 100644 --- a/samples/dnn/models.yml +++ b/samples/dnn/models.yml @@ -33,6 +33,7 @@ yolov4: height: 416 rgb: true classes: "object_detection_classes_yolo.txt" + background_label_id: 0 sample: "object_detection" yolov4-tiny: @@ -47,6 +48,7 @@ yolov4-tiny: height: 416 rgb: true classes: "object_detection_classes_yolo.txt" + background_label_id: 0 sample: "object_detection" yolov3: @@ -61,6 +63,7 @@ yolov3: height: 416 rgb: true classes: "object_detection_classes_yolo.txt" + background_label_id: 0 sample: "object_detection" tiny-yolo-voc: @@ -75,6 +78,21 @@ tiny-yolo-voc: height: 416 rgb: true classes: "object_detection_classes_pascal_voc.txt" + background_label_id: 0 + sample: "object_detection" + +yolov8: + load_info: + url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8n.onnx" + sha1: "68f864475d06e2ec4037181052739f268eeac38d" + model: "yolov8n.onnx" + mean: [0, 0, 0] + scale: 0.00392 + width: 640 + height: 640 + rgb: true + postprocessing: "yolov8" + classes: "object_detection_classes_yolo.txt" sample: "object_detection" # Caffe implementation of SSD model from https://github.com/chuanqi305/MobileNet-SSD diff --git a/samples/dnn/object_detection.py b/samples/dnn/object_detection.py index 0ca5586159..875ed3929f 100644 --- a/samples/dnn/object_detection.py +++ b/samples/dnn/object_detection.py @@ -2,6 +2,7 @@ import cv2 as cv import argparse import numpy as np import sys +import copy import time from threading import Thread if sys.version_info[0] == 2: @@ -27,7 +28,7 @@ parser.add_argument('--out_tf_graph', default='graph.pbtxt', help='For models from TensorFlow Object Detection API, you may ' 'pass a .config file which was used for training through --config ' 'argument. 
This way an additional .pbtxt file with TensorFlow graph will be created.') -parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet', 'dldt'], +parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet', 'dldt', 'onnx'], help='Optional name of an origin framework of the model. ' 'Detect it automatically if it does not set.') parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold') @@ -86,7 +87,7 @@ if args.classes: classes = f.read().rstrip('\n').split('\n') # Load a network -net = cv.dnn.readNet(cv.samples.findFile(args.model), cv.samples.findFile(args.config), args.framework) +net = cv.dnn.readNet(args.model, args.config, args.framework) net.setPreferableBackend(args.backend) net.setPreferableTarget(args.target) outNames = net.getUnconnectedOutLayersNames() @@ -145,20 +146,32 @@ def postprocess(frame, outs): classIds.append(int(detection[1]) - 1) # Skip background label confidences.append(float(confidence)) boxes.append([left, top, width, height]) - elif lastLayer.type == 'Region': + elif lastLayer.type == 'Region' or args.postprocessing == 'yolov8': # Network produces output blob with a shape NxC where N is a number of # detected objects and C is a number of classes + 4 where the first 4 # numbers are [center_x, center_y, width, height] + if args.postprocessing == 'yolov8': + box_scale_w = frameWidth / args.width + box_scale_h = frameHeight / args.height + else: + box_scale_w = frameWidth + box_scale_h = frameHeight + for out in outs: + if args.postprocessing == 'yolov8': + out = out[0].transpose(1, 0) + for detection in out: - scores = detection[5:] + scores = detection[4:] + if args.background_label_id >= 0: + scores = np.delete(scores, args.background_label_id) classId = np.argmax(scores) confidence = scores[classId] if confidence > confThreshold: - center_x = int(detection[0] * frameWidth) - center_y = int(detection[1] * frameHeight) - width = int(detection[2] * frameWidth) - 
height = int(detection[3] * frameHeight) + center_x = int(detection[0] * box_scale_w) + center_y = int(detection[1] * box_scale_h) + width = int(detection[2] * box_scale_w) + height = int(detection[3] * box_scale_h) left = int(center_x - width / 2) top = int(center_y - height / 2) classIds.append(classId) @@ -170,7 +183,7 @@ def postprocess(frame, outs): # NMS is used inside Region layer only on DNN_BACKEND_OPENCV for another backends we need NMS in sample # or NMS is required if number of outputs > 1 - if len(outNames) > 1 or lastLayer.type == 'Region' and args.backend != cv.dnn.DNN_BACKEND_OPENCV: + if len(outNames) > 1 or (lastLayer.type == 'Region' or args.postprocessing == 'yolov8') and args.backend != cv.dnn.DNN_BACKEND_OPENCV: indices = [] classIds = np.array(classIds) boxes = np.array(boxes) @@ -181,7 +194,6 @@ def postprocess(frame, outs): conf = confidences[class_indices] box = boxes[class_indices].tolist() nms_indices = cv.dnn.NMSBoxes(box, conf, confThreshold, nmsThreshold) - nms_indices = nms_indices[:, 0] if len(nms_indices) else [] indices.extend(class_indices[nms_indices]) else: indices = np.arange(0, len(classIds)) @@ -282,11 +294,11 @@ def processingThreadBody(): futureOutputs.append(net.forwardAsync()) else: outs = net.forward(outNames) - predictionsQueue.put(np.copy(outs)) + predictionsQueue.put(copy.deepcopy(outs)) while futureOutputs and futureOutputs[0].wait_for(0): out = futureOutputs[0].get() - predictionsQueue.put(np.copy([out])) + predictionsQueue.put(copy.deepcopy([out])) del futureOutputs[0]