mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 14:36:36 +08:00
Merge pull request #26486 from gursimarsingh:object_detection_engine_update
Code Fixes and changed post processing based on models.yml in Object Detection Sample #26486 ## Major Changes 1. Changes to add findModel support for config file in models like yolov4, yolov4-tiny, yolov3, ssd_caffe, tiny-yolo-voc, ssd_tf and faster_rcnn_tf. 2. Added new model and config download links for ssd_caffe, as previous links were not working. 3. Switched to DNN ENGINE_CLASSIC for non-CPU config as the new engine does not support it. 4. Fixes in python sample related to yolov5 usage. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
b476ed6d06
commit
efbe580ff3
@ -62,7 +62,12 @@ std::string genArgument(const std::string& argName, const std::string& help,
|
|||||||
FileNode value = node[argName];
|
FileNode value = node[argName];
|
||||||
if (argName.find("sha1") != std::string::npos) {
|
if (argName.find("sha1") != std::string::npos) {
|
||||||
std::string prefix = argName.substr(0, argName.find("sha1"));
|
std::string prefix = argName.substr(0, argName.find("sha1"));
|
||||||
value = node[prefix+"load_info"][argName];
|
if (prefix == "config_"){
|
||||||
|
value = node[prefix+"load_info"]["sha1"];
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
value = node[prefix+"load_info"][argName];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (argName.find("download_sha") != std::string::npos) {
|
if (argName.find("download_sha") != std::string::npos) {
|
||||||
std::string prefix = argName.substr(0, argName.find("download_sha"));
|
std::string prefix = argName.substr(0, argName.find("download_sha"));
|
||||||
@ -183,6 +188,8 @@ std::string genPreprocArguments(const std::string& modelName, const std::string&
|
|||||||
modelName, zooFile)+
|
modelName, zooFile)+
|
||||||
genArgument(prefix + "sha1", "Optional path to hashsum of downloaded model to be loaded from models.yml",
|
genArgument(prefix + "sha1", "Optional path to hashsum of downloaded model to be loaded from models.yml",
|
||||||
modelName, zooFile)+
|
modelName, zooFile)+
|
||||||
|
genArgument(prefix + "config_sha1", "Optional path to hashsum of downloaded config to be loaded from models.yml",
|
||||||
|
modelName, zooFile)+
|
||||||
genArgument(prefix + "download_sha", "Optional path to hashsum of downloaded model to be loaded from models.yml",
|
genArgument(prefix + "download_sha", "Optional path to hashsum of downloaded model to be loaded from models.yml",
|
||||||
modelName, zooFile);
|
modelName, zooFile);
|
||||||
}
|
}
|
||||||
|
@ -19,7 +19,10 @@ def add_argument(zoo, parser, name, help, required=False, default=None, type=Non
|
|||||||
if "sha1" in name:
|
if "sha1" in name:
|
||||||
prefix = name.replace("sha1", "")
|
prefix = name.replace("sha1", "")
|
||||||
value = node.getNode(prefix + "load_info")
|
value = node.getNode(prefix + "load_info")
|
||||||
value = value.getNode(name)
|
if prefix == "config_":
|
||||||
|
value = value.getNode("sha1")
|
||||||
|
else:
|
||||||
|
value = value.getNode(name)
|
||||||
if "download_sha" in name:
|
if "download_sha" in name:
|
||||||
prefix = name.replace("download_sha", "")
|
prefix = name.replace("download_sha", "")
|
||||||
value = node.getNode(prefix + "load_info")
|
value = node.getNode(prefix + "load_info")
|
||||||
@ -97,6 +100,8 @@ def add_preproc_args(zoo, parser, sample, alias=None, prefix=""):
|
|||||||
help='An index of background class in predictions. If not negative, exclude such class from list of classes.', alias=alias)
|
help='An index of background class in predictions. If not negative, exclude such class from list of classes.', alias=alias)
|
||||||
add_argument(zoo, parser, prefix+'sha1', type=str,
|
add_argument(zoo, parser, prefix+'sha1', type=str,
|
||||||
help='Optional path to hashsum of downloaded model to be loaded from models.yml', alias=alias)
|
help='Optional path to hashsum of downloaded model to be loaded from models.yml', alias=alias)
|
||||||
|
add_argument(zoo, parser, prefix+'config_sha1', type=str,
|
||||||
|
help='Optional path to hashsum of downloaded config to be loaded from models.yml', alias=alias)
|
||||||
add_argument(zoo, parser, prefix+'download_sha', type=str,
|
add_argument(zoo, parser, prefix+'download_sha', type=str,
|
||||||
help='Optional path to hashsum of downloaded model to be loaded from models.yml', alias=alias)
|
help='Optional path to hashsum of downloaded model to be loaded from models.yml', alias=alias)
|
||||||
|
|
||||||
|
@ -89,8 +89,8 @@ yolov5l:
|
|||||||
width: 640
|
width: 640
|
||||||
height: 640
|
height: 640
|
||||||
rgb: true
|
rgb: true
|
||||||
classes: "object_detection_classes_yolo.txt"
|
labels: "object_detection_classes_yolo.txt"
|
||||||
background_label_id: 0
|
postprocessing: "yolov5"
|
||||||
sample: "object_detection"
|
sample: "object_detection"
|
||||||
|
|
||||||
# YOLO4 object detection family from Darknet (https://github.com/AlexeyAB/darknet)
|
# YOLO4 object detection family from Darknet (https://github.com/AlexeyAB/darknet)
|
||||||
@ -112,6 +112,7 @@ yolov4:
|
|||||||
rgb: true
|
rgb: true
|
||||||
labels: "object_detection_classes_yolo.txt"
|
labels: "object_detection_classes_yolo.txt"
|
||||||
background_label_id: 0
|
background_label_id: 0
|
||||||
|
postprocessing: "darknet"
|
||||||
sample: "object_detection"
|
sample: "object_detection"
|
||||||
|
|
||||||
yolov4-tiny:
|
yolov4-tiny:
|
||||||
@ -130,6 +131,7 @@ yolov4-tiny:
|
|||||||
rgb: true
|
rgb: true
|
||||||
labels: "object_detection_classes_yolo.txt"
|
labels: "object_detection_classes_yolo.txt"
|
||||||
background_label_id: 0
|
background_label_id: 0
|
||||||
|
postprocessing: "darknet"
|
||||||
sample: "object_detection"
|
sample: "object_detection"
|
||||||
|
|
||||||
yolov3:
|
yolov3:
|
||||||
@ -148,6 +150,7 @@ yolov3:
|
|||||||
rgb: true
|
rgb: true
|
||||||
labels: "object_detection_classes_yolo.txt"
|
labels: "object_detection_classes_yolo.txt"
|
||||||
background_label_id: 0
|
background_label_id: 0
|
||||||
|
postprocessing: "darknet"
|
||||||
sample: "object_detection"
|
sample: "object_detection"
|
||||||
|
|
||||||
tiny-yolo-voc:
|
tiny-yolo-voc:
|
||||||
@ -166,14 +169,18 @@ tiny-yolo-voc:
|
|||||||
rgb: true
|
rgb: true
|
||||||
labels: "object_detection_classes_pascal_voc.txt"
|
labels: "object_detection_classes_pascal_voc.txt"
|
||||||
background_label_id: 0
|
background_label_id: 0
|
||||||
|
postprocessing: "darknet"
|
||||||
sample: "object_detection"
|
sample: "object_detection"
|
||||||
|
|
||||||
# Caffe implementation of SSD model from https://github.com/chuanqi305/MobileNet-SSD
|
# Caffe implementation of SSD model from https://github.com/PINTO0309/MobileNet-SSD-RealSense
|
||||||
ssd_caffe:
|
ssd_caffe:
|
||||||
load_info:
|
load_info:
|
||||||
url: "https://drive.google.com/uc?export=download&id=0B3gersZ2cHIxRm5PMWRoTkdHdHc"
|
url: "https://github.com/PINTO0309/MobileNet-SSD-RealSense/raw/refs/heads/master/caffemodel/MobileNetSSD/MobileNetSSD_deploy.caffemodel"
|
||||||
sha1: "994d30a8afaa9e754d17d2373b2d62a7dfbaaf7a"
|
sha1: "994d30a8afaa9e754d17d2373b2d62a7dfbaaf7a"
|
||||||
model: "MobileNetSSD_deploy.caffemodel"
|
model: "MobileNetSSD_deploy.caffemodel"
|
||||||
|
config_load_info:
|
||||||
|
url: "https://github.com/PINTO0309/MobileNet-SSD-RealSense/raw/refs/heads/master/caffemodel/MobileNetSSD/MobileNetSSD_deploy.prototxt"
|
||||||
|
sha1: "25c8404cecdef638c2bd9ac7f3b46a8b96897deb"
|
||||||
config: "MobileNetSSD_deploy.prototxt"
|
config: "MobileNetSSD_deploy.prototxt"
|
||||||
mean: [127.5, 127.5, 127.5]
|
mean: [127.5, 127.5, 127.5]
|
||||||
scale: 0.007843
|
scale: 0.007843
|
||||||
@ -181,6 +188,7 @@ ssd_caffe:
|
|||||||
height: 300
|
height: 300
|
||||||
rgb: false
|
rgb: false
|
||||||
labels: "object_detection_classes_pascal_voc.txt"
|
labels: "object_detection_classes_pascal_voc.txt"
|
||||||
|
postprocessing: "ssd"
|
||||||
sample: "object_detection"
|
sample: "object_detection"
|
||||||
|
|
||||||
# TensorFlow implementation of SSD model from https://github.com/tensorflow/models/tree/master/research/object_detection
|
# TensorFlow implementation of SSD model from https://github.com/tensorflow/models/tree/master/research/object_detection
|
||||||
@ -202,6 +210,7 @@ ssd_tf:
|
|||||||
height: 300
|
height: 300
|
||||||
rgb: true
|
rgb: true
|
||||||
labels: "object_detection_classes_coco.txt"
|
labels: "object_detection_classes_coco.txt"
|
||||||
|
postprocessing: "ssd"
|
||||||
sample: "object_detection"
|
sample: "object_detection"
|
||||||
|
|
||||||
# TensorFlow implementation of Faster-RCNN model from https://github.com/tensorflow/models/tree/master/research/object_detection
|
# TensorFlow implementation of Faster-RCNN model from https://github.com/tensorflow/models/tree/master/research/object_detection
|
||||||
@ -222,6 +231,7 @@ faster_rcnn_tf:
|
|||||||
width: 800
|
width: 800
|
||||||
height: 600
|
height: 600
|
||||||
rgb: true
|
rgb: true
|
||||||
|
postprocessing: "ssd"
|
||||||
sample: "object_detection"
|
sample: "object_detection"
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
|
@ -76,7 +76,7 @@ string modelName, framework;
|
|||||||
|
|
||||||
static void preprocess(const Mat& frame, Net& net, Size inpSize);
|
static void preprocess(const Mat& frame, Net& net, Size inpSize);
|
||||||
|
|
||||||
static void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vector<int>& classIds, vector<float>& confidences, vector<Rect>& boxes, const string yolo_name);
|
static void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vector<int>& classIds, vector<float>& confidences, vector<Rect>& boxes, const string postprocessing);
|
||||||
|
|
||||||
static void drawPred(vector<int>& classIds, vector<float>& confidences, vector<Rect>& boxes, Mat& frame, FontFace& sans, int stdSize, int stdWeight, int stdImgSize, int stdThickness);
|
static void drawPred(vector<int>& classIds, vector<float>& confidences, vector<Rect>& boxes, Mat& frame, FontFace& sans, int stdSize, int stdWeight, int stdImgSize, int stdThickness);
|
||||||
|
|
||||||
@ -91,7 +91,7 @@ static void yoloPostProcessing(
|
|||||||
vector<Rect2d>& keep_boxes,
|
vector<Rect2d>& keep_boxes,
|
||||||
float conf_threshold,
|
float conf_threshold,
|
||||||
float iou_threshold,
|
float iou_threshold,
|
||||||
const string& yolo_name);
|
const string& postprocessing);
|
||||||
|
|
||||||
static void printAliases(string& zooFile){
|
static void printAliases(string& zooFile){
|
||||||
vector<string> aliases = findAliases(zooFile, "object_detection");
|
vector<string> aliases = findAliases(zooFile, "object_detection");
|
||||||
@ -195,12 +195,13 @@ int main(int argc, char** argv)
|
|||||||
inpHeight = parser.get<int>("height");
|
inpHeight = parser.get<int>("height");
|
||||||
int async = parser.get<int>("async");
|
int async = parser.get<int>("async");
|
||||||
paddingValue = parser.get<float>("padvalue");
|
paddingValue = parser.get<float>("padvalue");
|
||||||
const string yolo_name = parser.get<String>("postprocessing");
|
const string postprocessing = parser.get<String>("postprocessing");
|
||||||
paddingMode = static_cast<ImagePaddingMode>(parser.get<int>("paddingmode"));
|
paddingMode = static_cast<ImagePaddingMode>(parser.get<int>("paddingmode"));
|
||||||
//![preprocess_params]
|
//![preprocess_params]
|
||||||
String sha1 = parser.get<String>("sha1");
|
String sha1 = parser.get<String>("sha1");
|
||||||
|
String config_sha1 = parser.get<String>("config_sha1");
|
||||||
const string modelPath = findModel(parser.get<String>("model"), sha1);
|
const string modelPath = findModel(parser.get<String>("model"), sha1);
|
||||||
const string configPath = findFile(parser.get<String>("config"));
|
const string configPath = findModel(parser.get<String>("config"), config_sha1);
|
||||||
framework = modelPath.substr(modelPath.rfind('.') + 1);
|
framework = modelPath.substr(modelPath.rfind('.') + 1);
|
||||||
|
|
||||||
if (parser.has("labels"))
|
if (parser.has("labels"))
|
||||||
@ -216,7 +217,11 @@ int main(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
//![read_net]
|
//![read_net]
|
||||||
Net net = readNet(modelPath, configPath);
|
EngineType engine = ENGINE_AUTO;
|
||||||
|
if ((parser.get<String>("backend") != "default") || (parser.get<String>("target") != "cpu")){
|
||||||
|
engine = ENGINE_CLASSIC;
|
||||||
|
}
|
||||||
|
Net net = readNet(modelPath, configPath, "", engine);
|
||||||
int backend = getBackendID(parser.get<String>("backend"));
|
int backend = getBackendID(parser.get<String>("backend"));
|
||||||
net.setPreferableBackend(backend);
|
net.setPreferableBackend(backend);
|
||||||
net.setPreferableTarget(getTargetID(parser.get<String>("target")));
|
net.setPreferableTarget(getTargetID(parser.get<String>("target")));
|
||||||
@ -230,7 +235,7 @@ int main(int argc, char** argv)
|
|||||||
|
|
||||||
// Open a video file or an image file or a camera stream.
|
// Open a video file or an image file or a camera stream.
|
||||||
VideoCapture cap;
|
VideoCapture cap;
|
||||||
bool openSuccess = parser.has("input") ? cap.open(parser.get<String>("input")) : cap.open(parser.get<int>("device"));
|
bool openSuccess = parser.has("input") ? cap.open(findFile(parser.get<String>("input"))) : cap.open(parser.get<int>("device"));
|
||||||
if (!openSuccess){
|
if (!openSuccess){
|
||||||
cout << "Could not open input file or camera device" << endl;
|
cout << "Could not open input file or camera device" << endl;
|
||||||
return 0;
|
return 0;
|
||||||
@ -324,7 +329,7 @@ int main(int argc, char** argv)
|
|||||||
classIds.clear();
|
classIds.clear();
|
||||||
confidences.clear();
|
confidences.clear();
|
||||||
boxes.clear();
|
boxes.clear();
|
||||||
postprocess(frame, outs, net, backend, classIds, confidences, boxes, yolo_name);
|
postprocess(frame, outs, net, backend, classIds, confidences, boxes, postprocessing);
|
||||||
|
|
||||||
drawPred(classIds, confidences, boxes, frame, sans, stdSize, stdWeight, stdImgSize, stdThickness);
|
drawPred(classIds, confidences, boxes, frame, sans, stdSize, stdWeight, stdImgSize, stdThickness);
|
||||||
|
|
||||||
@ -354,7 +359,7 @@ int main(int argc, char** argv)
|
|||||||
CV_Error(Error::StsNotImplemented, "Asynchronous forward is supported only with Inference Engine backend.");
|
CV_Error(Error::StsNotImplemented, "Asynchronous forward is supported only with Inference Engine backend.");
|
||||||
// Threading is disabled, run synchronously
|
// Threading is disabled, run synchronously
|
||||||
Mat frame, blob;
|
Mat frame, blob;
|
||||||
while (waitKey(100) < 0) {
|
while (waitKey(1) < 0) {
|
||||||
cap >> frame;
|
cap >> frame;
|
||||||
if (frame.empty()) {
|
if (frame.empty()) {
|
||||||
waitKey();
|
waitKey();
|
||||||
@ -369,7 +374,7 @@ int main(int argc, char** argv)
|
|||||||
confidences.clear();
|
confidences.clear();
|
||||||
boxes.clear();
|
boxes.clear();
|
||||||
|
|
||||||
postprocess(frame, outs, net, backend, classIds, confidences, boxes, yolo_name);
|
postprocess(frame, outs, net, backend, classIds, confidences, boxes, postprocessing);
|
||||||
|
|
||||||
drawPred(classIds, confidences, boxes, frame, sans, stdSize, stdWeight, stdImgSize, stdThickness);
|
drawPred(classIds, confidences, boxes, frame, sans, stdSize, stdWeight, stdImgSize, stdThickness);
|
||||||
|
|
||||||
@ -379,7 +384,7 @@ int main(int argc, char** argv)
|
|||||||
int weight = static_cast<int>((stdWeight * imgWidth) / (stdImgSize * 1.5));
|
int weight = static_cast<int>((stdWeight * imgWidth) / (stdImgSize * 1.5));
|
||||||
double freq = getTickFrequency() / 1000;
|
double freq = getTickFrequency() / 1000;
|
||||||
double t = net.getPerfProfile(layersTimes) / freq;
|
double t = net.getPerfProfile(layersTimes) / freq;
|
||||||
string label = format("Inference time: %.2f ms", t);
|
string label = format("FPS: %.2f", 1000/t);
|
||||||
putText(frame, label, Point(0, size), Scalar(0, 255, 0), sans, size, weight);
|
putText(frame, label, Point(0, size), Scalar(0, 255, 0), sans, size, weight);
|
||||||
imshow(kWinName, frame);
|
imshow(kWinName, frame);
|
||||||
}
|
}
|
||||||
@ -414,15 +419,6 @@ void preprocess(const Mat& frame, Net& net, Size inpSize)
|
|||||||
|
|
||||||
// Set the blob as the network input
|
// Set the blob as the network input
|
||||||
net.setInput(inp);
|
net.setInput(inp);
|
||||||
|
|
||||||
// Check if the model is Faster-RCNN or R-FCN
|
|
||||||
if (net.getLayer(0)->outputNameToIndex("im_info") != -1)
|
|
||||||
{
|
|
||||||
// Resize the frame and prepare imInfo
|
|
||||||
resize(frame, frame, size);
|
|
||||||
Mat imInfo = (Mat_<float>(1, 3) << size.height, size.width, 1.6f);
|
|
||||||
net.setInput(imInfo, "im_info");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void yoloPostProcessing(
|
void yoloPostProcessing(
|
||||||
@ -432,7 +428,7 @@ void yoloPostProcessing(
|
|||||||
vector<Rect2d>& keep_boxes,
|
vector<Rect2d>& keep_boxes,
|
||||||
float conf_threshold,
|
float conf_threshold,
|
||||||
float iou_threshold,
|
float iou_threshold,
|
||||||
const string& yolo_name)
|
const string& postprocessing)
|
||||||
{
|
{
|
||||||
// Retrieve
|
// Retrieve
|
||||||
vector<int> classIds;
|
vector<int> classIds;
|
||||||
@ -441,12 +437,12 @@ void yoloPostProcessing(
|
|||||||
|
|
||||||
vector<Mat> outs_copy = outs;
|
vector<Mat> outs_copy = outs;
|
||||||
|
|
||||||
if (yolo_name == "yolov8")
|
if (postprocessing == "yolov8")
|
||||||
{
|
{
|
||||||
transposeND(outs_copy[0], {0, 2, 1}, outs_copy[0]);
|
transposeND(outs_copy[0], {0, 2, 1}, outs_copy[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (yolo_name == "yolonas")
|
if (postprocessing == "yolonas")
|
||||||
{
|
{
|
||||||
// outs contains 2 elements of shape [1, 8400, 80] and [1, 8400, 4]. Concat them to get [1, 8400, 84]
|
// outs contains 2 elements of shape [1, 8400, 80] and [1, 8400, 4]. Concat them to get [1, 8400, 84]
|
||||||
Mat concat_out;
|
Mat concat_out;
|
||||||
@ -467,16 +463,16 @@ void yoloPostProcessing(
|
|||||||
for (int i = 0; i < preds.rows; ++i)
|
for (int i = 0; i < preds.rows; ++i)
|
||||||
{
|
{
|
||||||
// filter out non-object
|
// filter out non-object
|
||||||
float obj_conf = (yolo_name == "yolov8" || yolo_name == "yolonas") ? 1.0f : preds.at<float>(i, 4);
|
float obj_conf = (postprocessing == "yolov8" || postprocessing == "yolonas") ? 1.0f : preds.at<float>(i, 4);
|
||||||
if (obj_conf < conf_threshold)
|
if (obj_conf < conf_threshold)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
Mat scores = preds.row(i).colRange((yolo_name == "yolov8" || yolo_name == "yolonas") ? 4 : 5, preds.cols);
|
Mat scores = preds.row(i).colRange((postprocessing == "yolov8" || postprocessing == "yolonas") ? 4 : 5, preds.cols);
|
||||||
double conf;
|
double conf;
|
||||||
Point maxLoc;
|
Point maxLoc;
|
||||||
minMaxLoc(scores, 0, &conf, 0, &maxLoc);
|
minMaxLoc(scores, 0, &conf, 0, &maxLoc);
|
||||||
|
|
||||||
conf = (yolo_name == "yolov8" || yolo_name == "yolonas") ? conf : conf * obj_conf;
|
conf = (postprocessing == "yolov8" || postprocessing == "yolonas") ? conf : conf * obj_conf;
|
||||||
if (conf < conf_threshold)
|
if (conf < conf_threshold)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@ -488,7 +484,7 @@ void yoloPostProcessing(
|
|||||||
double h = det[3];
|
double h = det[3];
|
||||||
|
|
||||||
// [x1, y1, x2, y2]
|
// [x1, y1, x2, y2]
|
||||||
if (yolo_name == "yolonas") {
|
if (postprocessing == "yolonas") {
|
||||||
boxes.push_back(Rect2d(cx, cy, w, h));
|
boxes.push_back(Rect2d(cx, cy, w, h));
|
||||||
} else {
|
} else {
|
||||||
boxes.push_back(Rect2d(cx - 0.5 * w, cy - 0.5 * h,
|
boxes.push_back(Rect2d(cx - 0.5 * w, cy - 0.5 * h,
|
||||||
@ -511,12 +507,10 @@ void yoloPostProcessing(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vector<int>& classIds, vector<float>& confidences, vector<Rect>& boxes, const string yolo_name)
|
void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vector<int>& classIds, vector<float>& confidences, vector<Rect>& boxes, const string postprocessing)
|
||||||
{
|
{
|
||||||
static vector<int> outLayers = net.getUnconnectedOutLayers();
|
static vector<int> outLayers = net.getUnconnectedOutLayers();
|
||||||
static string outLayerType = net.getLayer(outLayers[0])->type;
|
if (postprocessing == "ssd")
|
||||||
|
|
||||||
if (outLayerType == "DetectionOutput")
|
|
||||||
{
|
{
|
||||||
// Network produces output blob with a shape 1x1xNx7 where N is a number of
|
// Network produces output blob with a shape 1x1xNx7 where N is a number of
|
||||||
// detections and an every detection is a vector of values
|
// detections and an every detection is a vector of values
|
||||||
@ -552,7 +546,7 @@ void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vec
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (outLayerType == "Region")
|
else if (postprocessing == "darknet")
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < outs.size(); ++i)
|
for (size_t i = 0; i < outs.size(); ++i)
|
||||||
{
|
{
|
||||||
@ -582,7 +576,7 @@ void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vec
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (outLayerType == "Identity")
|
else if (postprocessing == "yolov8" || postprocessing == "yolov5")
|
||||||
{
|
{
|
||||||
//![forward_buffers]
|
//![forward_buffers]
|
||||||
vector<int> keep_classIds;
|
vector<int> keep_classIds;
|
||||||
@ -591,7 +585,7 @@ void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vec
|
|||||||
//![forward_buffers]
|
//![forward_buffers]
|
||||||
|
|
||||||
//![postprocess]
|
//![postprocess]
|
||||||
yoloPostProcessing(outs, keep_classIds, keep_confidences, keep_boxes, confThreshold, nmsThreshold, yolo_name);
|
yoloPostProcessing(outs, keep_classIds, keep_confidences, keep_boxes, confThreshold, nmsThreshold, postprocessing);
|
||||||
//![postprocess]
|
//![postprocess]
|
||||||
|
|
||||||
for (size_t i = 0; i < keep_classIds.size(); ++i)
|
for (size_t i = 0; i < keep_classIds.size(); ++i)
|
||||||
@ -614,12 +608,13 @@ void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vec
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
|
cout<< ("Unknown postprocessing method: " + postprocessing)<<endl;
|
||||||
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// NMS is used inside Region layer only on DNN_BACKEND_OPENCV for other backends we need NMS in sample
|
// NMS is used inside Region layer only on DNN_BACKEND_OPENCV for other backends we need NMS in sample
|
||||||
// or NMS is required if the number of outputs > 1
|
// or NMS is required if the number of outputs > 1
|
||||||
if (outLayers.size() > 1 || (outLayerType == "Region" && backend != DNN_BACKEND_OPENCV))
|
if (outLayers.size() > 1 || (postprocessing == "darknet" && backend != DNN_BACKEND_OPENCV))
|
||||||
{
|
{
|
||||||
map<int, vector<size_t> > class2indices;
|
map<int, vector<size_t> > class2indices;
|
||||||
for (size_t i = 0; i < classIds.size(); i++)
|
for (size_t i = 0; i < classIds.size(); i++)
|
||||||
|
@ -37,9 +37,6 @@ parser.add_argument('--out_tf_graph', default='graph.pbtxt',
|
|||||||
help='For models from TensorFlow Object Detection API, you may '
|
help='For models from TensorFlow Object Detection API, you may '
|
||||||
'pass a .config file which was used for training through --config '
|
'pass a .config file which was used for training through --config '
|
||||||
'argument. This way an additional .pbtxt file with TensorFlow graph will be created.')
|
'argument. This way an additional .pbtxt file with TensorFlow graph will be created.')
|
||||||
parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'darknet', 'dldt', 'onnx'],
|
|
||||||
help='Optional name of an origin framework of the model. '
|
|
||||||
'Detect it automatically if it does not set.')
|
|
||||||
parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
|
parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
|
||||||
parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold')
|
parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold')
|
||||||
parser.add_argument('--backend', default="default", type=str, choices=backends,
|
parser.add_argument('--backend', default="default", type=str, choices=backends,
|
||||||
@ -76,8 +73,9 @@ if args.alias is None or hasattr(args, 'help'):
|
|||||||
|
|
||||||
args.model = findModel(args.model, args.sha1)
|
args.model = findModel(args.model, args.sha1)
|
||||||
if args.config is not None:
|
if args.config is not None:
|
||||||
args.config = findFile(args.config)
|
args.config = findModel(args.config, args.config_sha1)
|
||||||
args.labels = findFile(args.labels)
|
if args.labels is not None:
|
||||||
|
args.labels = findFile(args.labels)
|
||||||
|
|
||||||
# If config specified, try to load it as TensorFlow Object Detection API's pipeline.
|
# If config specified, try to load it as TensorFlow Object Detection API's pipeline.
|
||||||
config = readTextMessage(args.config)
|
config = readTextMessage(args.config)
|
||||||
@ -100,7 +98,10 @@ if args.labels:
|
|||||||
labels = f.read().rstrip('\n').split('\n')
|
labels = f.read().rstrip('\n').split('\n')
|
||||||
|
|
||||||
# Load a network
|
# Load a network
|
||||||
net = cv.dnn.readNet(args.model, args.config, args.framework)
|
engine = cv.dnn.ENGINE_AUTO
|
||||||
|
if args.backend != "default" or args.target != "cpu":
|
||||||
|
engine = cv.dnn.ENGINE_CLASSIC
|
||||||
|
net = cv.dnn.readNet(args.model, args.config, "", engine)
|
||||||
net.setPreferableBackend(get_backend_id(args.backend))
|
net.setPreferableBackend(get_backend_id(args.backend))
|
||||||
net.setPreferableTarget(get_target_id(args.target))
|
net.setPreferableTarget(get_target_id(args.target))
|
||||||
outNames = net.getUnconnectedOutLayersNames()
|
outNames = net.getUnconnectedOutLayersNames()
|
||||||
@ -126,14 +127,10 @@ def postprocess(frame, outs):
|
|||||||
frameHeight = frame.shape[0]
|
frameHeight = frame.shape[0]
|
||||||
frameWidth = frame.shape[1]
|
frameWidth = frame.shape[1]
|
||||||
|
|
||||||
layerNames = net.getLayerNames()
|
|
||||||
lastLayerId = net.getLayerId(layerNames[-1])
|
|
||||||
lastLayer = net.getLayer(lastLayerId)
|
|
||||||
|
|
||||||
classIds = []
|
classIds = []
|
||||||
confidences = []
|
confidences = []
|
||||||
boxes = []
|
boxes = []
|
||||||
if lastLayer.type == 'DetectionOutput':
|
if args.postprocessing == 'ssd':
|
||||||
# Network produces output blob with a shape 1x1xNx7 where N is a number of
|
# Network produces output blob with a shape 1x1xNx7 where N is a number of
|
||||||
# detections and an every detection is a vector of values
|
# detections and an every detection is a vector of values
|
||||||
# [batchId, classId, confidence, left, top, right, bottom]
|
# [batchId, classId, confidence, left, top, right, bottom]
|
||||||
@ -157,21 +154,12 @@ def postprocess(frame, outs):
|
|||||||
classIds.append(int(detection[1]) - 1) # Skip background label
|
classIds.append(int(detection[1]) - 1) # Skip background label
|
||||||
confidences.append(float(confidence))
|
confidences.append(float(confidence))
|
||||||
boxes.append([left, top, width, height])
|
boxes.append([left, top, width, height])
|
||||||
elif lastLayer.type == 'Region' or args.postprocessing == 'yolov8':
|
|
||||||
# Network produces output blob with a shape NxC where N is a number of
|
elif args.postprocessing == 'darknet':
|
||||||
# detected objects and C is a number of classes + 4 where the first 4
|
box_scale_w = frameWidth
|
||||||
# numbers are [center_x, center_y, width, height]
|
box_scale_h = frameHeight
|
||||||
if args.postprocessing == 'yolov8':
|
|
||||||
box_scale_w = frameWidth / args.width
|
|
||||||
box_scale_h = frameHeight / args.height
|
|
||||||
else:
|
|
||||||
box_scale_w = frameWidth
|
|
||||||
box_scale_h = frameHeight
|
|
||||||
|
|
||||||
for out in outs:
|
for out in outs:
|
||||||
if args.postprocessing == 'yolov8':
|
|
||||||
out = out[0].transpose(1, 0)
|
|
||||||
|
|
||||||
for detection in out:
|
for detection in out:
|
||||||
scores = detection[4:]
|
scores = detection[4:]
|
||||||
if args.background_label_id >= 0:
|
if args.background_label_id >= 0:
|
||||||
@ -188,13 +176,47 @@ def postprocess(frame, outs):
|
|||||||
classIds.append(classId)
|
classIds.append(classId)
|
||||||
confidences.append(float(confidence))
|
confidences.append(float(confidence))
|
||||||
boxes.append([left, top, width, height])
|
boxes.append([left, top, width, height])
|
||||||
|
|
||||||
|
elif args.postprocessing == 'yolov8' or args.postprocessing == 'yolov5':
|
||||||
|
# Network produces output blob with a shape NxC where N is a number of
|
||||||
|
# detected objects and C is a number of classes + 4 where the first 4
|
||||||
|
# numbers are [center_x, center_y, width, height]
|
||||||
|
box_scale_w = frameWidth / args.width
|
||||||
|
box_scale_h = frameHeight / args.height
|
||||||
|
|
||||||
|
for out in outs:
|
||||||
|
if args.postprocessing == 'yolov8':
|
||||||
|
out = out[0].transpose(1, 0)
|
||||||
|
else: # YOLOv5, no transposition needed
|
||||||
|
out = out[0]
|
||||||
|
|
||||||
|
for detection in out:
|
||||||
|
if args.postprocessing == 'yolov8':
|
||||||
|
scores = detection[4:]
|
||||||
|
obj_conf = 1
|
||||||
|
else:
|
||||||
|
scores = detection[5:]
|
||||||
|
obj_conf = detection[4]
|
||||||
|
|
||||||
|
classId = np.argmax(scores)
|
||||||
|
confidence = scores[classId]*obj_conf
|
||||||
|
if confidence > confThreshold:
|
||||||
|
center_x = int(detection[0] * box_scale_w)
|
||||||
|
center_y = int(detection[1] * box_scale_h)
|
||||||
|
width = int(detection[2] * box_scale_w)
|
||||||
|
height = int(detection[3] * box_scale_h)
|
||||||
|
left = int(center_x - width / 2)
|
||||||
|
top = int(center_y - height / 2)
|
||||||
|
classIds.append(classId)
|
||||||
|
confidences.append(float(confidence))
|
||||||
|
boxes.append([left, top, width, height])
|
||||||
else:
|
else:
|
||||||
print('Unknown output layer type: ' + lastLayer.type)
|
print('Unknown postprocessing method: ' + args.postprocessing)
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
# NMS is used inside Region layer only on DNN_BACKEND_OPENCV for another backends we need NMS in sample
|
# NMS is used inside Region layer only on DNN_BACKEND_OPENCV for another backends we need NMS in sample
|
||||||
# or NMS is required if number of outputs > 1
|
# or NMS is required if number of outputs > 1
|
||||||
if len(outNames) > 1 or (lastLayer.type == 'Region' or args.postprocessing == 'yolov8') and args.backend != cv.dnn.DNN_BACKEND_OPENCV:
|
if len(outNames) > 1 or (args.postprocessing == 'darknet' or args.postprocessing == 'yolov8' or args.postprocessing == 'yolov5') and args.backend != cv.dnn.DNN_BACKEND_OPENCV:
|
||||||
indices = []
|
indices = []
|
||||||
classIds = np.array(classIds)
|
classIds = np.array(classIds)
|
||||||
boxes = np.array(boxes)
|
boxes = np.array(boxes)
|
||||||
@ -308,14 +330,11 @@ def processingThreadBody():
|
|||||||
# Create a 4D blob from a frame.
|
# Create a 4D blob from a frame.
|
||||||
inpWidth = args.width if args.width else frameWidth
|
inpWidth = args.width if args.width else frameWidth
|
||||||
inpHeight = args.height if args.height else frameHeight
|
inpHeight = args.height if args.height else frameHeight
|
||||||
blob = cv.dnn.blobFromImage(frame, size=(inpWidth, inpHeight), swapRB=args.rgb, ddepth=cv.CV_32F)
|
blob = cv.dnn.blobFromImage(frame, scalefactor=args.scale, mean=args.mean, size=(inpWidth, inpHeight), swapRB=args.rgb, ddepth=cv.CV_32F)
|
||||||
processedFramesQueue.put(frame)
|
processedFramesQueue.put(frame)
|
||||||
|
|
||||||
# Run a model
|
# Run a model
|
||||||
net.setInput(blob, scalefactor=args.scale, mean=args.mean)
|
net.setInput(blob)
|
||||||
if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN
|
|
||||||
frame = cv.resize(frame, (inpWidth, inpHeight))
|
|
||||||
net.setInput(np.array([[inpHeight, inpWidth, 1.6]], dtype=np.float32), 'im_info')
|
|
||||||
|
|
||||||
if asyncN:
|
if asyncN:
|
||||||
futureOutputs.append(net.forwardAsync())
|
futureOutputs.append(net.forwardAsync())
|
||||||
@ -385,9 +404,9 @@ else:
|
|||||||
|
|
||||||
inpWidth = args.width if args.width else frameWidth
|
inpWidth = args.width if args.width else frameWidth
|
||||||
inpHeight = args.height if args.height else frameHeight
|
inpHeight = args.height if args.height else frameHeight
|
||||||
blob = cv.dnn.blobFromImage(frame, size=(inpWidth, inpHeight), swapRB=args.rgb, ddepth=cv.CV_32F)
|
blob = cv.dnn.blobFromImage(frame, scalefactor=args.scale, mean=args.mean, size=(inpWidth, inpHeight), swapRB=args.rgb, ddepth=cv.CV_32F)
|
||||||
|
|
||||||
net.setInput(blob, scalefactor=args.scale, mean=args.mean)
|
net.setInput(blob)
|
||||||
outs = net.forward(outNames)
|
outs = net.forward(outNames)
|
||||||
|
|
||||||
boxes, classIds, confidences, indices = postprocess(frame, outs)
|
boxes, classIds, confidences, indices = postprocess(frame, outs)
|
||||||
|
Loading…
Reference in New Issue
Block a user