From efbe580ff32163eaa4e2d76b245b9ea9e96e9038 Mon Sep 17 00:00:00 2001
From: Gursimar Singh <gursimar@bigvisionllc.com>
Date: Mon, 2 Dec 2024 11:35:25 +0530
Subject: [PATCH] Merge pull request #26486 from
 gursimarsingh:object_detection_engine_update

Code fixes and post-processing selection based on models.yml in the Object Detection sample #26486

## Major Changes

1. Changes to add findModel support for config file in models like yolov4, yolov4-tiny, yolov3, ssd_caffe, tiny-yolo-voc, ssd_tf and faster_rcnn_tf.
2. Added new model and config download links for ssd_caffe, as previous links were not working.
3. Switched to DNN ENGINE_CLASSIC for configurations with a non-default backend or a non-CPU target, as the new engine does not support them.
4. Fixes in python sample related to yolov5 usage.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 samples/dnn/common.hpp           |  9 +++-
 samples/dnn/common.py            |  7 ++-
 samples/dnn/models.yml           | 18 +++++--
 samples/dnn/object_detection.cpp | 65 +++++++++++-------------
 samples/dnn/object_detection.py  | 85 +++++++++++++++++++-------------
 5 files changed, 110 insertions(+), 74 deletions(-)

diff --git a/samples/dnn/common.hpp b/samples/dnn/common.hpp
index a01b1ca40f..4e07a74778 100644
--- a/samples/dnn/common.hpp
+++ b/samples/dnn/common.hpp
@@ -62,7 +62,12 @@ std::string genArgument(const std::string& argName, const std::string& help,
                 FileNode value = node[argName];
                 if (argName.find("sha1") != std::string::npos) {
                     std::string prefix = argName.substr(0, argName.find("sha1"));
-                    value = node[prefix+"load_info"][argName];
+                    if (prefix == "config_"){
+                        value = node[prefix+"load_info"]["sha1"];
+                    }
+                    else{
+                        value = node[prefix+"load_info"][argName];
+                    }
                 }
                 if (argName.find("download_sha") != std::string::npos) {
                     std::string prefix = argName.substr(0, argName.find("download_sha"));
@@ -183,6 +188,8 @@ std::string genPreprocArguments(const std::string& modelName, const std::string&
                        modelName, zooFile)+
            genArgument(prefix + "sha1", "Optional path to hashsum of downloaded model to be loaded from models.yml",
                        modelName, zooFile)+
+           genArgument(prefix + "config_sha1", "Optional path to hashsum of downloaded config to be loaded from models.yml",
+                       modelName, zooFile)+
            genArgument(prefix + "download_sha", "Optional path to hashsum of downloaded model to be loaded from models.yml",
                        modelName, zooFile);
 }
diff --git a/samples/dnn/common.py b/samples/dnn/common.py
index 4c5cefc8fc..5e211a30ca 100644
--- a/samples/dnn/common.py
+++ b/samples/dnn/common.py
@@ -19,7 +19,10 @@ def add_argument(zoo, parser, name, help, required=False, default=None, type=Non
             if "sha1" in name:
                 prefix = name.replace("sha1", "")
                 value = node.getNode(prefix + "load_info")
-                value = value.getNode(name)
+                if prefix == "config_":
+                    value = value.getNode("sha1")
+                else:
+                    value = value.getNode(name)
             if "download_sha" in name:
                 prefix = name.replace("download_sha", "")
                 value = node.getNode(prefix + "load_info")
@@ -97,6 +100,8 @@ def add_preproc_args(zoo, parser, sample, alias=None, prefix=""):
                  help='An index of background class in predictions. If not negative, exclude such class from list of classes.', alias=alias)
     add_argument(zoo, parser, prefix+'sha1', type=str,
                  help='Optional path to hashsum of downloaded model to be loaded from models.yml', alias=alias)
+    add_argument(zoo, parser, prefix+'config_sha1', type=str,
+                 help='Optional path to hashsum of downloaded config to be loaded from models.yml', alias=alias)
     add_argument(zoo, parser, prefix+'download_sha', type=str,
                  help='Optional path to hashsum of downloaded model to be loaded from models.yml', alias=alias)
 
diff --git a/samples/dnn/models.yml b/samples/dnn/models.yml
index 41fe7862b8..c7325d7cb3 100644
--- a/samples/dnn/models.yml
+++ b/samples/dnn/models.yml
@@ -89,8 +89,8 @@ yolov5l:
   width: 640
   height: 640
   rgb: true
-  classes: "object_detection_classes_yolo.txt"
-  background_label_id: 0
+  labels: "object_detection_classes_yolo.txt"
+  postprocessing: "yolov5"
   sample: "object_detection"
 
 # YOLO4 object detection family from Darknet (https://github.com/AlexeyAB/darknet)
@@ -112,6 +112,7 @@ yolov4:
   rgb: true
   labels: "object_detection_classes_yolo.txt"
   background_label_id: 0
+  postprocessing: "darknet"
   sample: "object_detection"
 
 yolov4-tiny:
@@ -130,6 +131,7 @@ yolov4-tiny:
   rgb: true
   labels: "object_detection_classes_yolo.txt"
   background_label_id: 0
+  postprocessing: "darknet"
   sample: "object_detection"
 
 yolov3:
@@ -148,6 +150,7 @@ yolov3:
   rgb: true
   labels: "object_detection_classes_yolo.txt"
   background_label_id: 0
+  postprocessing: "darknet"
   sample: "object_detection"
 
 tiny-yolo-voc:
@@ -166,14 +169,18 @@ tiny-yolo-voc:
   rgb: true
   labels: "object_detection_classes_pascal_voc.txt"
   background_label_id: 0
+  postprocessing: "darknet"
   sample: "object_detection"
 
-# Caffe implementation of SSD model from https://github.com/chuanqi305/MobileNet-SSD
+# Caffe implementation of SSD model from https://github.com/PINTO0309/MobileNet-SSD-RealSense
 ssd_caffe:
   load_info:
-    url: "https://drive.google.com/uc?export=download&id=0B3gersZ2cHIxRm5PMWRoTkdHdHc"
+    url: "https://github.com/PINTO0309/MobileNet-SSD-RealSense/raw/refs/heads/master/caffemodel/MobileNetSSD/MobileNetSSD_deploy.caffemodel"
     sha1: "994d30a8afaa9e754d17d2373b2d62a7dfbaaf7a"
   model: "MobileNetSSD_deploy.caffemodel"
+  config_load_info:
+    url: "https://github.com/PINTO0309/MobileNet-SSD-RealSense/raw/refs/heads/master/caffemodel/MobileNetSSD/MobileNetSSD_deploy.prototxt"
+    sha1: "25c8404cecdef638c2bd9ac7f3b46a8b96897deb"
   config: "MobileNetSSD_deploy.prototxt"
   mean: [127.5, 127.5, 127.5]
   scale: 0.007843
@@ -181,6 +188,7 @@ ssd_caffe:
   height: 300
   rgb: false
   labels: "object_detection_classes_pascal_voc.txt"
+  postprocessing: "ssd"
   sample: "object_detection"
 
 # TensorFlow implementation of SSD model from https://github.com/tensorflow/models/tree/master/research/object_detection
@@ -202,6 +210,7 @@ ssd_tf:
   height: 300
   rgb: true
   labels: "object_detection_classes_coco.txt"
+  postprocessing: "ssd"
   sample: "object_detection"
 
 # TensorFlow implementation of Faster-RCNN model from https://github.com/tensorflow/models/tree/master/research/object_detection
@@ -222,6 +231,7 @@ faster_rcnn_tf:
   width: 800
   height: 600
   rgb: true
+  postprocessing: "ssd"
   sample: "object_detection"
 
 ################################################################################
diff --git a/samples/dnn/object_detection.cpp b/samples/dnn/object_detection.cpp
index abbeada29a..d2be2f1eff 100644
--- a/samples/dnn/object_detection.cpp
+++ b/samples/dnn/object_detection.cpp
@@ -76,7 +76,7 @@ string modelName, framework;
 
 static void preprocess(const Mat& frame, Net& net, Size inpSize);
 
-static void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vector<int>& classIds, vector<float>& confidences, vector<Rect>& boxes, const string yolo_name);
+static void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vector<int>& classIds, vector<float>& confidences, vector<Rect>& boxes, const string postprocessing);
 
 static void drawPred(vector<int>& classIds, vector<float>& confidences, vector<Rect>& boxes, Mat& frame, FontFace& sans, int stdSize, int stdWeight, int stdImgSize, int stdThickness);
 
@@ -91,7 +91,7 @@ static void yoloPostProcessing(
     vector<Rect2d>& keep_boxes,
     float conf_threshold,
     float iou_threshold,
-    const string& yolo_name);
+    const string& postprocessing);
 
 static void printAliases(string& zooFile){
     vector<string> aliases = findAliases(zooFile, "object_detection");
@@ -195,12 +195,13 @@ int main(int argc, char** argv)
     inpHeight = parser.get<int>("height");
     int async = parser.get<int>("async");
     paddingValue = parser.get<float>("padvalue");
-    const string yolo_name = parser.get<String>("postprocessing");
+    const string postprocessing = parser.get<String>("postprocessing");
     paddingMode = static_cast<ImagePaddingMode>(parser.get<int>("paddingmode"));
     //![preprocess_params]
     String sha1 = parser.get<String>("sha1");
+    String config_sha1 = parser.get<String>("config_sha1");
     const string modelPath = findModel(parser.get<String>("model"), sha1);
-    const string configPath = findFile(parser.get<String>("config"));
+    const string configPath = findModel(parser.get<String>("config"), config_sha1);
     framework = modelPath.substr(modelPath.rfind('.') + 1);
 
     if (parser.has("labels"))
@@ -216,7 +217,11 @@ int main(int argc, char** argv)
         }
     }
     //![read_net]
-    Net net = readNet(modelPath, configPath);
+    EngineType engine = ENGINE_AUTO;
+    if ((parser.get<String>("backend") != "default") || (parser.get<String>("target") != "cpu")){
+        engine = ENGINE_CLASSIC;
+    }
+    Net net = readNet(modelPath, configPath, "", engine);
     int backend = getBackendID(parser.get<String>("backend"));
     net.setPreferableBackend(backend);
     net.setPreferableTarget(getTargetID(parser.get<String>("target")));
@@ -230,7 +235,7 @@ int main(int argc, char** argv)
 
     // Open a video file or an image file or a camera stream.
     VideoCapture cap;
-    bool openSuccess = parser.has("input") ? cap.open(parser.get<String>("input")) : cap.open(parser.get<int>("device"));
+    bool openSuccess = parser.has("input") ? cap.open(findFile(parser.get<String>("input"))) : cap.open(parser.get<int>("device"));
     if (!openSuccess){
         cout << "Could not open input file or camera device" << endl;
         return 0;
@@ -324,7 +329,7 @@ int main(int argc, char** argv)
             classIds.clear();
             confidences.clear();
             boxes.clear();
-            postprocess(frame, outs, net, backend, classIds, confidences, boxes, yolo_name);
+            postprocess(frame, outs, net, backend, classIds, confidences, boxes, postprocessing);
 
             drawPred(classIds, confidences, boxes, frame, sans, stdSize, stdWeight, stdImgSize, stdThickness);
 
@@ -354,7 +359,7 @@ int main(int argc, char** argv)
             CV_Error(Error::StsNotImplemented, "Asynchronous forward is supported only with Inference Engine backend.");
         // Threading is disabled, run synchronously
         Mat frame, blob;
-        while (waitKey(100) < 0) {
+        while (waitKey(1) < 0) {
             cap >> frame;
             if (frame.empty()) {
                 waitKey();
@@ -369,7 +374,7 @@ int main(int argc, char** argv)
             confidences.clear();
             boxes.clear();
 
-            postprocess(frame, outs, net, backend, classIds, confidences, boxes, yolo_name);
+            postprocess(frame, outs, net, backend, classIds, confidences, boxes, postprocessing);
 
             drawPred(classIds, confidences, boxes, frame, sans, stdSize, stdWeight, stdImgSize, stdThickness);
 
@@ -379,7 +384,7 @@ int main(int argc, char** argv)
             int weight = static_cast<int>((stdWeight * imgWidth) / (stdImgSize * 1.5));
             double freq = getTickFrequency() / 1000;
             double t = net.getPerfProfile(layersTimes) / freq;
-            string label = format("Inference time: %.2f ms", t);
+            string label = format("FPS: %.2f", 1000/t);
             putText(frame, label, Point(0, size), Scalar(0, 255, 0), sans, size, weight);
             imshow(kWinName, frame);
         }
@@ -414,15 +419,6 @@ void preprocess(const Mat& frame, Net& net, Size inpSize)
 
     // Set the blob as the network input
     net.setInput(inp);
-
-    // Check if the model is Faster-RCNN or R-FCN
-    if (net.getLayer(0)->outputNameToIndex("im_info") != -1)
-    {
-        // Resize the frame and prepare imInfo
-        resize(frame, frame, size);
-        Mat imInfo = (Mat_<float>(1, 3) << size.height, size.width, 1.6f);
-        net.setInput(imInfo, "im_info");
-    }
 }
 
 void yoloPostProcessing(
@@ -432,7 +428,7 @@ void yoloPostProcessing(
     vector<Rect2d>& keep_boxes,
     float conf_threshold,
     float iou_threshold,
-    const string& yolo_name)
+    const string& postprocessing)
 {
     // Retrieve
     vector<int> classIds;
@@ -441,12 +437,12 @@ void yoloPostProcessing(
 
     vector<Mat> outs_copy = outs;
 
-    if (yolo_name == "yolov8")
+    if (postprocessing == "yolov8")
     {
         transposeND(outs_copy[0], {0, 2, 1}, outs_copy[0]);
     }
 
-    if (yolo_name == "yolonas")
+    if (postprocessing == "yolonas")
     {
         // outs contains 2 elements of shape [1, 8400, 80] and [1, 8400, 4]. Concat them to get [1, 8400, 84]
         Mat concat_out;
@@ -467,16 +463,16 @@ void yoloPostProcessing(
         for (int i = 0; i < preds.rows; ++i)
         {
             // filter out non-object
-            float obj_conf = (yolo_name == "yolov8" || yolo_name == "yolonas") ? 1.0f : preds.at<float>(i, 4);
+            float obj_conf = (postprocessing == "yolov8" || postprocessing == "yolonas") ? 1.0f : preds.at<float>(i, 4);
             if (obj_conf < conf_threshold)
                 continue;
 
-            Mat scores = preds.row(i).colRange((yolo_name == "yolov8" || yolo_name == "yolonas") ? 4 : 5, preds.cols);
+            Mat scores = preds.row(i).colRange((postprocessing == "yolov8" || postprocessing == "yolonas") ? 4 : 5, preds.cols);
             double conf;
             Point maxLoc;
             minMaxLoc(scores, 0, &conf, 0, &maxLoc);
 
-            conf = (yolo_name == "yolov8" || yolo_name == "yolonas") ? conf : conf * obj_conf;
+            conf = (postprocessing == "yolov8" || postprocessing == "yolonas") ? conf : conf * obj_conf;
             if (conf < conf_threshold)
                 continue;
 
@@ -488,7 +484,7 @@ void yoloPostProcessing(
             double h = det[3];
 
             // [x1, y1, x2, y2]
-            if (yolo_name == "yolonas") {
+            if (postprocessing == "yolonas") {
                 boxes.push_back(Rect2d(cx, cy, w, h));
             } else {
                 boxes.push_back(Rect2d(cx - 0.5 * w, cy - 0.5 * h,
@@ -511,12 +507,10 @@ void yoloPostProcessing(
     }
 }
 
-void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vector<int>& classIds, vector<float>& confidences, vector<Rect>& boxes, const string yolo_name)
+void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vector<int>& classIds, vector<float>& confidences, vector<Rect>& boxes, const string postprocessing)
 {
     static vector<int> outLayers = net.getUnconnectedOutLayers();
-    static string outLayerType = net.getLayer(outLayers[0])->type;
-
-    if (outLayerType == "DetectionOutput")
+    if (postprocessing == "ssd")
     {
         // Network produces output blob with a shape 1x1xNx7 where N is a number of
         // detections and an every detection is a vector of values
@@ -552,7 +546,7 @@ void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vec
             }
         }
     }
-    else if (outLayerType == "Region")
+    else if (postprocessing == "darknet")
     {
         for (size_t i = 0; i < outs.size(); ++i)
         {
@@ -582,7 +576,7 @@ void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vec
             }
         }
     }
-    else if (outLayerType == "Identity")
+    else if (postprocessing == "yolov8" || postprocessing == "yolov5")
     {
         //![forward_buffers]
         vector<int> keep_classIds;
@@ -591,7 +585,7 @@ void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vec
         //![forward_buffers]
 
         //![postprocess]
-        yoloPostProcessing(outs, keep_classIds, keep_confidences, keep_boxes, confThreshold, nmsThreshold, yolo_name);
+        yoloPostProcessing(outs, keep_classIds, keep_confidences, keep_boxes, confThreshold, nmsThreshold, postprocessing);
         //![postprocess]
 
         for (size_t i = 0; i < keep_classIds.size(); ++i)
@@ -614,12 +608,13 @@ void postprocess(Mat& frame, const vector<Mat>& outs, Net& net, int backend, vec
     }
     else
     {
-        CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
+        cout<< ("Unknown postprocessing method: " + postprocessing)<<endl;
+        exit(-1);
     }
 
     // NMS is used inside Region layer only on DNN_BACKEND_OPENCV for other backends we need NMS in sample
     // or NMS is required if the number of outputs > 1
-    if (outLayers.size() > 1 || (outLayerType == "Region" && backend != DNN_BACKEND_OPENCV))
+    if (outLayers.size() > 1 || (postprocessing == "darknet" && backend != DNN_BACKEND_OPENCV))
     {
         map<int, vector<size_t> > class2indices;
         for (size_t i = 0; i < classIds.size(); i++)
diff --git a/samples/dnn/object_detection.py b/samples/dnn/object_detection.py
index f2171a5716..9d4f47ae8a 100644
--- a/samples/dnn/object_detection.py
+++ b/samples/dnn/object_detection.py
@@ -37,9 +37,6 @@ parser.add_argument('--out_tf_graph', default='graph.pbtxt',
                     help='For models from TensorFlow Object Detection API, you may '
                          'pass a .config file which was used for training through --config '
                          'argument. This way an additional .pbtxt file with TensorFlow graph will be created.')
-parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'darknet', 'dldt', 'onnx'],
-                    help='Optional name of an origin framework of the model. '
-                         'Detect it automatically if it does not set.')
 parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
 parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold')
 parser.add_argument('--backend', default="default", type=str, choices=backends,
@@ -76,8 +73,9 @@ if args.alias is None or hasattr(args, 'help'):
 
 args.model = findModel(args.model, args.sha1)
 if args.config is not None:
-    args.config = findFile(args.config)
-args.labels = findFile(args.labels)
+    args.config = findModel(args.config, args.config_sha1)
+if args.labels is not None:
+    args.labels = findFile(args.labels)
 
 # If config specified, try to load it as TensorFlow Object Detection API's pipeline.
 config = readTextMessage(args.config)
@@ -100,7 +98,10 @@ if args.labels:
         labels = f.read().rstrip('\n').split('\n')
 
 # Load a network
-net = cv.dnn.readNet(args.model, args.config, args.framework)
+engine = cv.dnn.ENGINE_AUTO
+if args.backend != "default" or args.target != "cpu":
+    engine = cv.dnn.ENGINE_CLASSIC
+net = cv.dnn.readNet(args.model, args.config, "", engine)
 net.setPreferableBackend(get_backend_id(args.backend))
 net.setPreferableTarget(get_target_id(args.target))
 outNames = net.getUnconnectedOutLayersNames()
@@ -126,14 +127,10 @@ def postprocess(frame, outs):
     frameHeight = frame.shape[0]
     frameWidth = frame.shape[1]
 
-    layerNames = net.getLayerNames()
-    lastLayerId = net.getLayerId(layerNames[-1])
-    lastLayer = net.getLayer(lastLayerId)
-
     classIds = []
     confidences = []
     boxes = []
-    if lastLayer.type == 'DetectionOutput':
+    if args.postprocessing == 'ssd':
         # Network produces output blob with a shape 1x1xNx7 where N is a number of
         # detections and an every detection is a vector of values
         # [batchId, classId, confidence, left, top, right, bottom]
@@ -157,21 +154,12 @@ def postprocess(frame, outs):
                     classIds.append(int(detection[1]) - 1)  # Skip background label
                     confidences.append(float(confidence))
                     boxes.append([left, top, width, height])
-    elif lastLayer.type == 'Region' or args.postprocessing == 'yolov8':
-        # Network produces output blob with a shape NxC where N is a number of
-        # detected objects and C is a number of classes + 4 where the first 4
-        # numbers are [center_x, center_y, width, height]
-        if args.postprocessing == 'yolov8':
-            box_scale_w = frameWidth / args.width
-            box_scale_h = frameHeight / args.height
-        else:
-            box_scale_w = frameWidth
-            box_scale_h = frameHeight
+
+    elif args.postprocessing == 'darknet':
+        box_scale_w = frameWidth
+        box_scale_h = frameHeight
 
         for out in outs:
-            if args.postprocessing == 'yolov8':
-                out = out[0].transpose(1, 0)
-
             for detection in out:
                 scores = detection[4:]
                 if args.background_label_id >= 0:
@@ -188,13 +176,47 @@ def postprocess(frame, outs):
                     classIds.append(classId)
                     confidences.append(float(confidence))
                     boxes.append([left, top, width, height])
+
+    elif args.postprocessing == 'yolov8' or args.postprocessing == 'yolov5':
+        # Network produces output blob with a shape NxC where N is a number of
+        # detected objects and C is a number of classes + 4 where the first 4
+        # numbers are [center_x, center_y, width, height]
+        box_scale_w = frameWidth / args.width
+        box_scale_h = frameHeight / args.height
+
+        for out in outs:
+            if args.postprocessing == 'yolov8':
+                out = out[0].transpose(1, 0)
+            else:  # YOLOv5, no transposition needed
+                out = out[0]
+
+            for detection in out:
+                if args.postprocessing == 'yolov8':
+                    scores = detection[4:]
+                    obj_conf = 1
+                else:
+                    scores = detection[5:]
+                    obj_conf = detection[4]
+
+                classId = np.argmax(scores)
+                confidence = scores[classId]*obj_conf
+                if confidence > confThreshold:
+                    center_x = int(detection[0] * box_scale_w)
+                    center_y = int(detection[1] * box_scale_h)
+                    width = int(detection[2] * box_scale_w)
+                    height = int(detection[3] * box_scale_h)
+                    left = int(center_x - width / 2)
+                    top = int(center_y - height / 2)
+                    classIds.append(classId)
+                    confidences.append(float(confidence))
+                    boxes.append([left, top, width, height])
     else:
-        print('Unknown output layer type: ' + lastLayer.type)
+        print('Unknown postprocessing method: ' + args.postprocessing)
         exit()
 
     # NMS is used inside Region layer only on DNN_BACKEND_OPENCV for another backends we need NMS in sample
     # or NMS is required if number of outputs > 1
-    if len(outNames) > 1 or (lastLayer.type == 'Region' or args.postprocessing == 'yolov8') and args.backend != cv.dnn.DNN_BACKEND_OPENCV:
+    if len(outNames) > 1 or (args.postprocessing == 'darknet' or args.postprocessing == 'yolov8' or args.postprocessing == 'yolov5') and args.backend != cv.dnn.DNN_BACKEND_OPENCV:
         indices = []
         classIds = np.array(classIds)
         boxes = np.array(boxes)
@@ -308,14 +330,11 @@ def processingThreadBody():
             # Create a 4D blob from a frame.
             inpWidth = args.width if args.width else frameWidth
             inpHeight = args.height if args.height else frameHeight
-            blob = cv.dnn.blobFromImage(frame, size=(inpWidth, inpHeight), swapRB=args.rgb, ddepth=cv.CV_32F)
+            blob = cv.dnn.blobFromImage(frame, scalefactor=args.scale, mean=args.mean, size=(inpWidth, inpHeight), swapRB=args.rgb, ddepth=cv.CV_32F)
             processedFramesQueue.put(frame)
 
             # Run a model
-            net.setInput(blob, scalefactor=args.scale, mean=args.mean)
-            if net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
-                frame = cv.resize(frame, (inpWidth, inpHeight))
-                net.setInput(np.array([[inpHeight, inpWidth, 1.6]], dtype=np.float32), 'im_info')
+            net.setInput(blob)
 
             if asyncN:
                 futureOutputs.append(net.forwardAsync())
@@ -385,9 +404,9 @@ else:
 
         inpWidth = args.width if args.width else frameWidth
         inpHeight = args.height if args.height else frameHeight
-        blob = cv.dnn.blobFromImage(frame, size=(inpWidth, inpHeight), swapRB=args.rgb, ddepth=cv.CV_32F)
+        blob = cv.dnn.blobFromImage(frame, scalefactor=args.scale, mean=args.mean, size=(inpWidth, inpHeight), swapRB=args.rgb, ddepth=cv.CV_32F)
 
-        net.setInput(blob, scalefactor=args.scale, mean=args.mean)
+        net.setInput(blob)
         outs = net.forward(outNames)
 
         boxes, classIds, confidences, indices = postprocess(frame, outs)