diff --git a/samples/dnn/object_detection.cpp b/samples/dnn/object_detection.cpp index 94a6666393..c30e2179af 100644 --- a/samples/dnn/object_detection.cpp +++ b/samples/dnn/object_detection.cpp @@ -153,51 +153,39 @@ void postprocess(Mat& frame, const std::vector& outs, Net& net) std::vector classIds; std::vector confidences; std::vector boxes; - if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN + if (outLayerType == "DetectionOutput") { // Network produces output blob with a shape 1x1xNx7 where N is a number of // detections and an every detection is a vector of values // [batchId, classId, confidence, left, top, right, bottom] - CV_Assert(outs.size() == 1); - float* data = (float*)outs[0].data; - for (size_t i = 0; i < outs[0].total(); i += 7) + CV_Assert(outs.size() > 0); + for (size_t k = 0; k < outs.size(); k++) { - float confidence = data[i + 2]; - if (confidence > confThreshold) + float* data = (float*)outs[k].data; + for (size_t i = 0; i < outs[k].total(); i += 7) { - int left = (int)data[i + 3]; - int top = (int)data[i + 4]; - int right = (int)data[i + 5]; - int bottom = (int)data[i + 6]; - int width = right - left + 1; - int height = bottom - top + 1; - classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id. - boxes.push_back(Rect(left, top, width, height)); - confidences.push_back(confidence); - } - } - } - else if (outLayerType == "DetectionOutput") - { - // Network produces output blob with a shape 1x1xNx7 where N is a number of - // detections and an every detection is a vector of values - // [batchId, classId, confidence, left, top, right, bottom] - CV_Assert(outs.size() == 1); - float* data = (float*)outs[0].data; - for (size_t i = 0; i < outs[0].total(); i += 7) - { - float confidence = data[i + 2]; - if (confidence > confThreshold) - { - int left = (int)(data[i + 3] * frame.cols); - int top = (int)(data[i + 4] * frame.rows); - int right = (int)(data[i + 5] * frame.cols); - int bottom = (int)(data[i + 6] * frame.rows); - int width = right - left + 1; - int height = bottom - top + 1; - classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id. - boxes.push_back(Rect(left, top, width, height)); - confidences.push_back(confidence); + float confidence = data[i + 2]; + if (confidence > confThreshold) + { + int left = (int)data[i + 3]; + int top = (int)data[i + 4]; + int right = (int)data[i + 5]; + int bottom = (int)data[i + 6]; + int width = right - left + 1; + int height = bottom - top + 1; + if (width * height <= 1) + { + left = (int)(data[i + 3] * frame.cols); + top = (int)(data[i + 4] * frame.rows); + right = (int)(data[i + 5] * frame.cols); + bottom = (int)(data[i + 6] * frame.rows); + width = right - left + 1; + height = bottom - top + 1; + } + classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id. + boxes.push_back(Rect(left, top, width, height)); + confidences.push_back(confidence); + } } } } diff --git a/samples/dnn/object_detection.py b/samples/dnn/object_detection.py index bf1c2e4236..3f7b0e23d7 100644 --- a/samples/dnn/object_detection.py +++ b/samples/dnn/object_detection.py @@ -102,7 +102,7 @@ def postprocess(frame, outs): classIds = [] confidences = [] boxes = [] - if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN + if lastLayer.type == 'DetectionOutput': # Network produces output blob with a shape 1x1xNx7 where N is a number of # detections and an every detection is a vector of values # [batchId, classId, confidence, left, top, right, bottom] @@ -116,23 +116,13 @@ def postprocess(frame, outs): bottom = int(detection[6]) width = right - left + 1 height = bottom - top + 1 - classIds.append(int(detection[1]) - 1) # Skip background label - confidences.append(float(confidence)) - boxes.append([left, top, width, height]) - elif lastLayer.type == 'DetectionOutput': - # Network produces output blob with a shape 1x1xNx7 where N is a number of - # detections and an every detection is a vector of values - # [batchId, classId, confidence, left, top, right, bottom] - for out in outs: - for detection in out[0, 0]: - confidence = detection[2] - if confidence > confThreshold: - left = int(detection[3] * frameWidth) - top = int(detection[4] * frameHeight) - right = int(detection[5] * frameWidth) - bottom = int(detection[6] * frameHeight) - width = right - left + 1 - height = bottom - top + 1 + if width * height <= 1: + left = int(detection[3] * frameWidth) + top = int(detection[4] * frameHeight) + right = int(detection[5] * frameWidth) + bottom = int(detection[6] * frameHeight) + width = right - left + 1 + height = bottom - top + 1 classIds.append(int(detection[1]) - 1) # Skip background label confidences.append(float(confidence)) boxes.append([left, top, width, height])