dnn: move Region NMS handling into DetectionModel

Summary of the changes carried by this patch:
  * all_layers.hpp / region_layer.cpp: `nmsThreshold` moves from the private
    RegionLayerImpl to the public RegionLayer interface so that it can be
    written from outside the layer implementation.
  * model.cpp: adds disableRegionNMS(Net&), which sets `nmsThreshold` to 0 on
    every unconnected output layer that casts to RegionLayer. Both
    DetectionModel constructors call it.
  * model.cpp: DetectionModel::detect() now appends "DetectionOutput" results
    straight to the output vectors. For "Region" outputs with a non-zero
    nmsThreshold, predictions at or above confThreshold are grouped by class
    id and NMSBoxes() is run once per class; with a zero nmsThreshold the
    raw predictions are moved to the outputs. Any other layer type still
    raises Error::StsNotImplemented.

NOTE(review): this patch was restored from a whitespace-collapsed copy.
Indentation and template arguments were reconstructed; verify them against
the upstream tree (or apply with whitespace checks relaxed) before merging.

diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index f574cc19be..32f59d1a3e 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -595,6 +595,8 @@ CV__DNN_INLINE_NS_BEGIN
     class CV_EXPORTS RegionLayer : public Layer
     {
     public:
+        float nmsThreshold;
+
         static Ptr<RegionLayer> create(const LayerParams& params);
     };
 
diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp
index 829a35acce..40c32fa0f9 100644
--- a/modules/dnn/src/layers/region_layer.cpp
+++ b/modules/dnn/src/layers/region_layer.cpp
@@ -69,7 +69,7 @@ class RegionLayerImpl CV_FINAL : public RegionLayer
 {
 public:
     int coords, classes, anchors, classfix;
-    float thresh, nmsThreshold, scale_x_y;
+    float thresh, scale_x_y;
     bool useSoftmax, useLogistic;
 #ifdef HAVE_OPENCL
     UMat blob_umat;
diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp
index c0f7f93b81..677228bcf2 100644
--- a/modules/dnn/src/model.cpp
+++ b/modules/dnn/src/model.cpp
@@ -236,10 +236,27 @@ void SegmentationModel::segment(InputArray frame, OutputArray mask)
     }
 }
 
-DetectionModel::DetectionModel(const String& model, const String& config)
-    : Model(model, config) {};
+void disableRegionNMS(Net& net)
+{
+    for (String& name : net.getUnconnectedOutLayersNames())
+    {
+        int layerId = net.getLayerId(name);
+        Ptr<RegionLayer> layer = net.getLayer(layerId).dynamicCast<RegionLayer>();
+        if (!layer.empty())
+        {
+            layer->nmsThreshold = 0;
+        }
+    }
+}
 
-DetectionModel::DetectionModel(const Net& network) : Model(network) {};
+DetectionModel::DetectionModel(const String& model, const String& config)
+    : Model(model, config) {
+    disableRegionNMS(*this);
+}
+
+DetectionModel::DetectionModel(const Net& network) : Model(network) {
+    disableRegionNMS(*this);
+}
 
 void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
                             CV_OUT std::vector<float>& confidences, CV_OUT std::vector<Rect>& boxes,
@@ -264,9 +281,6 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
     int lastLayerId = getLayerId(layerNames.back());
     Ptr<Layer> lastLayer = getLayer(lastLayerId);
 
-    std::vector<int> predClassIds;
-    std::vector<Rect> predBoxes;
-    std::vector<float> predConf;
     if (lastLayer->type == "DetectionOutput")
     {
         // Network produces output blob with a shape 1x1xNx7 where N is a number of
@@ -302,15 +316,18 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
                 top    = std::max(0, std::min(top, frameHeight - 1));
                 width  = std::max(1, std::min(width, frameWidth - left));
                 height = std::max(1, std::min(height, frameHeight - top));
-                predBoxes.emplace_back(left, top, width, height);
+                boxes.emplace_back(left, top, width, height);
 
-                predClassIds.push_back(static_cast<int>(data[j + 1]));
-                predConf.push_back(conf);
+                classIds.push_back(static_cast<int>(data[j + 1]));
+                confidences.push_back(conf);
             }
         }
     }
     else if (lastLayer->type == "Region")
     {
+        std::vector<int> predClassIds;
+        std::vector<Rect> predBoxes;
+        std::vector<float> predConf;
         for (int i = 0; i < detections.size(); ++i)
         {
             // Network produces output blob with a shape NxC where N is a number of
@@ -343,35 +360,45 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
                 predBoxes.emplace_back(left, top, width, height);
             }
         }
-    }
-    else
-        CV_Error(Error::StsNotImplemented, "Unknown output layer type: \"" + lastLayer->type + "\"");
 
-    if (nmsThreshold)
-    {
-        std::vector<int> indices;
-        NMSBoxes(predBoxes, predConf, confThreshold, nmsThreshold, indices);
-
-        boxes.reserve(indices.size());
-        confidences.reserve(indices.size());
-        classIds.reserve(indices.size());
-
-        for (int idx : indices)
+        if (nmsThreshold)
         {
-            boxes.push_back(predBoxes[idx]);
-            confidences.push_back(predConf[idx]);
-            classIds.push_back(predClassIds[idx]);
+            std::map<int, std::vector<size_t> > class2indices;
+            for (size_t i = 0; i < predClassIds.size(); i++)
+            {
+                if (predConf[i] >= confThreshold)
+                {
+                    class2indices[predClassIds[i]].push_back(i);
+                }
+            }
+            for (const auto& it : class2indices)
+            {
+                std::vector<Rect> localBoxes;
+                std::vector<float> localConfidences;
+                for (size_t idx : it.second)
+                {
+                    localBoxes.push_back(predBoxes[idx]);
+                    localConfidences.push_back(predConf[idx]);
+                }
+                std::vector<int> indices;
+                NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices);
+                classIds.resize(classIds.size() + indices.size(), it.first);
+                for (int idx : indices)
+                {
+                    boxes.push_back(localBoxes[idx]);
+                    confidences.push_back(localConfidences[idx]);
+                }
+            }
+        }
+        else
+        {
+            boxes = std::move(predBoxes);
+            classIds = std::move(predClassIds);
+            confidences = std::move(predConf);
         }
     }
     else
-    {
-        boxes = std::move(predBoxes);
-        classIds = std::move(predClassIds);
-        confidences = std::move(predConf);
-    }
-
-
-
+        CV_Error(Error::StsNotImplemented, "Unknown output layer type: \"" + lastLayer->type + "\"");
 }
 
 }} // namespace