From 9d37cdaa6605d4b1512666fde7737e2bd80d839d Mon Sep 17 00:00:00 2001 From: Daniel Cauchi <33454325+CowKeyMan@users.noreply.github.com> Date: Tue, 1 Dec 2020 14:50:24 +0100 Subject: [PATCH] Merge pull request #18891 from CowKeyMan:NMS_boxes_with_different_labels Add option for NMS for boxes with different labels * DetectionModel impl * Add option for NMS for boxes with different labels In the detect function in modules/dnn/include/opencv2/dnn/dnn.hpp, whose implementation can be found at modules/dnn/src/model.cpp, the Non Max Suppression (NMS) is applied only for objects of the same label. Thus, a flag was added with the purpose to allow developers to choose if they want to keep the default implementation or whether they would like NMS to be applied to all the boxes, regardless of label. The flag is called nmsDifferentLabels, and is given a default value of false, which applies the current default implementation, thus allowing existing projects to update opencv without disruption Solves issue opencv#18832 * Change return type of set & Add default constr * Add assertions due to default constructor --- modules/dnn/include/opencv2/dnn/dnn.hpp | 17 ++++ modules/dnn/src/model.cpp | 126 +++++++++++++++++------- modules/dnn/test/test_model.cpp | 57 ++++++++++- 3 files changed, 165 insertions(+), 35 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 69b71f90ce..5467c989ac 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -1296,6 +1296,23 @@ CV__DNN_INLINE_NS_BEGIN */ CV_WRAP DetectionModel(const Net& network); + CV_DEPRECATED_EXTERNAL // avoid using in C++ code (need to fix bindings first) + DetectionModel(); + + /** + * @brief nmsAcrossClasses defaults to false, + * such that when non max suppression is used during the detect() function, it will do so per-class. + * This function allows you to toggle this behaviour. 
+ * @param[in] value The new value for nmsAcrossClasses + */ + CV_WRAP DetectionModel& setNmsAcrossClasses(bool value); + + /** + * @brief Getter for nmsAcrossClasses. This variable defaults to false, + * such that when non max suppression is used during the detect() function, it will do so only per-class + */ + CV_WRAP bool getNmsAcrossClasses(); + /** @brief Given the @p input frame, create input blob, run net and return result detections. * @param[in] frame The input image. * @param[out] classIds Class indexes in result detection. diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp index aefeaa42b3..16f7d31a25 100644 --- a/modules/dnn/src/model.cpp +++ b/modules/dnn/src/model.cpp @@ -320,34 +320,78 @@ void SegmentationModel::segment(InputArray frame, OutputArray mask) } } -void disableRegionNMS(Net& net) +class DetectionModel_Impl : public Model::Impl { - for (String& name : net.getUnconnectedOutLayersNames()) +public: + virtual ~DetectionModel_Impl() {} + DetectionModel_Impl() : Impl() {} + DetectionModel_Impl(const DetectionModel_Impl&) = delete; + DetectionModel_Impl(DetectionModel_Impl&&) = delete; + + void disableRegionNMS(Net& net) { - int layerId = net.getLayerId(name); - Ptr layer = net.getLayer(layerId).dynamicCast(); - if (!layer.empty()) + for (String& name : net.getUnconnectedOutLayersNames()) { - layer->nmsThreshold = 0; + int layerId = net.getLayerId(name); + Ptr layer = net.getLayer(layerId).dynamicCast(); + if (!layer.empty()) + { + layer->nmsThreshold = 0; + } } } -} + + void setNmsAcrossClasses(bool value) { + nmsAcrossClasses = value; + } + + bool getNmsAcrossClasses() { + return nmsAcrossClasses; + } + +private: + bool nmsAcrossClasses = false; +}; DetectionModel::DetectionModel(const String& model, const String& config) - : Model(model, config) + : DetectionModel(readNet(model, config)) { - disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() + // nothing } -DetectionModel::DetectionModel(const 
Net& network) : Model(network) +DetectionModel::DetectionModel(const Net& network) : Model() { - disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() + impl = makePtr(); + impl->initNet(network); + impl.dynamicCast()->disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() +} + +DetectionModel::DetectionModel() : Model() +{ + // nothing +} + +DetectionModel& DetectionModel::setNmsAcrossClasses(bool value) +{ + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); // remove once default constructor is removed + + impl.dynamicCast()->setNmsAcrossClasses(value); + return *this; +} + +bool DetectionModel::getNmsAcrossClasses() +{ + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); // remove once default constructor is removed + + return impl.dynamicCast()->getNmsAcrossClasses(); } void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, CV_OUT std::vector& confidences, CV_OUT std::vector& boxes, float confThreshold, float nmsThreshold) { + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); // remove once default constructor is removed + std::vector detections; impl->processFrame(frame, detections); @@ -413,7 +457,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, { std::vector predClassIds; std::vector predBoxes; - std::vector predConf; + std::vector predConfidences; for (int i = 0; i < detections.size(); ++i) { // Network produces output blob with a shape NxC where N is a number of @@ -442,37 +486,51 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, height = std::max(1, std::min(height, frameHeight - top)); predClassIds.push_back(classIdPoint.x); - predConf.push_back(static_cast(conf)); + predConfidences.push_back(static_cast(conf)); predBoxes.emplace_back(left, top, width, height); } } if (nmsThreshold) { - std::map > class2indices; - for (size_t i = 0; i < predClassIds.size(); i++) + if 
(getNmsAcrossClasses()) { - if (predConf[i] >= confThreshold) - { - class2indices[predClassIds[i]].push_back(i); - } - } - for (const auto& it : class2indices) - { - std::vector localBoxes; - std::vector localConfidences; - for (size_t idx : it.second) - { - localBoxes.push_back(predBoxes[idx]); - localConfidences.push_back(predConf[idx]); - } std::vector indices; - NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices); - classIds.resize(classIds.size() + indices.size(), it.first); + NMSBoxes(predBoxes, predConfidences, confThreshold, nmsThreshold, indices); for (int idx : indices) { - boxes.push_back(localBoxes[idx]); - confidences.push_back(localConfidences[idx]); + boxes.push_back(predBoxes[idx]); + confidences.push_back(predConfidences[idx]); + classIds.push_back(predClassIds[idx]); + } + } + else + { + std::map > class2indices; + for (size_t i = 0; i < predClassIds.size(); i++) + { + if (predConfidences[i] >= confThreshold) + { + class2indices[predClassIds[i]].push_back(i); + } + } + for (const auto& it : class2indices) + { + std::vector localBoxes; + std::vector localConfidences; + for (size_t idx : it.second) + { + localBoxes.push_back(predBoxes[idx]); + localConfidences.push_back(predConfidences[idx]); + } + std::vector indices; + NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices); + classIds.resize(classIds.size() + indices.size(), it.first); + for (int idx : indices) + { + boxes.push_back(localBoxes[idx]); + confidences.push_back(localConfidences[idx]); + } } } } @@ -480,7 +538,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, { boxes = std::move(predBoxes); classIds = std::move(predClassIds); - confidences = std::move(predConf); + confidences = std::move(predConfidences); } } else diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index 7d516de73e..58a881488a 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ 
-25,7 +25,8 @@ public: double scoreDiff, double iouDiff, double confThreshold = 0.24, double nmsThreshold = 0.0, const Size& size = {-1, -1}, Scalar mean = Scalar(), - double scale = 1.0, bool swapRB = false, bool crop = false) + double scale = 1.0, bool swapRB = false, bool crop = false, + bool nmsAcrossClasses = false) { checkBackend(); @@ -38,6 +39,8 @@ public: model.setPreferableBackend(backend); model.setPreferableTarget(target); + model.setNmsAcrossClasses(nmsAcrossClasses); + std::vector classIds; std::vector confidences; std::vector boxes; @@ -177,6 +180,58 @@ TEST_P(Test_Model, DetectRegion) Scalar(), scale, swapRB); } +TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses) +{ + applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_MEMORY_1GB); + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); +#endif + +#if defined(INF_ENGINE_RELEASE) + if (target == DNN_TARGET_MYRIAD + && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); +#endif + + std::vector refClassIds = { 6, 11 }; + std::vector refConfidences = { 0.750469f, 0.901615f }; + std::vector refBoxes = { Rect2d(240, 53, 135, 72), + Rect2d(58, 141, 117, 249) }; + + std::string img_path = _tf("dog416.png"); + std::string weights_file = _tf("yolo-voc.weights", false); + std::string config_file = _tf("yolo-voc.cfg"); + + double scale = 1.0 / 255.0; 
+ Size size{ 416, 416 }; + bool swapRB = true; + bool crop = false; + bool nmsAcrossClasses = true; + + double confThreshold = 0.24; + double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 0.15: 0.15; + double scoreDiff = 8e-5, iouDiff = 1e-5; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 1e-2; + iouDiff = 1.6e-2; + } + + testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, + refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size, + Scalar(), scale, swapRB, crop, + nmsAcrossClasses); +} + TEST_P(Test_Model, DetectionOutput) { #if defined(INF_ENGINE_RELEASE)