diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 69b71f90ce..5467c989ac 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -1296,6 +1296,23 @@ CV__DNN_INLINE_NS_BEGIN */ CV_WRAP DetectionModel(const Net& network); + CV_DEPRECATED_EXTERNAL // avoid using in C++ code (need to fix bindings first) + DetectionModel(); + + /** + * @brief Sets nmsAcrossClasses, which defaults to false. While it is false, the non-maximum suppression + * step of detect() is applied to each class separately; set it to true to suppress across all classes at once. + * The model itself is returned. + * @param[in] value The new value for nmsAcrossClasses + */ + CV_WRAP DetectionModel& setNmsAcrossClasses(bool value); + + /** + * @brief Returns the current value of nmsAcrossClasses. It defaults to false, in which case the + * non-maximum suppression step of detect() is applied to each class separately rather than across all classes. + */ + CV_WRAP bool getNmsAcrossClasses(); + /** @brief Given the @p input frame, create input blob, run net and return result detections. * @param[in] frame The input image. * @param[out] classIds Class indexes in result detection. 
diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp index aefeaa42b3..16f7d31a25 100644 --- a/modules/dnn/src/model.cpp +++ b/modules/dnn/src/model.cpp @@ -320,34 +320,78 @@ void SegmentationModel::segment(InputArray frame, OutputArray mask) } } -void disableRegionNMS(Net& net) +class DetectionModel_Impl : public Model::Impl /* Model::Impl subclass that stores the nmsAcrossClasses flag and provides disableRegionNMS() */ { - for (String& name : net.getUnconnectedOutLayersNames()) +public: + virtual ~DetectionModel_Impl() {} + DetectionModel_Impl() : Impl() {} + DetectionModel_Impl(const DetectionModel_Impl&) = delete; + DetectionModel_Impl(DetectionModel_Impl&&) = delete; + + void disableRegionNMS(Net& net) /* sets nmsThreshold to 0 on each unconnected output layer for which the dynamicCast yields a non-empty Ptr */ { - int layerId = net.getLayerId(name); - Ptr layer = net.getLayer(layerId).dynamicCast(); - if (!layer.empty()) + for (String& name : net.getUnconnectedOutLayersNames()) { - layer->nmsThreshold = 0; + int layerId = net.getLayerId(name); + Ptr layer = net.getLayer(layerId).dynamicCast(); + if (!layer.empty()) + { + layer->nmsThreshold = 0; + } } } -} + + void setNmsAcrossClasses(bool value) { + nmsAcrossClasses = value; + } + + bool getNmsAcrossClasses() { + return nmsAcrossClasses; + } + +private: + bool nmsAcrossClasses = false; /* false: detect() runs NMS separately per class; true: one NMS pass over all predicted boxes */ +}; DetectionModel::DetectionModel(const String& model, const String& config) - : Model(model, config) + : DetectionModel(readNet(model, config)) { - disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() + // nothing: construction is delegated to DetectionModel(const Net&) } -DetectionModel::DetectionModel(const Net& network) : Model(network) +DetectionModel::DetectionModel(const Net& network) : Model() { - disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() + impl = makePtr(); + impl->initNet(network); + impl.dynamicCast()->disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() +} + +DetectionModel::DetectionModel() : Model() +{ + // nothing: no DetectionModel_Impl is created here; see the CV_Assert checks in the methods below +} + +DetectionModel& DetectionModel::setNmsAcrossClasses(bool value) +{ + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); // remove 
once default constructor is removed + + impl.dynamicCast()->setNmsAcrossClasses(value); + return *this; +} + +bool DetectionModel::getNmsAcrossClasses() +{ + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); // remove once default constructor is removed + + return impl.dynamicCast()->getNmsAcrossClasses(); } void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, CV_OUT std::vector& confidences, CV_OUT std::vector& boxes, float confThreshold, float nmsThreshold) { + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); // remove once default constructor is removed + std::vector detections; impl->processFrame(frame, detections); @@ -413,7 +457,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, { std::vector predClassIds; std::vector predBoxes; - std::vector predConf; + std::vector predConfidences; for (int i = 0; i < detections.size(); ++i) { // Network produces output blob with a shape NxC where N is a number of @@ -442,37 +486,51 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, height = std::max(1, std::min(height, frameHeight - top)); predClassIds.push_back(classIdPoint.x); - predConf.push_back(static_cast(conf)); + predConfidences.push_back(static_cast(conf)); predBoxes.emplace_back(left, top, width, height); } } if (nmsThreshold) { - std::map > class2indices; - for (size_t i = 0; i < predClassIds.size(); i++) + if (getNmsAcrossClasses()) /* one NMS pass over every predicted box, regardless of class id */ { - if (predConf[i] >= confThreshold) - { - class2indices[predClassIds[i]].push_back(i); - } - } - for (const auto& it : class2indices) - { - std::vector localBoxes; - std::vector localConfidences; - for (size_t idx : it.second) - { - localBoxes.push_back(predBoxes[idx]); - localConfidences.push_back(predConf[idx]); - } std::vector indices; - NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices); - classIds.resize(classIds.size() + indices.size(), it.first); + NMSBoxes(predBoxes, predConfidences, 
confThreshold, nmsThreshold, indices); for (int idx : indices) { - boxes.push_back(localBoxes[idx]); - confidences.push_back(localConfidences[idx]); + boxes.push_back(predBoxes[idx]); + confidences.push_back(predConfidences[idx]); + classIds.push_back(predClassIds[idx]); /* keep the class id of each surviving box */ + } + } + else /* per-class NMS: group the boxes that reach confThreshold by class id, then suppress within each group */ + { + std::map > class2indices; + for (size_t i = 0; i < predClassIds.size(); i++) + { + if (predConfidences[i] >= confThreshold) + { + class2indices[predClassIds[i]].push_back(i); + } + } + for (const auto& it : class2indices) + { + std::vector localBoxes; + std::vector localConfidences; + for (size_t idx : it.second) + { + localBoxes.push_back(predBoxes[idx]); + localConfidences.push_back(predConfidences[idx]); + } + std::vector indices; + NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices); + classIds.resize(classIds.size() + indices.size(), it.first); /* every box kept for this group gets the group's class id */ + for (int idx : indices) + { + boxes.push_back(localBoxes[idx]); + confidences.push_back(localConfidences[idx]); + } } } } @@ -480,7 +538,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, { boxes = std::move(predBoxes); classIds = std::move(predClassIds); - confidences = std::move(predConf); + confidences = std::move(predConfidences); } } else diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index 7d516de73e..58a881488a 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -25,7 +25,8 @@ public: double scoreDiff, double iouDiff, double confThreshold = 0.24, double nmsThreshold = 0.0, const Size& size = {-1, -1}, Scalar mean = Scalar(), - double scale = 1.0, bool swapRB = false, bool crop = false) + double scale = 1.0, bool swapRB = false, bool crop = false, + bool nmsAcrossClasses = false) { checkBackend(); @@ -38,6 +39,8 @@ public: model.setPreferableBackend(backend); model.setPreferableTarget(target); + model.setNmsAcrossClasses(nmsAcrossClasses); + std::vector classIds; std::vector confidences; std::vector boxes; 
@@ -177,6 +180,58 @@ TEST_P(Test_Model, DetectRegion) Scalar(), scale, swapRB); } +TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses) +{ /* Runs yolo-voc on dog416.png with nmsAcrossClasses enabled and checks the result against the two reference detections (class ids 6 and 11). */ + applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_MEMORY_1GB); + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); +#endif + +#if defined(INF_ENGINE_RELEASE) + if (target == DNN_TARGET_MYRIAD + && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); +#endif + + std::vector refClassIds = { 6, 11 }; + std::vector refConfidences = { 0.750469f, 0.901615f }; + std::vector refBoxes = { Rect2d(240, 53, 135, 72), + Rect2d(58, 141, 117, 249) }; + + std::string img_path = _tf("dog416.png"); + std::string weights_file = _tf("yolo-voc.weights", false); + std::string config_file = _tf("yolo-voc.cfg"); + + double scale = 1.0 / 255.0; + Size size{ 416, 416 }; + bool swapRB = true; + bool crop = false; + bool nmsAcrossClasses = true; + + double confThreshold = 0.24; + double nmsThreshold = 
0.15; /* same threshold on every target, so no per-target ternary is needed */ + double scoreDiff = 8e-5, iouDiff = 1e-5; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16) /* looser tolerances for these targets */ + { + scoreDiff = 1e-2; + iouDiff = 1.6e-2; + } + + testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, + refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size, + Scalar(), scale, swapRB, crop, + nmsAcrossClasses); +} + TEST_P(Test_Model, DetectionOutput) { #if defined(INF_ENGINE_RELEASE)