mirror of
https://github.com/opencv/opencv.git
synced 2025-07-24 14:06:27 +08:00
Merge pull request #18891 from CowKeyMan:NMS_boxes_with_different_labels
Add option for NMS for boxes with different labels * DetectionModel impl * Add option for NMS for boxes with different labels In the detect function in modules/dnn/include/opencv2/dnn/dnn.hpp, whose implementation can be found at modules/dnn/src/model.cpp, the Non Max Suppression (NMS) is applied only for objects of the same label. Thus, a flag was added to allow developers to choose whether they want to keep the default implementation or whether they would like NMS to be applied to all the boxes, regardless of label. The flag is called nmsDifferentLabels [named nmsAcrossClasses in the code shown below], and is given a default value of false, which applies the current default implementation, thus allowing existing projects to update opencv without disruption. Solves issue opencv#18832 * Change return type of set & Add default constr * Add assertions due to default constructor
This commit is contained in:
parent
3f686a6ab8
commit
9d37cdaa66
@ -1296,6 +1296,23 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
*/
|
||||
CV_WRAP DetectionModel(const Net& network);
|
||||
|
||||
// Default constructor, marked deprecated for external use (see the trailing note on the macro below).
// NOTE(review): setNmsAcrossClasses(), getNmsAcrossClasses() and detect() assert that the model holds a
// detection-specific impl; a default-constructed model presumably fails those assertions - confirm
// before relying on this constructor from C++ code.
CV_DEPRECATED_EXTERNAL // avoid using in C++ code (need to fix bindings first)
|
||||
DetectionModel();
|
||||
|
||||
/**
|
||||
* @brief Selects whether non-maximum suppression (NMS) in detect() runs per class or across all classes.
|
||||
* nmsAcrossClasses defaults to false, so detect() suppresses overlapping boxes only within the same class.
|
||||
* Pass true to apply NMS to all boxes together, regardless of their class.
|
||||
* @param[in] value The new value for nmsAcrossClasses
|
||||
* @return Reference to this model, so that calls can be chained.
|
||||
*/
|
||||
CV_WRAP DetectionModel& setNmsAcrossClasses(bool value);
|
||||
|
||||
/**
|
||||
* @brief Getter for nmsAcrossClasses. This variable defaults to false,
|
||||
* such that when non-maximum suppression (NMS) is used during the detect() function, it is applied only per class.
|
||||
* @return The current value of nmsAcrossClasses; true means detect() applies NMS across all classes.
|
||||
*/
|
||||
CV_WRAP bool getNmsAcrossClasses();
|
||||
|
||||
/** @brief Given the @p input frame, create input blob, run net and return result detections.
|
||||
* @param[in] frame The input image.
|
||||
* @param[out] classIds Class indexes in result detection.
|
||||
|
@ -320,34 +320,78 @@ void SegmentationModel::segment(InputArray frame, OutputArray mask)
|
||||
}
|
||||
}
|
||||
|
||||
void disableRegionNMS(Net& net)
|
||||
class DetectionModel_Impl : public Model::Impl
|
||||
{
|
||||
for (String& name : net.getUnconnectedOutLayersNames())
|
||||
public:
|
||||
virtual ~DetectionModel_Impl() {}
|
||||
DetectionModel_Impl() : Impl() {}
|
||||
DetectionModel_Impl(const DetectionModel_Impl&) = delete;
|
||||
DetectionModel_Impl(DetectionModel_Impl&&) = delete;
|
||||
|
||||
void disableRegionNMS(Net& net)
|
||||
{
|
||||
int layerId = net.getLayerId(name);
|
||||
Ptr<RegionLayer> layer = net.getLayer(layerId).dynamicCast<RegionLayer>();
|
||||
if (!layer.empty())
|
||||
for (String& name : net.getUnconnectedOutLayersNames())
|
||||
{
|
||||
layer->nmsThreshold = 0;
|
||||
int layerId = net.getLayerId(name);
|
||||
Ptr<RegionLayer> layer = net.getLayer(layerId).dynamicCast<RegionLayer>();
|
||||
if (!layer.empty())
|
||||
{
|
||||
layer->nmsThreshold = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void setNmsAcrossClasses(bool value) {
|
||||
nmsAcrossClasses = value;
|
||||
}
|
||||
|
||||
bool getNmsAcrossClasses() {
|
||||
return nmsAcrossClasses;
|
||||
}
|
||||
|
||||
private:
|
||||
bool nmsAcrossClasses = false;
|
||||
};
|
||||
|
||||
DetectionModel::DetectionModel(const String& model, const String& config)
|
||||
: Model(model, config)
|
||||
: DetectionModel(readNet(model, config))
|
||||
{
|
||||
disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet()
|
||||
// nothing
|
||||
}
|
||||
|
||||
DetectionModel::DetectionModel(const Net& network) : Model(network)
|
||||
DetectionModel::DetectionModel(const Net& network) : Model()
|
||||
{
|
||||
disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet()
|
||||
impl = makePtr<DetectionModel_Impl>();
|
||||
impl->initNet(network);
|
||||
impl.dynamicCast<DetectionModel_Impl>()->disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet()
|
||||
}
|
||||
|
||||
/**
 * Default constructor: default-constructs the Model base and does nothing else
 * (no network is passed in and no detection-specific setup is performed here).
 */
DetectionModel::DetectionModel()
    : Model()
{
}
|
||||
|
||||
/**
 * Sets the nmsAcrossClasses flag on the detection-specific implementation object.
 * detect() consults this flag to decide whether non-maximum suppression is run
 * over all boxes together (true) or separately for each class (false).
 *
 * @param value New value of the nmsAcrossClasses flag.
 * @return Reference to this model, so that calls can be chained.
 *
 * Fails a CV_Assert when the model does not hold a DetectionModel_Impl.
 */
DetectionModel& DetectionModel::setNmsAcrossClasses(bool value)
{
    // Cast once and reuse the result for both the validity check and the call.
    // A null impl yields a null cast result, so a single check covers both the
    // "no impl" and the "impl of a different type" cases.
    const auto detectionImpl = impl.dynamicCast<DetectionModel_Impl>();
    CV_Assert(detectionImpl != nullptr); // remove once default constructor is removed

    detectionImpl->setNmsAcrossClasses(value);
    return *this;
}
|
||||
|
||||
bool DetectionModel::getNmsAcrossClasses()
|
||||
{
|
||||
CV_Assert(impl != nullptr && impl.dynamicCast<DetectionModel_Impl>() != nullptr); // remove once default constructor is removed
|
||||
|
||||
return impl.dynamicCast<DetectionModel_Impl>()->getNmsAcrossClasses();
|
||||
}
|
||||
|
||||
void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
|
||||
CV_OUT std::vector<float>& confidences, CV_OUT std::vector<Rect>& boxes,
|
||||
float confThreshold, float nmsThreshold)
|
||||
{
|
||||
CV_Assert(impl != nullptr && impl.dynamicCast<DetectionModel_Impl>() != nullptr); // remove once default constructor is removed
|
||||
|
||||
std::vector<Mat> detections;
|
||||
impl->processFrame(frame, detections);
|
||||
|
||||
@ -413,7 +457,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
|
||||
{
|
||||
std::vector<int> predClassIds;
|
||||
std::vector<Rect> predBoxes;
|
||||
std::vector<float> predConf;
|
||||
std::vector<float> predConfidences;
|
||||
for (int i = 0; i < detections.size(); ++i)
|
||||
{
|
||||
// Network produces output blob with a shape NxC where N is a number of
|
||||
@ -442,37 +486,51 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
|
||||
height = std::max(1, std::min(height, frameHeight - top));
|
||||
|
||||
predClassIds.push_back(classIdPoint.x);
|
||||
predConf.push_back(static_cast<float>(conf));
|
||||
predConfidences.push_back(static_cast<float>(conf));
|
||||
predBoxes.emplace_back(left, top, width, height);
|
||||
}
|
||||
}
|
||||
|
||||
if (nmsThreshold)
|
||||
{
|
||||
std::map<int, std::vector<size_t> > class2indices;
|
||||
for (size_t i = 0; i < predClassIds.size(); i++)
|
||||
if (getNmsAcrossClasses())
|
||||
{
|
||||
if (predConf[i] >= confThreshold)
|
||||
{
|
||||
class2indices[predClassIds[i]].push_back(i);
|
||||
}
|
||||
}
|
||||
for (const auto& it : class2indices)
|
||||
{
|
||||
std::vector<Rect> localBoxes;
|
||||
std::vector<float> localConfidences;
|
||||
for (size_t idx : it.second)
|
||||
{
|
||||
localBoxes.push_back(predBoxes[idx]);
|
||||
localConfidences.push_back(predConf[idx]);
|
||||
}
|
||||
std::vector<int> indices;
|
||||
NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices);
|
||||
classIds.resize(classIds.size() + indices.size(), it.first);
|
||||
NMSBoxes(predBoxes, predConfidences, confThreshold, nmsThreshold, indices);
|
||||
for (int idx : indices)
|
||||
{
|
||||
boxes.push_back(localBoxes[idx]);
|
||||
confidences.push_back(localConfidences[idx]);
|
||||
boxes.push_back(predBoxes[idx]);
|
||||
confidences.push_back(predConfidences[idx]);
|
||||
classIds.push_back(predClassIds[idx]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::map<int, std::vector<size_t> > class2indices;
|
||||
for (size_t i = 0; i < predClassIds.size(); i++)
|
||||
{
|
||||
if (predConfidences[i] >= confThreshold)
|
||||
{
|
||||
class2indices[predClassIds[i]].push_back(i);
|
||||
}
|
||||
}
|
||||
for (const auto& it : class2indices)
|
||||
{
|
||||
std::vector<Rect> localBoxes;
|
||||
std::vector<float> localConfidences;
|
||||
for (size_t idx : it.second)
|
||||
{
|
||||
localBoxes.push_back(predBoxes[idx]);
|
||||
localConfidences.push_back(predConfidences[idx]);
|
||||
}
|
||||
std::vector<int> indices;
|
||||
NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices);
|
||||
classIds.resize(classIds.size() + indices.size(), it.first);
|
||||
for (int idx : indices)
|
||||
{
|
||||
boxes.push_back(localBoxes[idx]);
|
||||
confidences.push_back(localConfidences[idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -480,7 +538,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
|
||||
{
|
||||
boxes = std::move(predBoxes);
|
||||
classIds = std::move(predClassIds);
|
||||
confidences = std::move(predConf);
|
||||
confidences = std::move(predConfidences);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -25,7 +25,8 @@ public:
|
||||
double scoreDiff, double iouDiff,
|
||||
double confThreshold = 0.24, double nmsThreshold = 0.0,
|
||||
const Size& size = {-1, -1}, Scalar mean = Scalar(),
|
||||
double scale = 1.0, bool swapRB = false, bool crop = false)
|
||||
double scale = 1.0, bool swapRB = false, bool crop = false,
|
||||
bool nmsAcrossClasses = false)
|
||||
{
|
||||
checkBackend();
|
||||
|
||||
@ -38,6 +39,8 @@ public:
|
||||
model.setPreferableBackend(backend);
|
||||
model.setPreferableTarget(target);
|
||||
|
||||
model.setNmsAcrossClasses(nmsAcrossClasses);
|
||||
|
||||
std::vector<int> classIds;
|
||||
std::vector<float> confidences;
|
||||
std::vector<Rect> boxes;
|
||||
@ -177,6 +180,58 @@ TEST_P(Test_Model, DetectRegion)
|
||||
Scalar(), scale, swapRB);
|
||||
}
|
||||
|
||||
// Runs the yolo-voc model on dog416.png through testDetectModel() with
// nmsAcrossClasses enabled and hands it two reference detections
// (class ids 6 and 11) together with the score/IoU tolerances.
TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses)
{
    applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_MEMORY_1GB);

#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000)  // nGraph compilation failure
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif

#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
#endif

#if defined(INF_ENGINE_RELEASE)
    if (target == DNN_TARGET_MYRIAD
        && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
#endif

    // Reference detections handed to testDetectModel().
    std::vector<int> refClassIds = { 6, 11 };
    std::vector<float> refConfidences = { 0.750469f, 0.901615f };
    std::vector<Rect2d> refBoxes = { Rect2d(240, 53, 135, 72),
                                     Rect2d(58, 141, 117, 249) };

    std::string img_path = _tf("dog416.png");
    std::string weights_file = _tf("yolo-voc.weights", false);
    std::string config_file = _tf("yolo-voc.cfg");

    double scale = 1.0 / 255.0;
    Size size{ 416, 416 };
    bool swapRB = true;
    bool crop = false;
    bool nmsAcrossClasses = true;

    double confThreshold = 0.24;
    // The same NMS threshold is used for every target; the previous per-target
    // conditional selected 0.15 in both branches.
    double nmsThreshold = 0.15;

    // Looser tolerances for the reduced-precision targets.
    double scoreDiff = 8e-5, iouDiff = 1e-5;
    if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16)
    {
        scoreDiff = 1e-2;
        iouDiff = 1.6e-2;
    }

    testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences,
                    refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size,
                    Scalar(), scale, swapRB, crop,
                    nmsAcrossClasses);
}
|
||||
|
||||
TEST_P(Test_Model, DetectionOutput)
|
||||
{
|
||||
#if defined(INF_ENGINE_RELEASE)
|
||||
|
Loading…
Reference in New Issue
Block a user