diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index 69b71f90ce..5467c989ac 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -1296,6 +1296,23 @@ CV__DNN_INLINE_NS_BEGIN
           */
          CV_WRAP DetectionModel(const Net& network);
 
+         CV_DEPRECATED_EXTERNAL  // avoid using in C++ code (need to fix bindings first)
+         DetectionModel();  // creates no detection implementation; prefer the Net or model/config constructors
+
+         /**
+          * @brief Selects whether the non-maximum suppression performed by detect() runs once over
+          * the boxes of all classes together (true) or separately for each class (false, the default).
+          * @param[in] value The new value for nmsAcrossClasses
+          * @return Reference to this model, so that calls can be chained.
+          */
+         CV_WRAP DetectionModel& setNmsAcrossClasses(bool value);
+
+         /**
+          * @brief Getter for nmsAcrossClasses.
+          * @return true if detect() applies non-maximum suppression across all classes together; false (the default) if it does so per class.
+          */
+         CV_WRAP bool getNmsAcrossClasses();
+
          /** @brief Given the @p input frame, create input blob, run net and return result detections.
           *  @param[in]  frame  The input image.
           *  @param[out] classIds Class indexes in result detection.
diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp
index aefeaa42b3..16f7d31a25 100644
--- a/modules/dnn/src/model.cpp
+++ b/modules/dnn/src/model.cpp
@@ -320,34 +320,78 @@ void SegmentationModel::segment(InputArray frame, OutputArray mask)
     }
 }
 
-void disableRegionNMS(Net& net)
+class DetectionModel_Impl : public Model::Impl  // Model implementation carrying DetectionModel-specific state and helpers
 {
-    for (String& name : net.getUnconnectedOutLayersNames())
+public:
+    virtual ~DetectionModel_Impl() {}
+    DetectionModel_Impl() : Impl() {}
+    DetectionModel_Impl(const DetectionModel_Impl&) = delete;
+    DetectionModel_Impl(DetectionModel_Impl&&) = delete;
+    // Sets nmsThreshold to 0 on every RegionLayer found among the unconnected output layers of net.
+    void disableRegionNMS(Net& net)
     {
-        int layerId = net.getLayerId(name);
-        Ptr<RegionLayer> layer = net.getLayer(layerId).dynamicCast<RegionLayer>();
-        if (!layer.empty())
+        for (const String& name : net.getUnconnectedOutLayersNames())
         {
-            layer->nmsThreshold = 0;
+            int layerId = net.getLayerId(name);
+            Ptr<RegionLayer> layer = net.getLayer(layerId).dynamicCast<RegionLayer>();
+            if (!layer.empty())  // output layers of any other type are left untouched
+            {
+                layer->nmsThreshold = 0;
+            }
         }
     }
-}
+    // Stores the flag that DetectionModel::detect() consults to choose its NMS strategy.
+    void setNmsAcrossClasses(bool value) {
+        nmsAcrossClasses = value;
+    }
+    // Read-only accessor; const because it does not modify the implementation.
+    bool getNmsAcrossClasses() const {
+        return nmsAcrossClasses;
+    }
+
+private:
+    bool nmsAcrossClasses = false;  // true: one NMS pass over all classes; false: NMS per class
+};
 
 DetectionModel::DetectionModel(const String& model, const String& config)
-    : Model(model, config)
+    : DetectionModel(readNet(model, config))
 {
-    disableRegionNMS(getNetwork_());  // FIXIT Move to DetectionModel::Impl::initNet()
+    // nothing: the delegated-to Net constructor installs the implementation and disables region NMS
 }
 
-DetectionModel::DetectionModel(const Net& network) : Model(network)
+DetectionModel::DetectionModel(const Net& network) : Model()
 {
-    disableRegionNMS(getNetwork_());  // FIXIT Move to DetectionModel::Impl::initNet()
+    impl = makePtr<DetectionModel_Impl>();  // detection-specific implementation; it holds the nmsAcrossClasses flag
+    impl->initNet(network);
+    impl.dynamicCast<DetectionModel_Impl>()->disableRegionNMS(getNetwork_());  // FIXIT Move to DetectionModel::Impl::initNet()
+}
+
+DetectionModel::DetectionModel() : Model()
+{
+    // nothing: no DetectionModel_Impl is created here; the CV_Assert checks in the accessors and detect() guard against models built this way
+}
+
+DetectionModel& DetectionModel::setNmsAcrossClasses(bool value)
+{
+    CV_Assert(impl != nullptr && impl.dynamicCast<DetectionModel_Impl>() != nullptr); // remove once default constructor is removed
+    // Forward the flag to the implementation, then return this model so calls can be chained.
+    impl.dynamicCast<DetectionModel_Impl>()->setNmsAcrossClasses(value);
+    return *this;
+}
+
+bool DetectionModel::getNmsAcrossClasses()
+{
+    CV_Assert(impl != nullptr && impl.dynamicCast<DetectionModel_Impl>() != nullptr); // remove once default constructor is removed
+    // Report the flag stored in the implementation (false unless setNmsAcrossClasses() changed it).
+    return impl.dynamicCast<DetectionModel_Impl>()->getNmsAcrossClasses();
 }
 
 void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
                             CV_OUT std::vector<float>& confidences, CV_OUT std::vector<Rect>& boxes,
                             float confThreshold, float nmsThreshold)
 {
+    CV_Assert(impl != nullptr && impl.dynamicCast<DetectionModel_Impl>() != nullptr); // remove once default constructor is removed
+
     std::vector<Mat> detections;
     impl->processFrame(frame, detections);
 
@@ -413,7 +457,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
     {
         std::vector<int> predClassIds;
         std::vector<Rect> predBoxes;
-        std::vector<float> predConf;
+        std::vector<float> predConfidences;
         for (int i = 0; i < detections.size(); ++i)
         {
             // Network produces output blob with a shape NxC where N is a number of
@@ -442,37 +486,51 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
                 height   = std::max(1, std::min(height, frameHeight - top));
 
                 predClassIds.push_back(classIdPoint.x);
-                predConf.push_back(static_cast<float>(conf));
+                predConfidences.push_back(static_cast<float>(conf));
                 predBoxes.emplace_back(left, top, width, height);
             }
         }
 
         if (nmsThreshold)
         {
-            std::map<int, std::vector<size_t> > class2indices;
-            for (size_t i = 0; i < predClassIds.size(); i++)
+            if (getNmsAcrossClasses())
             {
-                if (predConf[i] >= confThreshold)
-                {
-                    class2indices[predClassIds[i]].push_back(i);
-                }
-            }
-            for (const auto& it : class2indices)
-            {
-                std::vector<Rect> localBoxes;
-                std::vector<float> localConfidences;
-                for (size_t idx : it.second)
-                {
-                    localBoxes.push_back(predBoxes[idx]);
-                    localConfidences.push_back(predConf[idx]);
-                }
                 std::vector<int> indices;
-                NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices);
-                classIds.resize(classIds.size() + indices.size(), it.first);
+                NMSBoxes(predBoxes, predConfidences, confThreshold, nmsThreshold, indices);
                 for (int idx : indices)
                 {
-                    boxes.push_back(localBoxes[idx]);
-                    confidences.push_back(localConfidences[idx]);
+                    boxes.push_back(predBoxes[idx]);
+                    confidences.push_back(predConfidences[idx]);
+                    classIds.push_back(predClassIds[idx]);
+                }
+            }
+            else
+            {
+                std::map<int, std::vector<size_t> > class2indices;
+                for (size_t i = 0; i < predClassIds.size(); i++)
+                {
+                    if (predConfidences[i] >= confThreshold)
+                    {
+                        class2indices[predClassIds[i]].push_back(i);
+                    }
+                }
+                for (const auto& it : class2indices)
+                {
+                    std::vector<Rect> localBoxes;
+                    std::vector<float> localConfidences;
+                    for (size_t idx : it.second)
+                    {
+                        localBoxes.push_back(predBoxes[idx]);
+                        localConfidences.push_back(predConfidences[idx]);
+                    }
+                    std::vector<int> indices;
+                    NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices);
+                    classIds.resize(classIds.size() + indices.size(), it.first);
+                    for (int idx : indices)
+                    {
+                        boxes.push_back(localBoxes[idx]);
+                        confidences.push_back(localConfidences[idx]);
+                    }
                 }
             }
         }
@@ -480,7 +538,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
         {
             boxes       = std::move(predBoxes);
             classIds    = std::move(predClassIds);
-            confidences = std::move(predConf);
+            confidences = std::move(predConfidences);
         }
     }
     else
diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp
index 7d516de73e..58a881488a 100644
--- a/modules/dnn/test/test_model.cpp
+++ b/modules/dnn/test/test_model.cpp
@@ -25,7 +25,8 @@ public:
                          double scoreDiff, double iouDiff,
                          double confThreshold = 0.24, double nmsThreshold = 0.0,
                          const Size& size = {-1, -1}, Scalar mean = Scalar(),
-                         double scale = 1.0, bool swapRB = false, bool crop = false)
+                         double scale = 1.0, bool swapRB = false, bool crop = false,
+                         bool nmsAcrossClasses = false)
     {
         checkBackend();
 
@@ -38,6 +39,8 @@ public:
         model.setPreferableBackend(backend);
         model.setPreferableTarget(target);
 
+        model.setNmsAcrossClasses(nmsAcrossClasses);
+
         std::vector<int> classIds;
         std::vector<float> confidences;
         std::vector<Rect> boxes;
@@ -177,6 +180,58 @@ TEST_P(Test_Model, DetectRegion)
                     Scalar(), scale, swapRB);
 }
 
+TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses)
+{
+    applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_MEMORY_1GB);
+    // Runs the yolo-voc model on dog416.png with nmsAcrossClasses enabled and compares against fixed references.
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000)  // nGraph compilation failure
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
+
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
+#endif
+
+#if defined(INF_ENGINE_RELEASE)
+    if (target == DNN_TARGET_MYRIAD
+        && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
+#endif
+    // Reference detections expected when NMS is applied across all classes.
+    std::vector<int> refClassIds = { 6, 11 };
+    std::vector<float> refConfidences = { 0.750469f, 0.901615f };
+    std::vector<Rect2d> refBoxes = { Rect2d(240, 53, 135, 72),
+                                    Rect2d(58, 141, 117, 249) };
+
+    std::string img_path = _tf("dog416.png");
+    std::string weights_file = _tf("yolo-voc.weights", false);
+    std::string config_file = _tf("yolo-voc.cfg");
+
+    double scale = 1.0 / 255.0;
+    Size size{ 416, 416 };
+    bool swapRB = true;
+    bool crop = false;
+    bool nmsAcrossClasses = true;
+
+    double confThreshold = 0.24;
+    double nmsThreshold = 0.15;  // identical for every target, so no per-target selection is needed
+    double scoreDiff = 8e-5, iouDiff = 1e-5;
+    if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16)
+    {
+        scoreDiff = 1e-2;
+        iouDiff = 1.6e-2;
+    }
+
+    testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences,
+        refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size,
+        Scalar(), scale, swapRB, crop,
+        nmsAcrossClasses);
+}
+
 TEST_P(Test_Model, DetectionOutput)
 {
 #if defined(INF_ENGINE_RELEASE)