diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
index 037c7fb5ba..bcb8a3e203 100644
--- a/cmake/OpenCVCompilerOptions.cmake
+++ b/cmake/OpenCVCompilerOptions.cmake
@@ -119,12 +119,12 @@ if(CV_GCC OR CV_CLANG)
     # we want.
     add_extra_compiler_option(-Wall)
   endif()
-  add_extra_compiler_option(-Werror=return-type)
-  add_extra_compiler_option(-Werror=non-virtual-dtor)
-  add_extra_compiler_option(-Werror=address)
-  add_extra_compiler_option(-Werror=sequence-point)
+  add_extra_compiler_option(-Wreturn-type)
+  add_extra_compiler_option(-Wnon-virtual-dtor)
+  add_extra_compiler_option(-Waddress)
+  add_extra_compiler_option(-Wsequence-point)
   add_extra_compiler_option(-Wformat)
-  add_extra_compiler_option(-Werror=format-security -Wformat)
+  add_extra_compiler_option(-Wformat-security -Wformat)
   add_extra_compiler_option(-Wmissing-declarations)
   add_extra_compiler_option(-Wmissing-prototypes)
   add_extra_compiler_option(-Wstrict-prototypes)
@@ -367,6 +367,22 @@ if(NOT OPENCV_SKIP_LINK_AS_NEEDED)
   endif()
 endif()
 
+# Apply "-Wl,--no-undefined" linker flags (set OPENCV_SKIP_LINK_NO_UNDEFINED to opt out): https://github.com/opencv/opencv/pull/21347
+if(NOT OPENCV_SKIP_LINK_NO_UNDEFINED)
+  if(UNIX AND (NOT APPLE OR NOT CMAKE_VERSION VERSION_LESS "3.2"))
+    set(_option "-Wl,--no-undefined")
+    set(_saved_CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")  # keep the original value so it can be restored after the check
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${_option}")  # requires CMake 3.2+ and CMP0056
+    ocv_check_compiler_flag(CXX "" HAVE_LINK_NO_UNDEFINED)  # no compiler flag is passed; presumably only the linker flags set above are exercised
+    set(CMAKE_EXE_LINKER_FLAGS "${_saved_CMAKE_EXE_LINKER_FLAGS}")  # undo the temporary change
+    if(HAVE_LINK_NO_UNDEFINED)  # check succeeded: add the option for executables, shared libraries and modules
+      set(OPENCV_EXTRA_EXE_LINKER_FLAGS "${OPENCV_EXTRA_EXE_LINKER_FLAGS} ${_option}")
+      set(OPENCV_EXTRA_SHARED_LINKER_FLAGS "${OPENCV_EXTRA_SHARED_LINKER_FLAGS} ${_option}")
+      set(OPENCV_EXTRA_MODULE_LINKER_FLAGS "${OPENCV_EXTRA_MODULE_LINKER_FLAGS} ${_option}")
+    endif()
+  endif()
+endif()
+
 # combine all "extra" options
 if(NOT OPENCV_SKIP_EXTRA_COMPILER_FLAGS)
   set(CMAKE_C_FLAGS           "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}")
diff --git a/modules/core/include/opencv2/core/core_c.h b/modules/core/include/opencv2/core/core_c.h
index 09ac1e789a..7b686b86f3 100644
--- a/modules/core/include/opencv2/core/core_c.h
+++ b/modules/core/include/opencv2/core/core_c.h
@@ -48,16 +48,19 @@
 #include "opencv2/core/types_c.h"
 
 #ifdef __cplusplus
-#  ifdef _MSC_VER
-/* disable warning C4190: 'function' has C-linkage specified, but returns UDT 'typename'
-                          which is incompatible with C
+/* disable MSVC warning C4190 / clang-cl -Wreturn-type-c-linkage:
+       'function' has C-linkage specified, but returns UDT 'typename'
+       which is incompatible with C
 
    It is OK to disable it because we only extend few plain structures with
    C++ constructors for simpler interoperability with C++ API of the library
 */
-#    pragma warning(disable:4190)
-#  elif defined __clang__ && __clang_major__ >= 3
+#  if defined(__clang__)
+     // handle clang on Linux and clang-cl (i. e. clang on Windows) first
 #    pragma GCC diagnostic ignored "-Wreturn-type-c-linkage"
+#  elif defined(_MSC_VER)
+     // then handle MSVC
+#    pragma warning(disable:4190)
 #  endif
 #endif
 
diff --git a/modules/core/include/opencv2/core/utils/fp_control.private.hpp b/modules/core/include/opencv2/core/utils/fp_control.private.hpp
new file mode 100644
index 0000000000..12ee363dd8
--- /dev/null
+++ b/modules/core/include/opencv2/core/utils/fp_control.private.hpp
@@ -0,0 +1,29 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
+#define OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
+
+#include "fp_control_utils.hpp"
+
+#if OPENCV_SUPPORTS_FP_DENORMALS_HINT == 0
+  // disabled: the public header reports no FP denormals hint support, so no implementation is selected
+#elif defined(OPENCV_IMPL_FP_HINTS)
+  // custom: OPENCV_IMPL_FP_HINTS was already defined before this header, keep that value
+#elif defined(OPENCV_IMPL_FP_HINTS_X86)
+  // custom: OPENCV_IMPL_FP_HINTS_X86 was already defined before this header, keep that value
+#elif defined(__SSE__) || defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
+  #include <xmmintrin.h>
+  #define OPENCV_IMPL_FP_HINTS_X86 1
+  #define OPENCV_IMPL_FP_HINTS 1
+#endif
+
+#ifndef OPENCV_IMPL_FP_HINTS  // fall back to 0 so the macro can always be used in #if expressions
+#define OPENCV_IMPL_FP_HINTS 0
+#endif
+#ifndef OPENCV_IMPL_FP_HINTS_X86  // same fallback for the x86-specific switch
+#define OPENCV_IMPL_FP_HINTS_X86 0
+#endif
+
+#endif // OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
diff --git a/modules/core/include/opencv2/core/utils/fp_control_utils.hpp b/modules/core/include/opencv2/core/utils/fp_control_utils.hpp
new file mode 100644
index 0000000000..930bc5d367
--- /dev/null
+++ b/modules/core/include/opencv2/core/utils/fp_control_utils.hpp
@@ -0,0 +1,69 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_FP_CONTROL_UTILS_HPP
+#define OPENCV_CORE_FP_CONTROL_UTILS_HPP
+
+namespace cv {
+
+namespace details {
+
+struct FPDenormalsModeState
+{
+    uint32_t reserved[16];  // opaque storage, 64 bytes; the x86 implementation uses [0] = bit mask and [1] = saved bits
+};  // FPDenormalsModeState
+
+CV_EXPORTS void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state);  // stores the previous mode in 'state'; no-op when unsupported
+CV_EXPORTS int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state);  // returns the number of state.reserved[] entries filled (0 when unsupported)
+CV_EXPORTS bool restoreFPDenormalsState(const FPDenormalsModeState& state);  // returns false when unsupported
+
+class FPDenormalsIgnoreHintScope  // scope guard: the destructor restores the FP state captured by the constructor
+{
+public:
+    inline explicit FPDenormalsIgnoreHintScope(bool ignore = true)
+    {
+        details::setFPDenormalsIgnoreHint(ignore, saved_state);  // previous mode ends up in saved_state
+    }
+
+    inline explicit FPDenormalsIgnoreHintScope(const FPDenormalsModeState& state)
+    {
+        details::saveFPDenormalsState(saved_state);  // remember the current state first
+        details::restoreFPDenormalsState(state);  // then apply the state supplied by the caller
+    }
+
+    inline ~FPDenormalsIgnoreHintScope()
+    {
+        details::restoreFPDenormalsState(saved_state);
+    }
+
+protected:
+    FPDenormalsModeState saved_state;  // state to be restored on destruction
+};  // FPDenormalsIgnoreHintScope
+
+class FPDenormalsIgnoreHintScopeNOOP  // same constructors as FPDenormalsIgnoreHintScope, but does nothing
+{
+public:
+    inline FPDenormalsIgnoreHintScopeNOOP(bool ignore = true) { CV_UNUSED(ignore); }
+    inline FPDenormalsIgnoreHintScopeNOOP(const FPDenormalsModeState& state) { CV_UNUSED(state); }
+    inline ~FPDenormalsIgnoreHintScopeNOOP() { }
+};  // FPDenormalsIgnoreHintScopeNOOP
+
+}  // namespace details
+
+
+// Should depend on target compilation architecture only
+// Note: previously added archs should NOT be removed to preserve ABI compatibility
+#if defined(OPENCV_SUPPORTS_FP_DENORMALS_HINT)
+  // preserve configuration overloading through ports. NOTE(review): no FPDenormalsIgnoreHintScope typedef is emitted in this branch - presumably the port must provide it; confirm
+#elif defined(__i386__) || defined(__x86_64__) || defined(_M_X64) || defined(_X86_)
+typedef details::FPDenormalsIgnoreHintScope FPDenormalsIgnoreHintScope;
+#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 1
+#else
+#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 0
+typedef details::FPDenormalsIgnoreHintScopeNOOP FPDenormalsIgnoreHintScope;
+#endif
+
+}  // namespace cv
+
+#endif // OPENCV_CORE_FP_CONTROL_UTILS_HPP
diff --git a/modules/core/src/hal_internal.cpp b/modules/core/src/hal_internal.cpp
index cbe02780d2..8b74a35361 100644
--- a/modules/core/src/hal_internal.cpp
+++ b/modules/core/src/hal_internal.cpp
@@ -239,6 +239,21 @@ lapack_SVD(fptype* a, size_t a_step, fptype *w, fptype* u, size_t u_step, fptype
     else if(typeid(fptype) == typeid(double))
         OCV_LAPACK_FUNC(dgesdd)(mode, &m, &n, (double*)a, &lda, (double*)w, (double*)u, &ldu, (double*)vt, &ldv, (double*)buffer, &lwork, iworkBuf, info);
 
+#if defined(__clang__) && defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+    // Make sure MSAN sees the memory as having been written.
+    // MSAN cannot observe these writes because they happen inside the LAPACK routine, which is implemented in a different language.
+    __msan_unpoison(a, a_step * n);
+    __msan_unpoison(buffer, sizeof(fptype) * (lwork + 1));
+    if (u)
+      __msan_unpoison(u, u_step * m);
+    if (vt)
+      __msan_unpoison(vt, v_step * n);
+    if (w)
+      __msan_unpoison(w, sizeof(fptype) * std::min(m, n));
+#endif  // __has_feature(memory_sanitizer)
+#endif  // defined(__clang__) && defined(__has_feature)
+
     if(!(flags & CV_HAL_SVD_NO_UV))
         transpose_square_inplace(vt, ldv, n);
 
diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp
index fe64d5d7c9..763b8a8d6d 100644
--- a/modules/core/src/parallel.cpp
+++ b/modules/core/src/parallel.cpp
@@ -153,6 +153,9 @@
 
 #include "opencv2/core/detail/exception_ptr.hpp"  // CV__EXCEPTION_PTR = 1 if std::exception_ptr is available
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+#include <opencv2/core/utils/fp_control.private.hpp>
+
 using namespace cv;
 
 namespace cv {
@@ -203,6 +206,9 @@ namespace {
 
             // propagate main thread state
             rng = cv::theRNG();
+#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
+            details::saveFPDenormalsState(fp_denormals_base_state);
+#endif
 
 #ifdef OPENCV_TRACE
             traceRootRegion = CV_TRACE_NS::details::getCurrentRegion();
@@ -283,6 +289,11 @@ namespace {
                 }
             }
         }
+
+#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
+        details::FPDenormalsModeState fp_denormals_base_state;
+#endif
+
     private:
         ParallelLoopBodyWrapperContext(const ParallelLoopBodyWrapperContext&); // disabled
         ParallelLoopBodyWrapperContext& operator=(const ParallelLoopBodyWrapperContext&); // disabled
@@ -319,6 +330,9 @@ namespace {
 
             // propagate main thread state
             cv::theRNG() = ctx.rng;
+#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
+            FPDenormalsIgnoreHintScope fp_denormals_scope(ctx.fp_denormals_base_state);
+#endif
 
             cv::Range r;
             cv::Range wholeRange = ctx.wholeRange;
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index 3923f13b63..ebafee59e0 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -55,6 +55,9 @@
 
 #include <opencv2/core/utils/filesystem.private.hpp>
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+#include <opencv2/core/utils/fp_control.private.hpp>
+
 #ifndef OPENCV_WITH_THREAD_SANITIZER
   #if defined(__clang__) && defined(__has_feature)
   #if __has_feature(thread_sanitizer)
@@ -630,7 +633,7 @@ struct HWFeatures
             }
         }
     #elif (defined __ppc64__ || defined __PPC64__) && defined __FreeBSD__
-        unsigned int hwcap = 0;
+        unsigned long hwcap = 0;
         elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
         if (hwcap & PPC_FEATURE_HAS_VSX) {
             elf_aux_info(AT_HWCAP2, &hwcap, sizeof(hwcap));
@@ -2772,6 +2775,82 @@ void setUseIPP_NotExact(bool flag)
 
 } // namespace ipp
 
+
+namespace details {
+
+#if OPENCV_IMPL_FP_HINTS_X86
+#ifndef _MM_DENORMALS_ZERO_ON  // normally provided by pmmintrin.h (SSE3); fallback definition
+#define _MM_DENORMALS_ZERO_ON 0x0040
+#endif
+#ifndef _MM_DENORMALS_ZERO_MASK  // normally provided by pmmintrin.h (SSE3); fallback definition
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#endif
+#endif
+
+void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state)
+{
+#if OPENCV_IMPL_FP_HINTS_X86
+    unsigned mask = _MM_FLUSH_ZERO_MASK;  // the flush-to-zero bit is always managed
+    unsigned value = ignore ? _MM_FLUSH_ZERO_ON : 0;
+    if (featuresEnabled.have[CPU_SSE3])  // the denormals-are-zero bit is managed only when SSE3 is reported as enabled
+    {
+        mask |= _MM_DENORMALS_ZERO_MASK;
+        value |= ignore ? _MM_DENORMALS_ZERO_ON : 0;
+    }
+    const unsigned old_flags = _mm_getcsr();
+    const unsigned old_value = old_flags & mask;  // previous values of the managed bits only
+    unsigned flags = (old_flags & ~mask) | value;  // replace the managed bits, keep every other MXCSR bit
+    CV_LOG_DEBUG(NULL, "core: update FP mxcsr flags = " << cv::format("0x%08x", flags));
+    // save state: [0] = which bits are managed, [1] = their previous values
+    state.reserved[0] = (uint32_t)mask;
+    state.reserved[1] = (uint32_t)old_value;
+    _mm_setcsr(flags);
+#else
+    CV_UNUSED(ignore); CV_UNUSED(state);  // unsupported target: nothing is changed and 'state' is left untouched
+#endif
+}
+
+int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state)
+{
+#if OPENCV_IMPL_FP_HINTS_X86
+    unsigned mask = _MM_FLUSH_ZERO_MASK;
+    if (featuresEnabled.have[CPU_SSE3])  // same bit selection as in setFPDenormalsIgnoreHint()
+    {
+        mask |= _MM_DENORMALS_ZERO_MASK;
+    }
+    const unsigned old_flags = _mm_getcsr();
+    const unsigned old_value = old_flags & mask;
+    // save state: [0] = which bits are managed, [1] = their current values
+    state.reserved[0] = (uint32_t)mask;
+    state.reserved[1] = (uint32_t)old_value;
+    return 2;  // number of state.reserved[] entries filled above
+#else
+    CV_UNUSED(state);
+    return 0;  // nothing was saved
+#endif
+}
+
+bool restoreFPDenormalsState(const FPDenormalsModeState& state)
+{
+#if OPENCV_IMPL_FP_HINTS_X86
+    const unsigned mask = (unsigned)state.reserved[0];
+    CV_DbgAssert(mask != 0); // invalid state (ensure that state is properly saved earlier)
+    const unsigned value = (unsigned)state.reserved[1];
+    CV_DbgCheck((int)value, value == (value & mask), "invalid SSE FP state");  // saved bits must lie within the saved mask
+    const unsigned old_flags = _mm_getcsr();
+    unsigned flags = (old_flags & ~mask) | value;  // write back only the managed bits
+    CV_LOG_DEBUG(NULL, "core: restore FP mxcsr flags = " << cv::format("0x%08x", flags));
+    _mm_setcsr(flags);
+    return true;
+#else
+    CV_UNUSED(state);
+    return false;  // nothing to restore on this target
+#endif
+}
+
+}  // namespace details
+
+
 } // namespace cv
 
 /* End of file. */
diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp
index d9df475fa6..8ed0afe771 100644
--- a/modules/core/test/test_misc.cpp
+++ b/modules/core/test/test_misc.cpp
@@ -4,6 +4,15 @@
 #include "test_precomp.hpp"
 #include <cmath>
 
+#include "opencv2/core/utils/logger.hpp"
+
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
+#ifdef CV_CXX11
+#include <chrono>
+#include <thread>
+#endif
+
 namespace opencv_test { namespace {
 
 TEST(Core_OutputArrayCreate, _1997)
@@ -243,6 +252,62 @@ TEST(Core_Parallel, propagate_exceptions)
     }, cv::Exception);
 }
 
+class FPDenormalsHintCheckerParallelLoopBody : public cv::ParallelLoopBody  // compares the FP denormals state seen inside the loop body with the state captured at construction
+{
+public:
+    FPDenormalsHintCheckerParallelLoopBody()
+        : isOK(true)
+    {
+        state_values_to_check = cv::details::saveFPDenormalsState(base_state);  // reference state of the constructing thread; the return value bounds the comparison below
+    }
+    ~FPDenormalsHintCheckerParallelLoopBody() {}
+    void operator()(const cv::Range& r) const
+    {
+        CV_UNUSED(r);
+        cv::details::FPDenormalsModeState state;
+        if (cv::details::saveFPDenormalsState(state))  // non-zero result: FP state capture is supported
+        {
+            for (int i = 0; i < state_values_to_check; ++i)
+            {
+                if (base_state.reserved[i] != state.reserved[i])
+                {
+                    CV_LOG_ERROR(NULL, cv::format("FP state[%d] mismatch: base=0x%08x thread=0x%08x", i, base_state.reserved[i], state.reserved[i]));
+                    isOK = false;
+                    cv::details::restoreFPDenormalsState(base_state);  // apply the reference state to the current thread after reporting the mismatch
+                }
+            }
+        }
+        else
+        {
+            // FP state is not supported
+            // no checks
+        }
+#ifdef CV_CXX11
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));  // NOTE(review): presumably keeps each call busy long enough for several worker threads to take part - confirm
+#endif
+    }
+
+    cv::details::FPDenormalsModeState base_state;  // state captured in the constructor
+    int state_values_to_check;  // number of reserved[] entries to compare
+
+    mutable bool isOK;  // set to false from operator() on any mismatch
+};
+
+TEST(Core_Parallel, propagate_fp_denormals_ignore_hint)  // the FP denormals state of the calling thread must be visible inside parallel_for_ bodies
+{
+    int nThreads = std::max(1, cv::getNumThreads()) * 3;  // range size: three items per reported thread
+    for (int i = 0; i < 4; ++i)
+    {
+        SCOPED_TRACE(cv::format("Case=%d: FP denormals ignore hint: %s\n", i, ((i & 1) != 0) ? "enable" : "disable"));
+        FPDenormalsIgnoreHintScope fp_denormals_scope((i & 1) != 0);  // odd iterations enable the hint, even iterations disable it
+        FPDenormalsHintCheckerParallelLoopBody job;  // constructed inside the scope, so it captures the state set on the line above
+        ASSERT_NO_THROW({
+            parallel_for_(cv::Range(0, nThreads), job);
+        });
+        EXPECT_TRUE(job.isOK);
+    }
+}
+
 TEST(Core_Version, consistency)
 {
     // this test verifies that OpenCV version loaded in runtime
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index 8b2cba9ce1..e5e8186fa8 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -542,6 +542,18 @@ CV__DNN_INLINE_NS_BEGIN
          */
         void connect(int outLayerId, int outNum, int inpLayerId, int inpNum);
 
+        /** @brief Registers network output with name
+         *
+         *  Function may create additional 'Identity' layer.
+         *
+         *  @param outputName identifier of the output
+         *  @param layerId identifier of the layer that produces the output
+         *  @param outputPort index of the output of that layer to be registered
+         *
+         *  @returns index of bound layer (the same as layerId or newly created)
+         */
+        int registerOutput(const std::string& outputName, int layerId, int outputPort);
+
         /** @brief Sets outputs names of the network input pseudo layer.
          *
          * Each net always has special own the network input pseudo layer with id=0.
@@ -685,10 +697,14 @@ CV__DNN_INLINE_NS_BEGIN
         CV_WRAP inline Mat getParam(const String& layerName, int numParam = 0) const { return getParam(getLayerId(layerName), numParam); }
 
         /** @brief Returns indexes of layers with unconnected outputs.
+         *
+         * FIXIT: Rework API to registerOutput() approach, deprecate this call
          */
         CV_WRAP std::vector<int> getUnconnectedOutLayers() const;
 
         /** @brief Returns names of layers with unconnected outputs.
+         *
+         * FIXIT: Rework API to registerOutput() approach, deprecate this call
          */
         CV_WRAP std::vector<String> getUnconnectedOutLayersNames() const;
 
diff --git a/modules/dnn/src/caffe/caffe_importer.cpp b/modules/dnn/src/caffe/caffe_importer.cpp
index 7fb64c7c0d..a8d2f28ca6 100644
--- a/modules/dnn/src/caffe/caffe_importer.cpp
+++ b/modules/dnn/src/caffe/caffe_importer.cpp
@@ -53,6 +53,8 @@
 #include "caffe_io.hpp"
 #endif
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 namespace cv {
 namespace dnn {
 CV__DNN_INLINE_NS_BEGIN
@@ -88,6 +90,8 @@ MatShape parseBlobShape(const caffe::BlobShape& _input_shape)
 
 class CaffeImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     caffe::NetParameter net;
     caffe::NetParameter netBinary;
 
diff --git a/modules/dnn/src/darknet/darknet_importer.cpp b/modules/dnn/src/darknet/darknet_importer.cpp
index f1269bd979..b5767af405 100644
--- a/modules/dnn/src/darknet/darknet_importer.cpp
+++ b/modules/dnn/src/darknet/darknet_importer.cpp
@@ -51,6 +51,7 @@
 
 #include "darknet_io.hpp"
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
 
 namespace cv {
 namespace dnn {
@@ -61,6 +62,8 @@ namespace
 
 class DarknetImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     darknet::NetParameter net;
 
 public:
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index d79083a45c..4f7887e121 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -66,6 +66,8 @@
 #include <opencv2/imgproc.hpp>
 #include <opencv2/dnn/layer_reg.private.hpp>
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 #include <opencv2/core/utils/configuration.private.hpp>
 #include <opencv2/core/utils/logger.hpp>
 
@@ -1214,6 +1216,7 @@ struct Net::Impl : public detail::NetImplBase
     std::vector<LayerPin> blobsToKeep;
     MapIdToLayerData layers;
     std::map<String, int> layerNameToId;
+    std::map<std::string, int> outputNameToId;  // use registerOutput() to populate outputs
     BlobManager blobManager;
     int preferableBackend;
     int preferableTarget;
@@ -1632,6 +1635,38 @@ struct Net::Impl : public detail::NetImplBase
         return pins;
     }
 
+    // Adds a layer and returns its new id; returns -1 when an existing layer was updated in place instead. FIXIT remove dtype
+    int addLayer(const String &name, const String &type, const int &dtype, LayerParams &params)
+    {
+        int id = getLayerId(name);
+        if (id >= 0)  // a layer with this name is already registered
+        {
+            if (!DNN_DIAGNOSTICS_RUN || type != "NotImplemented")
+            {
+                CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net");
+                return -1;
+            }
+            else
+            {
+                // diagnostics run with a "NotImplemented" layer: overwrite type and params of the existing layer, no new id is allocated
+                LayerData& ld = layers.find(id)->second;
+                ld.type = type;
+                ld.params = params;
+                return -1;
+            }
+        }
+
+        id = ++lastLayerId;  // allocate the next layer id
+        layerNameToId.insert(std::make_pair(name, id));
+        layers.insert(std::make_pair(id, LayerData(id, name, type, dtype, params)));
+        if (params.get<bool>("has_dynamic_shapes", false))
+            hasDynamicShapes = true;
+
+        if (dtype == CV_8S)  // an 8-bit signed layer marks the whole net as quantized
+            netWasQuantized = true;
+
+        return id;
+    }
+
     void connect(int outLayerId, int outNum, int inLayerId, int inNum)
     {
         CV_Assert(outLayerId < inLayerId);
@@ -1641,6 +1676,40 @@ struct Net::Impl : public detail::NetImplBase
         addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
         ldOut.requiredOutputs.insert(outNum);
         ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
+
+        CV_LOG_VERBOSE(NULL, 0, "DNN: connect(" << outLayerId << ":" << outNum << " ==> " << inLayerId << ":" << inNum << ")");
+    }
+
+    int registerOutput(const std::string& outputName, int layerId, int outputPort)  // binds outputName to output 'outputPort' of layer 'layerId'; returns the id of the layer recorded in outputNameToId
+    {
+        int checkLayerId = getLayerId(outputName);
+        if (checkLayerId >= 0)  // some layer already carries the requested output name
+        {
+            if (checkLayerId == layerId)
+            {
+                if (outputPort == 0)
+                {
+                    // layer name correlates with its output name
+                    CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "': reuse layer with the same name and id=" << layerId << " to be linked");
+                    outputNameToId.insert(std::make_pair(outputName, layerId));
+                    return checkLayerId;
+                }
+            }
+            CV_Error_(Error::StsBadArg, ("Layer with name='%s' already exists id=%d (to be linked with %d:%d)", outputName.c_str(), checkLayerId, layerId, outputPort));  // name clash: a different layer, or a non-zero port of the same layer
+        }
+#if 0  // TODO
+        if (outputPort == 0)
+            // make alias only, need to adopt getUnconnectedOutLayers() call
+#endif
+        LayerParams outputLayerParams;  // no layer has this name yet: create an 'Identity' layer named after the output
+        outputLayerParams.name = outputName;
+        outputLayerParams.type = "Identity";
+        int dtype = CV_32F;  // FIXIT remove
+        int outputLayerId = addLayer(outputLayerParams.name, outputLayerParams.type, dtype, outputLayerParams);
+        connect(layerId, outputPort, outputLayerId, 0);  // feed the requested output into input 0 of the new layer
+        CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "' id=" << outputLayerId << " defined as " << layerId << ":" << outputPort);
+        outputNameToId.insert(std::make_pair(outputName, outputLayerId));
+        return outputLayerId;
     }
 
     void initBackend(const std::vector<LayerPin>& blobsToKeep_)
@@ -4324,6 +4393,9 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
     CV_UNUSED(xml); CV_UNUSED(bin);
     CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
 #else
+
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
     InferenceEngine::CNNNetReader reader;
     reader.ReadNetwork(xml);
@@ -4360,6 +4432,8 @@ Net Net::readFromModelOptimizer(
     CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
 #else
 
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
     InferenceEngine::CNNNetReader reader;
 
@@ -4410,34 +4484,8 @@ Net::~Net()
 int Net::addLayer(const String &name, const String &type, const int &dtype, LayerParams &params)
 {
     CV_TRACE_FUNCTION();
-
-    int id = impl->getLayerId(name);
-    if (id >= 0)
-    {
-        if (!DNN_DIAGNOSTICS_RUN || type != "NotImplemented")
-        {
-            CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net");
-            return -1;
-        }
-        else
-        {
-            LayerData& ld = impl->layers.find(id)->second;
-            ld.type = type;
-            ld.params = params;
-            return -1;
-        }
-    }
-
-    id = ++impl->lastLayerId;
-    impl->layerNameToId.insert(std::make_pair(name, id));
-    impl->layers.insert(std::make_pair(id, LayerData(id, name, type, dtype, params)));
-    if (params.get<bool>("has_dynamic_shapes", false))
-        impl->hasDynamicShapes = true;
-
-    if (dtype == CV_8S)
-        impl->netWasQuantized = true;
-
-    return id;
+    CV_Assert(impl);
+    return impl->addLayer(name, type, dtype, params);
 }
 
 int Net::addLayer(const String &name, const String &type, LayerParams &params)
@@ -4481,10 +4529,18 @@ void Net::connect(String _outPin, String _inPin)
     impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
 }
 
+int Net::registerOutput(const std::string& outputName, int layerId, int outputPort)  // public entry point, forwards to Impl::registerOutput()
+{
+    CV_TRACE_FUNCTION();
+    CV_Assert(impl);  // the implementation object must exist before forwarding
+    return impl->registerOutput(outputName, layerId, outputPort);
+}
+
 Mat Net::forward(const String& outputName)
 {
     CV_TRACE_FUNCTION();
     CV_Assert(!empty());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     String layerName = outputName;
 
@@ -4506,6 +4562,7 @@ AsyncArray Net::forwardAsync(const String& outputName)
 {
     CV_TRACE_FUNCTION();
     CV_Assert(!empty());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
 #ifdef CV_CXX11
     String layerName = outputName;
@@ -4537,6 +4594,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
 {
     CV_TRACE_FUNCTION();
     CV_Assert(!empty());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     String layerName = outputName;
 
@@ -4618,6 +4676,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs,
                   const std::vector<String>& outBlobNames)
 {
     CV_TRACE_FUNCTION();
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     std::vector<LayerPin> pins;
     for (int i = 0; i < outBlobNames.size(); i++)
@@ -4645,6 +4704,7 @@ void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
                      const std::vector<String>& outBlobNames)
 {
     CV_TRACE_FUNCTION();
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     std::vector<LayerPin> pins;
     for (int i = 0; i < outBlobNames.size(); i++)
@@ -5006,6 +5066,7 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
 {
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     LayerPin pin;
     pin.lid = 0;
@@ -5441,8 +5502,22 @@ bool Net::empty() const
 
 std::vector<int> Net::getUnconnectedOutLayers() const
 {
+    CV_TRACE_FUNCTION();
+    CV_Assert(impl);
+
     std::vector<int> layersIds;
 
+    // registerOutput() flow
+    const std::map<std::string, int>& outputNameToId = impl->outputNameToId;
+    if (!outputNameToId.empty())
+    {
+        for (std::map<std::string, int>::const_iterator it = outputNameToId.begin(); it != outputNameToId.end(); ++it)
+        {
+            layersIds.push_back(it->second);
+        }
+        return layersIds;
+    }
+
     Impl::MapIdToLayerData::const_iterator it;
     for (it = impl->layers.begin(); it != impl->layers.end(); it++)
     {
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 194397f557..df673fe864 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -1996,13 +1996,6 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-#if CV_SSE3
-        uint32_t ftzMode = _MM_GET_FLUSH_ZERO_MODE();
-        uint32_t dazMode = _MM_GET_DENORMALS_ZERO_MODE();
-        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
-        _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
-#endif
-
         CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))
 
@@ -2139,10 +2132,6 @@ public:
             ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
                             kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
         }
-#if CV_SSE3
-        _MM_SET_FLUSH_ZERO_MODE(ftzMode);
-        _MM_SET_DENORMALS_ZERO_MODE(dazMode);
-#endif
     }
 
 #ifdef HAVE_CUDA
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 73f394a799..f9fda41112 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -10,6 +10,8 @@
 
 #include <opencv2/dnn/layer_reg.private.hpp>
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 #include <opencv2/core/utils/logger.defines.hpp>
 #undef CV_LOG_STRIP_LEVEL
 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
@@ -52,6 +54,8 @@ class ONNXLayerHandler;
 
 class ONNXImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     opencv_onnx::ModelProto model_proto;
     struct LayerInfo {
         int layerId;
@@ -96,6 +100,7 @@ protected:
 
     std::map<std::string, LayerInfo> layer_id;
     typedef std::map<std::string, LayerInfo>::iterator IterLayerId_t;
+    typedef std::map<std::string, LayerInfo>::const_iterator ConstIterLayerId_t;
 
     void handleNode(const opencv_onnx::NodeProto& node_proto);
 
@@ -552,7 +557,11 @@ void ONNXImporter::addLayer(LayerParams& layerParams,
     int id = dstNet.addLayer(layerParams.name, layerParams.type, depth, layerParams);
     for (int i = 0; i < node_proto.output_size(); ++i)
     {
-        layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i)));
+        const std::string& output_name = node_proto.output(i);
+        if (!output_name.empty())
+        {
+            layer_id.insert(std::make_pair(output_name, LayerInfo(id, i)));
+        }
     }
 
     std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
@@ -575,7 +584,11 @@ void ONNXImporter::addLayer(LayerParams& layerParams,
     layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
     for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i)
     {
-        outShapes[node_proto.output(i)] = layerOutShapes[i];
+        const std::string& output_name = node_proto.output(i);
+        if (!output_name.empty())
+        {
+            outShapes[node_proto.output(i)] = layerOutShapes[i];
+        }
     }
 }
 
@@ -839,6 +852,27 @@ void ONNXImporter::populateNet()
         handleNode(node_proto);
     }
 
+    // register outputs
+    for (int i = 0; i < graph_proto.output_size(); ++i)
+    {
+        const std::string& output_name = graph_proto.output(i).name();
+        if (output_name.empty())
+        {
+            CV_LOG_ERROR(NULL, "DNN/ONNX: can't register output without name: " << i);
+            continue;
+        }
+        ConstIterLayerId_t layerIt = layer_id.find(output_name);
+        if (layerIt == layer_id.end())
+        {
+            CV_LOG_ERROR(NULL, "DNN/ONNX: can't find layer for output name: '" << output_name << "'. Does model imported properly?");
+            continue;
+        }
+
+        const LayerInfo& li = layerIt->second;
+        int outputId = dstNet.registerOutput(output_name, li.layerId, li.outputId); CV_UNUSED(outputId);
+        // no need to duplicate message from engine: CV_LOG_DEBUG(NULL, "DNN/ONNX: registered output='" << output_name << "' with id=" << outputId);
+    }
+
     CV_LOG_DEBUG(NULL, (DNN_DIAGNOSTICS_RUN ? "DNN/ONNX: diagnostic run completed!" : "DNN/ONNX: import completed!"));
 }
 
@@ -865,10 +899,30 @@ const ONNXImporter::DispatchMap& ONNXImporter::getDispatchMap(const opencv_onnx:
     return it->second;
 }
 
+const std::string& extractNodeName(const opencv_onnx::NodeProto& node_proto)
+{   // Chooses the identifier for a node: its own name when set, otherwise its first non-empty output name.
+    if (node_proto.has_name() && !node_proto.name().empty())  // an explicit, non-empty node name takes priority
+    {
+        return node_proto.name();
+    }
+    for (int i = 0; i < node_proto.output_size(); ++i)  // otherwise scan the outputs in declaration order
+    {
+        const std::string& name = node_proto.output(i);
+        // There are two ways to leave an optional input or output unspecified:
+        // the first, available only for trailing inputs and outputs, is to simply not provide that input;
+        // the second method is to use an empty string in place of an input or output name.
+        if (!name.empty())
+        {
+            return name;  // NOTE(review): presumably refers to storage owned by node_proto - confirm the caller keeps it alive
+        }
+    }
+    CV_Error(Error::StsAssert, "Couldn't deduce Node name.");  // reached only when there is no node name and no non-empty output
+}
+
 void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto)
 {
     CV_Assert(node_proto.output_size() >= 1);
-    const std::string& name = node_proto.output(0);
+    const std::string& name = extractNodeName(node_proto);
     const std::string& layer_type = node_proto.op_type();
     const std::string& layer_type_domain = getLayerTypeDomain(node_proto);
     const auto& dispatch = getDispatchMap(node_proto);
@@ -1037,6 +1091,7 @@ void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::Node
 {
     opencv_onnx::NodeProto node_proto = node_proto_;
     const std::string& layer_type = node_proto.op_type();
+    const std::string output_name = node_proto.output(0);
 
     CV_Assert(node_proto.input_size() == 1);
     layerParams.type = "Pooling";
@@ -1157,7 +1212,7 @@ void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::Node
         layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size()));
 
         node_proto.set_input(0, node_proto.output(0));
-        node_proto.set_output(0, layerParams.name);
+        node_proto.set_output(0, output_name);
     }
     else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax"))
     {
@@ -1190,7 +1245,7 @@ void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::Node
         layerParams.set("dim", DictValue::arrayInt(targetShape.data(), targetShape.size()));
 
         node_proto.set_input(0, node_proto.output(0));
-        node_proto.set_output(0, layerParams.name);
+        node_proto.set_output(0, output_name);
     }
     addLayer(layerParams, node_proto);
 }
@@ -1281,7 +1336,7 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP
                 {
                     Mat flipped;
                     flip(inp, flipped, 0);
-                    addConstant(layerParams.name, flipped);
+                    addConstant(node_proto.output(0), flipped);
                     return;
                 }
             }
@@ -1301,7 +1356,7 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP
         inputs.push_back(inp);
         runLayer(layerParams, inputs, sliced);
         CV_Assert(sliced.size() == 1);
-        addConstant(layerParams.name, sliced[0]);
+        addConstant(node_proto.output(0), sliced[0]);
         return;
     }
     addLayer(layerParams, node_proto);
@@ -1366,7 +1421,7 @@ void ONNXImporter::parseBias(LayerParams& layerParams, const opencv_onnx::NodePr
         Mat blob_1 = getBlob(node_proto, 1);
         CV_Assert(blob_0.size == blob_1.size);
         Mat output = isSub ? (blob_0 - blob_1) : (blob_0 + blob_1);
-        addConstant(layerParams.name, output);
+        addConstant(node_proto.output(0), output);
         return;
     }
     else if (is_const_0 || is_const_1)
@@ -1482,12 +1537,13 @@ void ONNXImporter::parseConstant(LayerParams& layerParams, const opencv_onnx::No
 {
     CV_Assert(node_proto.input_size() == 0);
     CV_Assert(layerParams.blobs.size() == 1);
-    addConstant(layerParams.name, layerParams.blobs[0]);
+    addConstant(node_proto.output(0), layerParams.blobs[0]);
 }
 
 void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
 {
     opencv_onnx::NodeProto node_proto = node_proto_;
+    const std::string output_name = node_proto.output(0);
     LayerParams lstmParams = layerParams;
     lstmParams.name += "/lstm";
 
@@ -1579,13 +1635,14 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr
     layerParams.type = "Reshape";
     layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size()));
     node_proto.set_input(0, lstmParams.name);  // redirect input to LSTM
-    node_proto.set_output(0, layerParams.name);  // keep origin LSTM's name
+    node_proto.set_output(0, output_name);  // keep origin LSTM's name
     addLayer(layerParams, node_proto);
 }
 
 void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
 {
     opencv_onnx::NodeProto node_proto = node_proto_;
+    const std::string output_name = node_proto.output(0);
     LayerParams gruParams = layerParams;
     gruParams.name += "/gru";
 
@@ -1619,7 +1676,7 @@ void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodePro
     layerParams.type = "Reshape";
     layerParams.set("dim", DictValue::arrayInt(&gruShape[0], gruShape.size()));
     node_proto.set_input(0, gruParams.name);  // redirect input to GRU
-    node_proto.set_output(0, layerParams.name);  // keep origin GRU's name
+    node_proto.set_output(0, output_name);  // keep origin GRU's name
     addLayer(layerParams, node_proto);
 }
 
@@ -1893,6 +1950,7 @@ void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodePro
 {
     opencv_onnx::NodeProto node_proto = node_proto_;
     const std::string& layer_type = node_proto.op_type();
+    const std::string output_name = node_proto.output(0);
     CV_Assert(node_proto.input_size() == 2);
 
     bool isDiv = layer_type == "Div";
@@ -1977,7 +2035,7 @@ void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodePro
 
         if (inp0.dims == 1 && inp1.dims == 1)
             out.dims = 1;  // to workaround dims == 1
-        addConstant(layerParams.name, out);
+        addConstant(output_name, out);
         return;
     }
     else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
@@ -1993,7 +2051,7 @@ void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodePro
             opencv_onnx::NodeProto proto;
             proto.add_input(node_proto.input(1));
             proto.add_input(node_proto.input(0));
-            proto.add_output(layerParams.name);
+            proto.add_output(output_name);
             node_proto = proto;
         }
 
@@ -2172,7 +2230,7 @@ void ONNXImporter::parseTranspose(LayerParams& layerParams, const opencv_onnx::N
         std::vector<Mat> inputs(1, getBlob(node_proto, 0)), transposed;
         runLayer(layerParams, inputs, transposed);
         CV_Assert(transposed.size() == 1);
-        addConstant(layerParams.name, transposed[0]);
+        addConstant(node_proto.output(0), transposed[0]);
         return;
     }
     addLayer(layerParams, node_proto);
@@ -2224,7 +2282,7 @@ void ONNXImporter::parseSqueeze(LayerParams& layerParams, const opencv_onnx::Nod
         Mat inp = getBlob(node_proto, 0);
         Mat out = inp.reshape(1, outShape);
         out.dims = outShape.size();  // to workaround dims == 1
-        addConstant(layerParams.name, out);
+        addConstant(node_proto.output(0), out);
         return;
     }
     int depth = layerParams.get<int>("depth", CV_32F);
@@ -2253,7 +2311,7 @@ void ONNXImporter::parseFlatten(LayerParams& layerParams, const opencv_onnx::Nod
         }
 
         Mat output = input.reshape(1, 2, out_size);
-        addConstant(layerParams.name, output);
+        addConstant(node_proto.output(0), output);
         return;
     }
     IterShape_t shapeIt = outShapes.find(node_proto.input(0));
@@ -2325,7 +2383,7 @@ void ONNXImporter::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::N
         }
 
         Mat out = input.reshape(0, dims);
-        addConstant(layerParams.name, out);
+        addConstant(node_proto.output(0), out);
         return;
     }
 
@@ -2364,6 +2422,7 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node
     CV_CheckEQ(node_proto.input_size(), 2, "");
     const std::string& input0 = node_proto.input(0);
     const std::string& input1 = node_proto.input(1);
+    const std::string output_name = node_proto.output(0);
     Mat newShapeMat = getBlob(input1);
     MatShape targetShape(newShapeMat.ptr<int>(), newShapeMat.ptr<int>() + newShapeMat.total());
 
@@ -2433,7 +2492,7 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node
         input = input.reshape(0, total(inpShape, 0, broadcast_axes[0]));
         Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]);
         output = output.reshape(0, targetShape);
-        addConstant(layerParams.name, output);
+        addConstant(output_name, output);
         return;
     }
 
@@ -2463,7 +2522,7 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node
 
         layerParams.set("axis", broadcast_axes[0]);
         layerParams.type = "Concat";
-        node_proto.set_output(0, layerParams.name);
+        node_proto.set_output(0, output_name);
     }
     else if (broadcast_axes.empty())
     {
@@ -2489,7 +2548,7 @@ void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::Nod
         if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
             std::vector<Mat> inputs(1, getBlob(node_proto, 0)), outputs;
             runLayer(layerParams, inputs, outputs);
-            addConstant(layerParams.name, outputs[0]);
+            addConstant(node_proto.output(0), outputs[0]);
             return;
         }
     }
@@ -2503,7 +2562,7 @@ void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::Nod
         if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
             Mat input = getBlob(node_proto, 0);
             Mat out = input.reshape(0, dim);
-            addConstant(layerParams.name, out);
+            addConstant(node_proto.output(0), out);
             return;
         }
         replaceLayerParam(layerParams, "shape", "dim");
@@ -2559,7 +2618,7 @@ void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeP
         // Disabled to pass face detector tests from #20422
         // CV_Assert(!isDynamicShape);  // not supported
     }
-    addConstant(layerParams.name, shapeMat);
+    addConstant(node_proto.output(0), shapeMat);
 }
 
 void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
@@ -2583,7 +2642,7 @@ void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodePr
         Mat dst;
         blob.convertTo(dst, type);
         dst.dims = blob.dims;
-        addConstant(layerParams.name, dst);
+        addConstant(node_proto.output(0), dst);
         return;
     }
     else
@@ -2610,7 +2669,7 @@ void ONNXImporter::parseConstantFill(LayerParams& layerParams, const opencv_onnx
     for (int i = 0; i < inpShape.size(); i++)
         CV_CheckGT(inpShape[i], 0, "");
     Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value));
-    addConstant(layerParams.name, tensor);
+    addConstant(node_proto.output(0), tensor);
 }
 
 void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
@@ -2638,7 +2697,7 @@ void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::Node
         } else {
             out.dims = 1;
         }
-        addConstant(layerParams.name, out);
+        addConstant(node_proto.output(0), out);
         return;
     }
     else
@@ -2732,7 +2791,7 @@ void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::Node
         runLayer(layerParams, inputs, concatenated);
 
         CV_Assert(concatenated.size() == 1);
-        addConstant(layerParams.name, concatenated[0]);
+        addConstant(node_proto.output(0), concatenated[0]);
         return;
     }
     else
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index fa190005ac..763abf3b4d 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -11,6 +11,8 @@ Implementation of Tensorflow models parser
 
 #include "../precomp.hpp"
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 #include <opencv2/core/utils/logger.defines.hpp>
 #include <opencv2/dnn/shape_utils.hpp>
 #undef CV_LOG_STRIP_LEVEL
@@ -513,6 +515,7 @@ class TFLayerHandler;
 
 class TFImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 public:
     TFImporter(Net& net, const char *model, const char *config = NULL);
     TFImporter(Net& net, const char *dataModel, size_t lenModel,
diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp
index 1e7f07a478..57a624d541 100644
--- a/modules/dnn/src/torch/torch_importer.cpp
+++ b/modules/dnn/src/torch/torch_importer.cpp
@@ -40,6 +40,9 @@
 //M*/
 
 #include "../precomp.hpp"
+
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 #include <limits>
 #include <set>
 #include <map>
@@ -106,6 +109,8 @@ static inline bool endsWith(const String &str, const char *substr)
 
 struct TorchImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     typedef std::map<String, std::pair<int, Mat> > TensorsMap;
     Net net;
 
diff --git a/modules/dnn/test/test_onnx_conformance.cpp b/modules/dnn/test/test_onnx_conformance.cpp
index 1c3877b7b2..0e912ede54 100644
--- a/modules/dnn/test/test_onnx_conformance.cpp
+++ b/modules/dnn/test/test_onnx_conformance.cpp
@@ -1181,10 +1181,10 @@ TEST_P(Test_ONNX_conformance, Layer_Test)
     }
 
     std::vector<std::string> layerNames = net.getUnconnectedOutLayersNames();
-    std::vector< std::vector<Mat> > outputs_;
+    std::vector<Mat> outputs;
     try
     {
-        net.forward(outputs_, layerNames);
+        net.forward(outputs, layerNames);
     }
     catch (...)
     {
@@ -1192,8 +1192,7 @@ TEST_P(Test_ONNX_conformance, Layer_Test)
         applyTestTag(CV_TEST_TAG_DNN_ERROR_FORWARD);
         throw;
     }
-    ASSERT_GE(outputs_.size(), 1);
-    const std::vector<Mat>& outputs = outputs_[0];
+    ASSERT_GE(outputs.size(), 1);
 
     if (checkLayersFallbacks && checkFallbacks(net))
     {
diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt
index a7fdfc8b67..65d24e0ab0 100644
--- a/modules/highgui/CMakeLists.txt
+++ b/modules/highgui/CMakeLists.txt
@@ -84,6 +84,9 @@ if(HAVE_QT)
         list(APPEND qt_deps OpenGLWidgets)
       endif()
       list(APPEND qt_deps OpenGL)
+      if(OPENGL_LIBRARIES)
+        list(APPEND HIGHGUI_LIBRARIES "${OPENGL_LIBRARIES}")
+      endif()
     endif()
 
     foreach(dt_dep ${qt_deps})
@@ -93,8 +96,11 @@ if(HAVE_QT)
     endforeach()
   else()
     ocv_assert(QT_VERSION_MAJOR EQUAL 4)
-    if (HAVE_QT_OPENGL)
+    if(HAVE_QT_OPENGL)
       set(QT_USE_QTOPENGL TRUE)
+      if(OPENGL_LIBRARIES)
+        list(APPEND HIGHGUI_LIBRARIES "${OPENGL_LIBRARIES}")
+      endif()
     endif()
     include(${QT_USE_FILE})
 
@@ -157,6 +163,9 @@ if(TARGET ocv.3rdparty.win32ui)
     set(OPENCV_HIGHGUI_BUILTIN_BACKEND "WIN32UI")
     list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_w32.cpp)
     list(APPEND tgts ocv.3rdparty.win32ui)
+    if(HAVE_OPENGL AND OPENGL_LIBRARIES)
+      list(APPEND tgts "${OPENGL_LIBRARIES}")
+    endif()
   endif()
 endif()
 
@@ -271,14 +280,6 @@ if(APPLE)
   add_apple_compiler_options(${the_module})
 endif()
 
-if(OPENCV_HIGHGUI_BUILTIN_BACKEND STREQUAL "WIN32UI" AND HAVE_OPENGL AND OPENGL_LIBRARIES)
-  ocv_target_link_libraries(${the_module} PRIVATE "${OPENGL_LIBRARIES}")
-endif()
-
-if(OPENCV_HIGHGUI_BUILTIN_BACKEND MATCHES "^QT" AND HAVE_OPENGL AND OPENGL_LIBRARIES)
-  ocv_target_link_libraries(${the_module} PRIVATE "${OPENGL_LIBRARIES}")
-endif()
-
 if(MSVC AND NOT BUILD_SHARED_LIBS AND BUILD_WITH_STATIC_CRT)
   set_target_properties(${the_module} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /NODEFAULTLIB:libcmt.lib /DEBUG")
 endif()
diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
index f56dc3b0de..148eea71e7 100644
--- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp
+++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
@@ -98,17 +98,17 @@ enum ImwriteFlags {
        IMWRITE_EXR_COMPRESSION     = (3 << 4) + 1, /* 49 */ //!< override EXR compression type (ZIP_COMPRESSION = 3 is default)
        IMWRITE_WEBP_QUALITY        = 64, //!< For WEBP, it can be a quality from 1 to 100 (the higher is the better). By default (without any parameter) and for quality above 100 the lossless compression is used.
        IMWRITE_PAM_TUPLETYPE       = 128,//!< For PAM, sets the TUPLETYPE field to the corresponding string value that is defined for the format
-       IMWRITE_TIFF_RESUNIT = 256,//!< For TIFF, use to specify which DPI resolution unit to set; see libtiff documentation for valid values
-       IMWRITE_TIFF_XDPI = 257,//!< For TIFF, use to specify the X direction DPI
-       IMWRITE_TIFF_YDPI = 258, //!< For TIFF, use to specify the Y direction DPI
-       IMWRITE_TIFF_COMPRESSION = 259, //!< For TIFF, use to specify the image compression scheme. See libtiff for integer constants corresponding to compression formats. Note, for images whose depth is CV_32F, only libtiff's SGILOG compression scheme is used. For other supported depths, the compression scheme can be specified by this flag; LZW compression is the default.
+       IMWRITE_TIFF_RESUNIT        = 256,//!< For TIFF, use to specify which DPI resolution unit to set; see libtiff documentation for valid values
+       IMWRITE_TIFF_XDPI           = 257,//!< For TIFF, use to specify the X direction DPI
+       IMWRITE_TIFF_YDPI           = 258,//!< For TIFF, use to specify the Y direction DPI
+       IMWRITE_TIFF_COMPRESSION    = 259,//!< For TIFF, use to specify the image compression scheme. See libtiff for integer constants corresponding to compression formats. Note, for images whose depth is CV_32F, only libtiff's SGILOG compression scheme is used. For other supported depths, the compression scheme can be specified by this flag; LZW compression is the default.
        IMWRITE_JPEG2000_COMPRESSION_X1000 = 272 //!< For JPEG2000, use to specify the target compression rate (multiplied by 1000). The value can be from 0 to 1000. Default is 1000.
      };
 
 enum ImwriteEXRTypeFlags {
        /*IMWRITE_EXR_TYPE_UNIT = 0, //!< not supported */
-       IMWRITE_EXR_TYPE_HALF = 1,   //!< store as HALF (FP16)
-       IMWRITE_EXR_TYPE_FLOAT = 2   //!< store as FP32 (default)
+       IMWRITE_EXR_TYPE_HALF   = 1, //!< store as HALF (FP16)
+       IMWRITE_EXR_TYPE_FLOAT  = 2  //!< store as FP32 (default)
      };
 
 enum ImwriteEXRCompressionFlags {
@@ -140,14 +140,14 @@ enum ImwritePNGFlags {
        IMWRITE_PNG_STRATEGY_FIXED        = 4  //!< Using this value prevents the use of dynamic Huffman codes, allowing for a simpler decoder for special applications.
      };
 
-//! Imwrite PAM specific tupletype flags used to define the 'TUPETYPE' field of a PAM file.
+//! Imwrite PAM specific tupletype flags used to define the 'TUPLETYPE' field of a PAM file.
 enum ImwritePAMFlags {
-       IMWRITE_PAM_FORMAT_NULL = 0,
-       IMWRITE_PAM_FORMAT_BLACKANDWHITE = 1,
-       IMWRITE_PAM_FORMAT_GRAYSCALE = 2,
+       IMWRITE_PAM_FORMAT_NULL            = 0,
+       IMWRITE_PAM_FORMAT_BLACKANDWHITE   = 1,
+       IMWRITE_PAM_FORMAT_GRAYSCALE       = 2,
        IMWRITE_PAM_FORMAT_GRAYSCALE_ALPHA = 3,
-       IMWRITE_PAM_FORMAT_RGB = 4,
-       IMWRITE_PAM_FORMAT_RGB_ALPHA = 5,
+       IMWRITE_PAM_FORMAT_RGB             = 4,
+       IMWRITE_PAM_FORMAT_RGB_ALPHA       = 5
      };
 
 //! @} imgcodecs_flags
diff --git a/modules/imgproc/src/drawing.cpp b/modules/imgproc/src/drawing.cpp
index 59047247e8..5e31482dfa 100644
--- a/modules/imgproc/src/drawing.cpp
+++ b/modules/imgproc/src/drawing.cpp
@@ -673,7 +673,7 @@ Line2( Mat& img, Point2l pt1, Point2l pt2, const void* color)
         pt1.y ^= pt2.y & j;
 
         x_step = XY_ONE;
-        y_step = (dy << XY_SHIFT) / (ax | 1);
+        y_step = dy * (1 << XY_SHIFT) / (ax | 1);
         ecount = (int)((pt2.x - pt1.x) >> XY_SHIFT);
     }
     else
@@ -686,7 +686,7 @@ Line2( Mat& img, Point2l pt1, Point2l pt2, const void* color)
         pt2.y ^= pt1.y & i;
         pt1.y ^= pt2.y & i;
 
-        x_step = (dx << XY_SHIFT) / (ay | 1);
+        x_step = dx * (1 << XY_SHIFT) / (ay | 1);
         y_step = XY_ONE;
         ecount = (int)((pt2.y - pt1.y) >> XY_SHIFT);
     }
diff --git a/samples/python/camera_calibration_show_extrinsics.py b/samples/python/camera_calibration_show_extrinsics.py
index d676691f15..0ee2a19b68 100755
--- a/samples/python/camera_calibration_show_extrinsics.py
+++ b/samples/python/camera_calibration_show_extrinsics.py
@@ -1,5 +1,18 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+
+'''
+Plot camera calibration extrinsics.
+
+usage:
+    camera_calibration_show_extrinsics.py [--calibration <input path>] [--cam_width] [--cam_height] [--scale_focal] [--patternCentric]
+
+default values:
+    --calibration    : left_intrinsics.yml
+    --cam_width      : 0.064/2
+    --cam_height     : 0.048/2
+    --scale_focal    : 40
+    --patternCentric : True
+'''
 
 # Python 2/3 compatibility
 from __future__ import print_function
diff --git a/samples/python/common.py b/samples/python/common.py
index 85cda62cd4..e7ad478b88 100755
--- a/samples/python/common.py
+++ b/samples/python/common.py
@@ -222,7 +222,7 @@ def mosaic(w, imgs):
     pad = np.zeros_like(img0)
     imgs = it.chain([img0], imgs)
     rows = grouper(w, imgs, pad)
-    return np.vstack(map(np.hstack, rows))
+    return np.vstack(list(map(np.hstack, rows)))
 
 def getsize(img):
     h, w = img.shape[:2]
diff --git a/samples/python/digits.py b/samples/python/digits.py
index e5d8ceb59a..25db411f94 100755
--- a/samples/python/digits.py
+++ b/samples/python/digits.py
@@ -191,3 +191,4 @@ if __name__ == '__main__':
     model.save('digits_svm.dat')
 
     cv.waitKey(0)
+    cv.destroyAllWindows()
diff --git a/samples/python/digits_video.py b/samples/python/digits_video.py
index 692da91219..17f44c333d 100755
--- a/samples/python/digits_video.py
+++ b/samples/python/digits_video.py
@@ -29,7 +29,7 @@ def main():
         src = sys.argv[1]
     except:
         src = 0
-    cap = video.create_capture(src)
+    cap = video.create_capture(src, fallback='synth:bg={}:noise=0.05'.format(cv.samples.findFile('sudoku.png')))
 
     classifier_fn = 'digits_svm.dat'
     if not os.path.exists(classifier_fn):
diff --git a/samples/python/facedetect.py b/samples/python/facedetect.py
index 488c92d5e5..248206a7cd 100755
--- a/samples/python/facedetect.py
+++ b/samples/python/facedetect.py
@@ -39,13 +39,13 @@ def main():
     except:
         video_src = 0
     args = dict(args)
-    cascade_fn = args.get('--cascade', "data/haarcascades/haarcascade_frontalface_alt.xml")
-    nested_fn  = args.get('--nested-cascade', "data/haarcascades/haarcascade_eye.xml")
+    cascade_fn = args.get('--cascade', "haarcascades/haarcascade_frontalface_alt.xml")
+    nested_fn  = args.get('--nested-cascade', "haarcascades/haarcascade_eye.xml")
 
     cascade = cv.CascadeClassifier(cv.samples.findFile(cascade_fn))
     nested = cv.CascadeClassifier(cv.samples.findFile(nested_fn))
 
-    cam = create_capture(video_src, fallback='synth:bg={}:noise=0.05'.format(cv.samples.findFile('samples/data/lena.jpg')))
+    cam = create_capture(video_src, fallback='synth:bg={}:noise=0.05'.format(cv.samples.findFile('lena.jpg')))
 
     while True:
         _ret, img = cam.read()
diff --git a/samples/python/qrcode.py b/samples/python/qrcode.py
index b3253f96c6..21b1a59073 100644
--- a/samples/python/qrcode.py
+++ b/samples/python/qrcode.py
@@ -245,4 +245,6 @@ def main():
 
 
 if __name__ == '__main__':
+    print(__doc__)
     main()
+    cv.destroyAllWindows()
diff --git a/samples/python/text_skewness_correction.py b/samples/python/text_skewness_correction.py
index c8ee33b39d..c3e97a333b 100644
--- a/samples/python/text_skewness_correction.py
+++ b/samples/python/text_skewness_correction.py
@@ -15,7 +15,7 @@ import argparse
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("-i", "--image", required=True, help="path to input image file")
+    parser.add_argument("-i", "--image", default="imageTextR.png", help="path to input image file")
     args = vars(parser.parse_args())
 
     # load the image from disk
@@ -37,9 +37,9 @@ def main():
     coords = cv.findNonZero(thresh)
     angle = cv.minAreaRect(coords)[-1]
     # the `cv.minAreaRect` function returns values in the
-    # range [-90, 0) if the angle is less than -45 we need to add 90 to it
-    if angle < -45:
-        angle = (90 + angle)
+    # range [0, 90); if the angle is more than 45, we need to subtract 90 from it
+    if angle > 45:
+        angle = (angle - 90)
 
     (h, w) = image.shape[:2]
     center = (w // 2, h // 2)
@@ -55,4 +55,6 @@ def main():
 
 
 if __name__ == "__main__":
+    print(__doc__)
     main()
+    cv.destroyAllWindows()