diff --git a/doc/opencv-logo.md b/doc/opencv-logo.md
new file mode 100644
index 0000000000..3ff85833a9
--- /dev/null
+++ b/doc/opencv-logo.md
@@ -0,0 +1,9 @@
+OpenCV logo has been originally designed and contributed to OpenCV by Adi Shavit in 2006. The graphical part consists of three stylized letters O, C, V, colored in the primary R, G, B color components, used by humans and computers to perceive the world. It is shaped in a way to mimic the famous [Kanizsa's triangle](https://en.wikipedia.org/wiki/Illusory_contours) to emphasize that the prior knowledge and internal processing are at least as important as the actually acquired "raw" data.
+
+The restyled version of the logo has been designed and contributed by [xperience.ai](https://xperience.ai/) in July 2020 for the [20th anniversary](https://opencv.org/anniversary/) of OpenCV.
+
+The logo uses [Exo 2](https://fonts.google.com/specimen/Exo+2#about) font by Natanael Gama distributed under OFL license.
+
+Higher-resolution version of the logo, as well as SVG version of it, can be obtained at OpenCV [Media Kit](https://opencv.org/resources/media-kit/).
+
+![](./opencv-logo2.png)
\ No newline at end of file
diff --git a/doc/opencv.bib b/doc/opencv.bib
index 975630a18d..6045a8434d 100644
--- a/doc/opencv.bib
+++ b/doc/opencv.bib
@@ -1238,3 +1238,10 @@
   number={2},
   pages={117-135},
 }
+@inproceedings{forstner1987fast,
+  title={A fast operator for detection and precise location of distincs points, corners and center of circular features},
+  author={FORSTNER, W},
+  booktitle={Proc. of the Intercommission Conference on Fast Processing of Photogrammetric Data, Interlaken, Switzerland, 1987},
+  pages={281--305},
+  year={1987}
+}
diff --git a/modules/core/src/trace.cpp b/modules/core/src/trace.cpp
index c316737b13..1c12c2a2e5 100644
--- a/modules/core/src/trace.cpp
+++ b/modules/core/src/trace.cpp
@@ -82,7 +82,12 @@ static bool getParameterTraceEnable()
 static int param_maxRegionDepthOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_DEPTH_OPENCV", 1);
 static int param_maxRegionChildrenOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN_OPENCV", 1000);
 static int param_maxRegionChildren = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN", 10000);
-static cv::String param_traceLocation = utils::getConfigurationParameterString("OPENCV_TRACE_LOCATION", "OpenCVTrace");
+
+static const cv::String& getParameterTraceLocation()
+{
+    static cv::String param_traceLocation = utils::getConfigurationParameterString("OPENCV_TRACE_LOCATION", "OpenCVTrace");
+    return param_traceLocation;
+}
 
 #ifdef HAVE_OPENCL
 static bool param_synchronizeOpenCL = utils::getConfigurationParameterBool("OPENCV_TRACE_SYNC_OPENCL", false);
@@ -813,7 +818,7 @@ TraceStorage* TraceManagerThreadLocal::getStorage() const
         TraceStorage* global = getTraceManager().trace_storage.get();
         if (global)
         {
-            const std::string filepath = cv::format("%s-%03d.txt", param_traceLocation.c_str(), threadID).c_str();
+            const std::string filepath = cv::format("%s-%03d.txt", getParameterTraceLocation().c_str(), threadID).c_str();
             TraceMessage msg;
             const char* pos = strrchr(filepath.c_str(), '/'); // extract filename
 #ifdef _WIN32
@@ -848,7 +853,7 @@ TraceManager::TraceManager()
     activated = getParameterTraceEnable();
 
     if (activated)
-        trace_storage.reset(new SyncTraceStorage(std::string(param_traceLocation) + ".txt"));
+        trace_storage.reset(new SyncTraceStorage(std::string(getParameterTraceLocation()) + ".txt"));
 
 #ifdef OPENCV_WITH_ITT
     if (isITTEnabled())
diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp
index 233b3783a4..d5942d1edc 100644
--- a/modules/core/src/umatrix.cpp
+++ b/modules/core/src/umatrix.cpp
@@ -80,6 +80,7 @@ UMatData::~UMatData()
     CV_Assert(mapcount == 0);
     data = origdata = 0;
     size = 0;
+    bool isAsyncCleanup = !!(flags & UMatData::ASYNC_CLEANUP);
     flags = static_cast<UMatData::MemoryFlag>(0);
     handle = 0;
     userdata = 0;
@@ -106,7 +107,7 @@ UMatData::~UMatData()
             showWarn = true;
         if (zero_Ref && zero_URef) // oops, we need to free resources
         {
-            showWarn = true;
+            showWarn = !isAsyncCleanup;
             // simulate UMat::deallocate
             u->currAllocator->deallocate(u);
         }
diff --git a/modules/core/test/test_umat.cpp b/modules/core/test/test_umat.cpp
index d25916c18a..c2bd6eceba 100644
--- a/modules/core/test/test_umat.cpp
+++ b/modules/core/test/test_umat.cpp
@@ -1154,6 +1154,30 @@ TEST(UMat, map_unmap_counting)
 }
 
 
+static void process_with_async_cleanup(Mat& frame)
+{
+    UMat blurResult;
+    {
+        UMat umat_buffer = frame.getUMat(ACCESS_READ);
+        cv::blur(umat_buffer, blurResult, Size(3, 3));  // UMat doesn't support inplace, this call is not synchronized
+    }
+    Mat result;
+    blurResult.copyTo(result);
+    swap(result, frame);
+    // umat_buffer cleanup is done asynchronously, silence warning about original 'frame' cleanup here (through 'result')
+    // - release input 'frame' (as 'result')
+    // - release 'umat_buffer' asynchronously and silence warning about "parent" buffer (in debug builds)
+}
+TEST(UMat, async_cleanup_without_call_chain_warning)
+{
+    Mat frame(Size(640, 480), CV_8UC1, Scalar::all(128));
+    for (int i = 0; i < 10; i++)
+    {
+        process_with_async_cleanup(frame);
+    }
+}
+
+
 ///////////// oclCleanupCallback threadsafe check (#5062) /////////////////////
 
 // Case 1: reuse of old src Mat in OCL pipe. Hard to catch!
diff --git a/modules/features2d/src/brisk.cpp b/modules/features2d/src/brisk.cpp
index 67c412316a..adb8b43d30 100644
--- a/modules/features2d/src/brisk.cpp
+++ b/modules/features2d/src/brisk.cpp
@@ -373,13 +373,30 @@ BRISK_Impl::generateKernel(const std::vector<float> &radiusList,
   const int rings = (int)radiusList.size();
   CV_Assert(radiusList.size() != 0 && radiusList.size() == numberList.size());
   points_ = 0; // remember the total number of points
+  double sineThetaLookupTable[n_rot_];
+  double cosThetaLookupTable[n_rot_];
   for (int ring = 0; ring < rings; ring++)
   {
     points_ += numberList[ring];
   }
+
+  // using a sine/cosine approximation for the lookup table
+  // utilizes the trig identities:
+  // sin(a + b) = sin(a)cos(b) + cos(a)sin(b)
+  // cos(a + b) = cos(a)cos(b) - sin(a)sin(b)
+  // and the fact that sin(0) = 0, cos(0) = 1
+  double cosval = 1., sinval = 0.;
+  double dcos = cos(2*CV_PI/double(n_rot_)), dsin = sin(2*CV_PI/double(n_rot_));
+  for( size_t rot = 0; rot < n_rot_; ++rot)
+  {
+    sineThetaLookupTable[rot] = sinval;
+    cosThetaLookupTable[rot] = cosval;
+    double t = sinval*dcos + cosval*dsin;
+    cosval = cosval*dcos - sinval*dsin;
+    sinval = t;
+  }
   // set up the patterns
   patternPoints_ = new BriskPatternPoint[points_ * scales_ * n_rot_];
-  BriskPatternPoint* patternIterator = patternPoints_;
 
   // define the scale discretization:
   static const float lb_scale = (float)(std::log(scalerange_) / std::log(2.0));
@@ -390,46 +407,51 @@ BRISK_Impl::generateKernel(const std::vector<float> &radiusList,
 
   const float sigma_scale = 1.3f;
 
-  for (unsigned int scale = 0; scale < scales_; ++scale)
-  {
-    scaleList_[scale] = (float)std::pow((double) 2.0, (double) (scale * lb_scale_step));
-    sizeList_[scale] = 0;
-
-    // generate the pattern points look-up
-    double alpha, theta;
-    for (size_t rot = 0; rot < n_rot_; ++rot)
-    {
-      theta = double(rot) * 2 * CV_PI / double(n_rot_); // this is the rotation of the feature
-      for (int ring = 0; ring < rings; ++ring)
-      {
-        for (int num = 0; num < numberList[ring]; ++num)
-        {
-          // the actual coordinates on the circle
-          alpha = (double(num)) * 2 * CV_PI / double(numberList[ring]);
-          patternIterator->x = (float)(scaleList_[scale] * radiusList[ring] * cos(alpha + theta)); // feature rotation plus angle of the point
-          patternIterator->y = (float)(scaleList_[scale] * radiusList[ring] * sin(alpha + theta));
-          // and the gaussian kernel sigma
-          if (ring == 0)
-          {
-            patternIterator->sigma = sigma_scale * scaleList_[scale] * 0.5f;
-          }
-          else
-          {
-            patternIterator->sigma = (float)(sigma_scale * scaleList_[scale] * (double(radiusList[ring]))
-                                     * sin(CV_PI / numberList[ring]));
+  for (unsigned int scale = 0; scale < scales_; ++scale) {
+      scaleList_[scale] = (float) std::pow((double) 2.0, (double) (scale * lb_scale_step));
+      sizeList_[scale] = 0;
+      BriskPatternPoint *patternIteratorOuter = patternPoints_ + (scale * n_rot_ * points_);
+      // generate the pattern points look-up
+      for (int ring = 0; ring < rings; ++ring) {
+          double scaleRadiusProduct = scaleList_[scale] * radiusList[ring];
+          float patternSigma = 0.0f;
+          if (ring == 0) {
+              patternSigma = sigma_scale * scaleList_[scale] * 0.5f;
+          } else {
+              patternSigma = (float) (sigma_scale * scaleList_[scale] * (double(radiusList[ring]))
+                                      * sin(CV_PI / numberList[ring]));
           }
           // adapt the sizeList if necessary
-          const unsigned int size = cvCeil(((scaleList_[scale] * radiusList[ring]) + patternIterator->sigma)) + 1;
-          if (sizeList_[scale] < size)
-          {
-            sizeList_[scale] = size;
+          const unsigned int size = cvCeil(((scaleList_[scale] * radiusList[ring]) + patternSigma)) + 1;
+          if (sizeList_[scale] < size) {
+              sizeList_[scale] = size;
           }
+          for (int num = 0; num < numberList[ring]; ++num) {
+              BriskPatternPoint *patternIterator = patternIteratorOuter;
+              double alpha = (double(num)) * 2 * CV_PI / double(numberList[ring]);
+              double sine_alpha = sin(alpha);
+              double cosine_alpha = cos(alpha);
 
-          // increment the iterator
-          ++patternIterator;
-        }
+              for (size_t rot = 0; rot < n_rot_; ++rot) {
+                  double cosine_theta = cosThetaLookupTable[rot];
+                  double sine_theta = sineThetaLookupTable[rot];
+
+                  // the actual coordinates on the circle
+                  // sin(a + b) = sin(a) cos(b) + cos(a) sin(b)
+                  // cos(a + b) = cos(a) cos(b) - sin(a) sin(b)
+                  patternIterator->x = (float) (scaleRadiusProduct *
+                                                (cosine_theta * cosine_alpha -
+                                                 sine_theta * sine_alpha)); // feature rotation plus angle of the point
+                  patternIterator->y = (float) (scaleRadiusProduct *
+                                                (sine_theta * cosine_alpha + cosine_theta * sine_alpha));
+                  patternIterator->sigma = patternSigma;
+                  // and the gaussian kernel sigma
+                  // increment the iterator
+                  patternIterator += points_;
+              }
+              ++patternIteratorOuter;
+          }
       }
-    }
   }
 
   // now also generate pairings
diff --git a/modules/flann/include/opencv2/flann.hpp b/modules/flann/include/opencv2/flann.hpp
index 78cf21a79f..04689037eb 100644
--- a/modules/flann/include/opencv2/flann.hpp
+++ b/modules/flann/include/opencv2/flann.hpp
@@ -95,6 +95,8 @@ using ::cvflann::MaxDistance;
 using ::cvflann::HammingLUT;
 using ::cvflann::Hamming;
 using ::cvflann::Hamming2;
+using ::cvflann::DNAmmingLUT;
+using ::cvflann::DNAmming2;
 using ::cvflann::HistIntersectionDistance;
 using ::cvflann::HellingerDistance;
 using ::cvflann::ChiSquareDistance;
@@ -131,6 +133,14 @@ performed using library calls, if available. Lookup table implementation is used
 cv::flann::Hamming2 - %Hamming distance functor. Population count is
 implemented in 12 arithmetic operations (one of which is multiplication).
 
+cv::flann::DNAmmingLUT -  %Adaptation of the Hamming distance functor to DNA comparison.
+As the four bases A, C, G, T of the DNA (or A, G, C, U for RNA) can be coded on 2 bits,
+it counts the bits pairs differences between two sequences using a lookup table implementation.
+
+cv::flann::DNAmming2 - %Adaptation of the Hamming distance functor to DNA comparison.
+Bases differences count are vectorised thanks to arithmetic operations using standard
+registers (AVX2 and AVX-512 should come in a near future).
+
 cv::flann::HistIntersectionDistance - The histogram
 intersection distance functor.
 
diff --git a/modules/flann/include/opencv2/flann/defines.h b/modules/flann/include/opencv2/flann/defines.h
index 884c6004d4..8ab83293ff 100644
--- a/modules/flann/include/opencv2/flann/defines.h
+++ b/modules/flann/include/opencv2/flann/defines.h
@@ -128,6 +128,7 @@ enum flann_distance_t
     FLANN_DIST_KULLBACK_LEIBLER  = 8,
     FLANN_DIST_KL                = 8,
     FLANN_DIST_HAMMING          = 9,
+    FLANN_DIST_DNAMMING          = 10,
 
     // deprecated constants, should use the FLANN_DIST_* ones instead
     EUCLIDEAN = 1,
diff --git a/modules/flann/include/opencv2/flann/dist.h b/modules/flann/include/opencv2/flann/dist.h
index e41b994d7e..608f8a507a 100644
--- a/modules/flann/include/opencv2/flann/dist.h
+++ b/modules/flann/include/opencv2/flann/dist.h
@@ -683,6 +683,8 @@ struct Hamming2
     template <typename Iterator1, typename Iterator2>
     ResultType operator()(const Iterator1 a, const Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
     {
+        CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (i.e. 8)");
+
 #ifdef FLANN_PLATFORM_64_BIT
         const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
         const uint64_t* pb = reinterpret_cast<const uint64_t*>(b);
@@ -711,6 +713,8 @@ struct Hamming2
     template <typename Iterator1>
     ResultType operator()(const Iterator1 a, ZeroIterator<unsigned char> b, size_t size, ResultType /*worst_dist*/ = -1) const
     {
+        CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (i.e. 8)");
+
         (void)b;
 #ifdef FLANN_PLATFORM_64_BIT
         const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
@@ -744,6 +748,157 @@ private:
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+struct DNAmmingLUT
+{
+    typedef False is_kdtree_distance;
+    typedef False is_vector_space_distance;
+
+    typedef unsigned char ElementType;
+    typedef int ResultType;
+    typedef ElementType CentersType;
+
+    /** this will count the bits in a ^ b
+     */
+    template<typename Iterator2>
+    ResultType operator()(const unsigned char* a, const Iterator2 b, size_t size) const
+    {
+        static const uchar popCountTable[] =
+        {
+            0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
+            1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
+            1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+            2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+            1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+            2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+            1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+            2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
+        };
+        ResultType result = 0;
+        const unsigned char* b2 = reinterpret_cast<const unsigned char*> (b);
+        for (size_t i = 0; i < size; i++) {
+            result += popCountTable[a[i] ^ b2[i]];
+        }
+        return result;
+    }
+
+
+    ResultType operator()(const unsigned char* a, const ZeroIterator<unsigned char> b, size_t size) const
+    {
+        (void)b;
+        static const uchar popCountTable[] =
+        {
+            0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
+            1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
+            1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+            2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+            1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+            2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+            1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+            2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
+        };
+        ResultType result = 0;
+        for (size_t i = 0; i < size; i++) {
+            result += popCountTable[a[i]];
+        }
+        return result;
+    }
+};
+
+
+template<typename T>
+struct DNAmming2
+{
+    typedef False is_kdtree_distance;
+    typedef False is_vector_space_distance;
+
+    typedef T ElementType;
+    typedef int ResultType;
+    typedef ElementType CentersType;
+
+    /** This is popcount_3() from:
+     * http://en.wikipedia.org/wiki/Hamming_weight */
+    unsigned int popcnt32(uint32_t n) const
+    {
+        n = ((n >> 1) | n) & 0x55555555;
+        n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
+        return (((n + (n >> 4))& 0x0F0F0F0F)* 0x01010101) >> 24;
+    }
+
+#ifdef FLANN_PLATFORM_64_BIT
+    unsigned int popcnt64(uint64_t n) const
+    {
+        n = ((n >> 1) | n) & 0x5555555555555555;
+        n = (n & 0x3333333333333333) + ((n >> 2) & 0x3333333333333333);
+        return (((n + (n >> 4))& 0x0f0f0f0f0f0f0f0f)* 0x0101010101010101) >> 56;
+    }
+#endif
+
+    template <typename Iterator1, typename Iterator2>
+    ResultType operator()(const Iterator1 a, const Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
+    {
+        CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (i.e. 8)");
+
+#ifdef FLANN_PLATFORM_64_BIT
+        const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
+        const uint64_t* pb = reinterpret_cast<const uint64_t*>(b);
+        ResultType result = 0;
+        size /= long_word_size_;
+        for(size_t i = 0; i < size; ++i ) {
+            result += popcnt64(*pa ^ *pb);
+            ++pa;
+            ++pb;
+        }
+#else
+        const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
+        const uint32_t* pb = reinterpret_cast<const uint32_t*>(b);
+        ResultType result = 0;
+        size /= long_word_size_;
+        for(size_t i = 0; i < size; ++i ) {
+            result += popcnt32(*pa ^ *pb);
+            ++pa;
+            ++pb;
+        }
+#endif
+        return result;
+    }
+
+
+    template <typename Iterator1>
+    ResultType operator()(const Iterator1 a, ZeroIterator<unsigned char> b, size_t size, ResultType /*worst_dist*/ = -1) const
+    {
+        CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (i.e. 8)");
+
+        (void)b;
+#ifdef FLANN_PLATFORM_64_BIT
+        const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
+        ResultType result = 0;
+        size /= long_word_size_;
+        for(size_t i = 0; i < size; ++i ) {
+            result += popcnt64(*pa);
+            ++pa;
+        }
+#else
+        const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
+        ResultType result = 0;
+        size /= long_word_size_;
+        for(size_t i = 0; i < size; ++i ) {
+            result += popcnt32(*pa);
+            ++pa;
+        }
+#endif
+        return result;
+    }
+
+private:
+#ifdef FLANN_PLATFORM_64_BIT
+    static const size_t long_word_size_= sizeof(uint64_t)/sizeof(unsigned char);
+#else
+    static const size_t long_word_size_= sizeof(uint32_t)/sizeof(unsigned char);
+#endif
+};
+
+
+
 template<class T>
 struct HistIntersectionDistance
 {
diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp
index 4ac0d9db50..156d3498cc 100644
--- a/modules/imgproc/include/opencv2/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc.hpp
@@ -1899,8 +1899,8 @@ CV_EXPORTS_W void preCornerDetect( InputArray src, OutputArray dst, int ksize,
 
 /** @brief Refines the corner locations.
 
-The function iterates to find the sub-pixel accurate location of corners or radial saddle points, as
-shown on the figure below.
+The function iterates to find the sub-pixel accurate location of corners or radial saddle
+points as described in @cite forstner1987fast, and as shown on the figure below.
 
 ![image](pics/cornersubpix.png)
 
diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp
index 58390eeb40..55b55bcc4b 100644
--- a/modules/videoio/src/cap_ffmpeg_impl.hpp
+++ b/modules/videoio/src/cap_ffmpeg_impl.hpp
@@ -152,6 +152,8 @@ extern "C" {
 #define AV_PIX_FMT_BGR24 PIX_FMT_BGR24
 #define AV_PIX_FMT_RGB24 PIX_FMT_RGB24
 #define AV_PIX_FMT_GRAY8 PIX_FMT_GRAY8
+#define AV_PIX_FMT_BGRA PIX_FMT_BGRA
+#define AV_PIX_FMT_RGBA PIX_FMT_RGBA
 #define AV_PIX_FMT_YUV422P PIX_FMT_YUV422P
 #define AV_PIX_FMT_YUV420P PIX_FMT_YUV420P
 #define AV_PIX_FMT_YUV444P PIX_FMT_YUV444P
@@ -369,7 +371,7 @@ struct AVInterruptCallbackMetadata
 
 // https://github.com/opencv/opencv/pull/12693#issuecomment-426236731
 static
-inline const char* _opencv_avcodec_get_name(AVCodecID id)
+inline const char* _opencv_avcodec_get_name(CV_CODEC_ID id)
 {
 #if LIBAVCODEC_VERSION_MICRO >= 100 \
     && LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(53, 47, 100)
@@ -1098,11 +1100,11 @@ bool CvCapture_FFMPEG::processRawPacket()
     {
         rawModeInitialized = true;
 #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
-        AVCodecID eVideoCodec = ic->streams[video_stream]->codecpar->codec_id;
+        CV_CODEC_ID eVideoCodec = ic->streams[video_stream]->codecpar->codec_id;
 #elif LIBAVFORMAT_BUILD > 4628
-        AVCodecID eVideoCodec = video_st->codec->codec_id;
+        CV_CODEC_ID eVideoCodec = video_st->codec->codec_id;
 #else
-        AVCodecID eVideoCodec = video_st->codec.codec_id;
+        CV_CODEC_ID eVideoCodec = video_st->codec.codec_id;
 #endif
         const char* filterName = NULL;
         if (eVideoCodec == CV_CODEC(CODEC_ID_H264)
@@ -1397,7 +1399,7 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const
     if( !video_st ) return 0;
 
     double codec_tag = 0;
-    AVCodecID codec_id = AV_CODEC_ID_NONE;
+    CV_CODEC_ID codec_id = AV_CODEC_ID_NONE;
     const char* codec_fourcc = NULL;
 
     switch( property_id )
@@ -1861,7 +1863,7 @@ static AVStream *icv_add_video_stream_FFMPEG(AVFormatContext *oc,
 
 #if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(54,25,0)
     // Set per-codec defaults
-    AVCodecID c_id = c->codec_id;
+    CV_CODEC_ID c_id = c->codec_id;
     avcodec_get_context_defaults3(c, codec);
     // avcodec_get_context_defaults3 erases codec_id for some reason
     c->codec_id = c_id;
diff --git a/samples/cpp/stereo_match.cpp b/samples/cpp/stereo_match.cpp
index 0cfaba6f70..55cd89c26e 100644
--- a/samples/cpp/stereo_match.cpp
+++ b/samples/cpp/stereo_match.cpp
@@ -14,15 +14,16 @@
 #include "opencv2/core/utility.hpp"
 
 #include <stdio.h>
+#include <sstream>
 
 using namespace cv;
 
 static void print_help(char** argv)
 {
     printf("\nDemo stereo matching converting L and R images into disparity and point clouds\n");
-    printf("\nUsage: %s <left_image> <right_image> [--algorithm=bm|sgbm|hh|sgbm3way] [--blocksize=<block_size>]\n"
+    printf("\nUsage: %s <left_image> <right_image> [--algorithm=bm|sgbm|hh|hh4|sgbm3way] [--blocksize=<block_size>]\n"
            "[--max-disparity=<max_disparity>] [--scale=scale_factor>] [-i=<intrinsic_filename>] [-e=<extrinsic_filename>]\n"
-           "[--no-display] [-o=<disparity_image>] [-p=<point_cloud_file>]\n", argv[0]);
+           "[--no-display] [--color] [-o=<disparity_image>] [-p=<point_cloud_file>]\n", argv[0]);
 }
 
 static void saveXYZ(const char* filename, const Mat& mat)
@@ -50,16 +51,17 @@ int main(int argc, char** argv)
     std::string disparity_filename = "";
     std::string point_cloud_filename = "";
 
-    enum { STEREO_BM=0, STEREO_SGBM=1, STEREO_HH=2, STEREO_VAR=3, STEREO_3WAY=4 };
+    enum { STEREO_BM=0, STEREO_SGBM=1, STEREO_HH=2, STEREO_VAR=3, STEREO_3WAY=4, STEREO_HH4=5 };
     int alg = STEREO_SGBM;
     int SADWindowSize, numberOfDisparities;
     bool no_display;
+    bool color_display;
     float scale;
 
     Ptr<StereoBM> bm = StereoBM::create(16,9);
     Ptr<StereoSGBM> sgbm = StereoSGBM::create(0,16,3);
     cv::CommandLineParser parser(argc, argv,
-        "{@arg1||}{@arg2||}{help h||}{algorithm||}{max-disparity|0|}{blocksize|0|}{no-display||}{scale|1|}{i||}{e||}{o||}{p||}");
+        "{@arg1||}{@arg2||}{help h||}{algorithm||}{max-disparity|0|}{blocksize|0|}{no-display||}{color||}{scale|1|}{i||}{e||}{o||}{p||}");
     if(parser.has("help"))
     {
         print_help(argv);
@@ -74,12 +76,14 @@ int main(int argc, char** argv)
             _alg == "sgbm" ? STEREO_SGBM :
             _alg == "hh" ? STEREO_HH :
             _alg == "var" ? STEREO_VAR :
+            _alg == "hh4" ? STEREO_HH4 :
             _alg == "sgbm3way" ? STEREO_3WAY : -1;
     }
     numberOfDisparities = parser.get<int>("max-disparity");
     SADWindowSize = parser.get<int>("blocksize");
     scale = parser.get<float>("scale");
     no_display = parser.has("no-display");
+    color_display = parser.has("color");
     if( parser.has("i") )
         intrinsic_filename = parser.get<std::string>("i");
     if( parser.has("e") )
@@ -238,6 +242,8 @@ int main(int argc, char** argv)
         sgbm->setMode(StereoSGBM::MODE_HH);
     else if(alg==STEREO_SGBM)
         sgbm->setMode(StereoSGBM::MODE_SGBM);
+    else if(alg==STEREO_HH4)
+        sgbm->setMode(StereoSGBM::MODE_HH4);
     else if(alg==STEREO_3WAY)
         sgbm->setMode(StereoSGBM::MODE_SGBM_3WAY);
 
@@ -254,7 +260,7 @@ int main(int argc, char** argv)
         if (disp.type() == CV_16S)
             disparity_multiplier = 16.0f;
     }
-    else if( alg == STEREO_SGBM || alg == STEREO_HH || alg == STEREO_3WAY )
+    else if( alg == STEREO_SGBM || alg == STEREO_HH || alg == STEREO_HH4 || alg == STEREO_3WAY )
     {
         sgbm->compute(img1, img2, disp);
         if (disp.type() == CV_16S)
@@ -268,22 +274,13 @@ int main(int argc, char** argv)
         disp.convertTo(disp8, CV_8U, 255/(numberOfDisparities*16.));
     else
         disp.convertTo(disp8, CV_8U);
-    if( !no_display )
-    {
-        namedWindow("left", 1);
-        imshow("left", img1);
-        namedWindow("right", 1);
-        imshow("right", img2);
-        namedWindow("disparity", 0);
-        imshow("disparity", disp8);
-        printf("press any key to continue...");
-        fflush(stdout);
-        waitKey();
-        printf("\n");
-    }
+
+    Mat disp8_3c;
+    if (color_display)
+        cv::applyColorMap(disp8, disp8_3c, COLORMAP_TURBO);
 
     if(!disparity_filename.empty())
-        imwrite(disparity_filename, disp8);
+        imwrite(disparity_filename, color_display ? disp8_3c : disp8);
 
     if(!point_cloud_filename.empty())
     {
@@ -297,5 +294,35 @@ int main(int argc, char** argv)
         printf("\n");
     }
 
+    if( !no_display )
+    {
+        std::ostringstream oss;
+        oss << "disparity  " << (alg==STEREO_BM ? "bm" :
+                                 alg==STEREO_SGBM ? "sgbm" :
+                                 alg==STEREO_HH ? "hh" :
+                                 alg==STEREO_VAR ? "var" :
+                                 alg==STEREO_HH4 ? "hh4" :
+                                 alg==STEREO_3WAY ? "sgbm3way" : "");
+        oss << "  blocksize:" << (alg==STEREO_BM ? SADWindowSize : sgbmWinSize);
+        oss << "  max-disparity:" << numberOfDisparities;
+        std::string disp_name = oss.str();
+
+        namedWindow("left", cv::WINDOW_NORMAL);
+        imshow("left", img1);
+        namedWindow("right", cv::WINDOW_NORMAL);
+        imshow("right", img2);
+        namedWindow(disp_name, cv::WINDOW_AUTOSIZE);
+        imshow(disp_name, color_display ? disp8_3c : disp8);
+
+        printf("press ESC key or CTRL+C to close...");
+        fflush(stdout);
+        printf("\n");
+        while(1)
+        {
+            if(waitKey() == 27) //ESC (prevents closing on actions like taking screenshots)
+                break;
+        }
+    }
+
     return 0;
 }