diff --git a/3rdparty/carotene/CMakeLists.txt b/3rdparty/carotene/CMakeLists.txt index 4319815708..bd26a2d7ef 100644 --- a/3rdparty/carotene/CMakeLists.txt +++ b/3rdparty/carotene/CMakeLists.txt @@ -27,7 +27,7 @@ if(CMAKE_COMPILER_IS_GNUCC) endif() endif() -add_library(carotene_objs OBJECT +add_library(carotene_objs OBJECT EXCLUDE_FROM_ALL ${carotene_headers} ${carotene_sources} ) @@ -41,4 +41,4 @@ if(WITH_NEON) endif() # we add dummy file to fix XCode build -add_library(carotene STATIC EXCLUDE_FROM_ALL "$" "${CAROTENE_SOURCE_DIR}/dummy.cpp") +add_library(carotene STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} "$" "${CAROTENE_SOURCE_DIR}/dummy.cpp") diff --git a/3rdparty/cpufeatures/CMakeLists.txt b/3rdparty/cpufeatures/CMakeLists.txt index 92bce6abf8..bf7af0ecde 100644 --- a/3rdparty/cpufeatures/CMakeLists.txt +++ b/3rdparty/cpufeatures/CMakeLists.txt @@ -14,7 +14,7 @@ if(NOT DEFINED CPUFEATURES_SOURCES) endif() include_directories(${CPUFEATURES_INCLUDE_DIRS}) -add_library(${OPENCV_CPUFEATURES_TARGET_NAME} STATIC ${CPUFEATURES_SOURCES}) +add_library(${OPENCV_CPUFEATURES_TARGET_NAME} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${CPUFEATURES_SOURCES}) set_target_properties(${OPENCV_CPUFEATURES_TARGET_NAME} PROPERTIES OUTPUT_NAME cpufeatures @@ -29,7 +29,7 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(${OPENCV_CPUFEATURES_TARGET_NAME} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(${OPENCV_CPUFEATURES_TARGET_NAME} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL) endif() ocv_install_3rdparty_licenses(cpufeatures LICENSE README.md) diff --git a/3rdparty/ippicv/CMakeLists.txt b/3rdparty/ippicv/CMakeLists.txt index 7931832737..43ad806dd7 100644 --- a/3rdparty/ippicv/CMakeLists.txt +++ b/3rdparty/ippicv/CMakeLists.txt @@ -17,7 +17,7 @@ file(GLOB lib_hdrs ${IPP_IW_PATH}/include/*.h ${IPP_IW_PATH}/include/iw/*.h ${IP # Define the 
library target: # ---------------------------------------------------------------------------------- -add_library(${IPP_IW_LIBRARY} STATIC ${lib_srcs} ${lib_hdrs}) +add_library(${IPP_IW_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${lib_srcs} ${lib_hdrs}) if(UNIX) if(CV_GCC OR CV_CLANG OR CV_ICC) @@ -41,5 +41,5 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(${IPP_IW_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(${IPP_IW_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL) endif() diff --git a/3rdparty/ittnotify/CMakeLists.txt b/3rdparty/ittnotify/CMakeLists.txt index c2caf76723..a227aff88e 100644 --- a/3rdparty/ittnotify/CMakeLists.txt +++ b/3rdparty/ittnotify/CMakeLists.txt @@ -37,7 +37,7 @@ set(ITT_SRCS src/ittnotify/jitprofiling.c ) -add_library(${ITT_LIBRARY} STATIC ${ITT_SRCS} ${ITT_PUBLIC_HDRS} ${ITT_PRIVATE_HDRS}) +add_library(${ITT_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${ITT_SRCS} ${ITT_PUBLIC_HDRS} ${ITT_PRIVATE_HDRS}) if(NOT WIN32) if(HAVE_DL_LIBRARY) @@ -60,7 +60,7 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(${ITT_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(${ITT_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL) endif() ocv_install_3rdparty_licenses(ittnotify src/ittnotify/LICENSE.BSD src/ittnotify/LICENSE.GPL) diff --git a/3rdparty/libjasper/CMakeLists.txt b/3rdparty/libjasper/CMakeLists.txt index 897b6ae606..9f05d89733 100644 --- a/3rdparty/libjasper/CMakeLists.txt +++ b/3rdparty/libjasper/CMakeLists.txt @@ -17,7 +17,7 @@ file(GLOB lib_ext_hdrs jasper/*.h) # Define the library target: # ---------------------------------------------------------------------------------- -add_library(${JASPER_LIBRARY} STATIC 
${lib_srcs} ${lib_hdrs} ${lib_ext_hdrs}) +add_library(${JASPER_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${lib_srcs} ${lib_hdrs} ${lib_ext_hdrs}) if(WIN32 AND NOT MINGW) add_definitions(-DJAS_WIN_MSVC_BUILD) @@ -46,7 +46,7 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(${JASPER_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(${JASPER_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL) endif() ocv_install_3rdparty_licenses(jasper LICENSE README copyright) diff --git a/3rdparty/libjpeg-turbo/CMakeLists.txt b/3rdparty/libjpeg-turbo/CMakeLists.txt index 374d7875de..901669a4a8 100644 --- a/3rdparty/libjpeg-turbo/CMakeLists.txt +++ b/3rdparty/libjpeg-turbo/CMakeLists.txt @@ -4,9 +4,9 @@ ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wsign-compare -Wshorten-6 set(VERSION_MAJOR 2) set(VERSION_MINOR 0) -set(VERSION_REVISION 5) +set(VERSION_REVISION 6) set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_REVISION}) -set(LIBJPEG_TURBO_VERSION_NUMBER 2000005) +set(LIBJPEG_TURBO_VERSION_NUMBER 2000006) string(TIMESTAMP BUILD "opencv-${OPENCV_VERSION}-libjpeg-turbo") if(CMAKE_BUILD_TYPE STREQUAL "Debug") @@ -106,7 +106,7 @@ set(JPEG_SOURCES ${JPEG_SOURCES} jsimd_none.c) ocv_list_add_prefix(JPEG_SOURCES src/) -add_library(${JPEG_LIBRARY} STATIC ${JPEG_SOURCES} ${SIMD_OBJS}) +add_library(${JPEG_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${JPEG_SOURCES} ${SIMD_OBJS}) set_target_properties(${JPEG_LIBRARY} PROPERTIES OUTPUT_NAME ${JPEG_LIBRARY} @@ -121,7 +121,7 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(${JPEG_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(${JPEG_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL) endif() 
ocv_install_3rdparty_licenses(libjpeg-turbo README.md LICENSE.md README.ijg) diff --git a/3rdparty/libjpeg-turbo/LICENSE.md b/3rdparty/libjpeg-turbo/LICENSE.md index 5ca512b34d..99c9aadcc4 100644 --- a/3rdparty/libjpeg-turbo/LICENSE.md +++ b/3rdparty/libjpeg-turbo/LICENSE.md @@ -91,7 +91,7 @@ best of our understanding. The Modified (3-clause) BSD License =================================== -Copyright (C)2009-2019 D. R. Commander. All Rights Reserved. +Copyright (C)2009-2020 D. R. Commander. All Rights Reserved. Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. Redistribution and use in source and binary forms, with or without diff --git a/3rdparty/libjpeg-turbo/README.ijg b/3rdparty/libjpeg-turbo/README.ijg index 2e39f965c2..d681cf1273 100644 --- a/3rdparty/libjpeg-turbo/README.ijg +++ b/3rdparty/libjpeg-turbo/README.ijg @@ -223,12 +223,12 @@ https://www.iso.org/standard/54989.html and http://www.itu.int/rec/T-REC-T.871. A PDF file of the older JFIF 1.02 specification is available at http://www.w3.org/Graphics/JPEG/jfif3.pdf. -The TIFF 6.0 file format specification can be obtained by FTP from -ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation scheme -found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems. -IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6). -Instead, we recommend the JPEG design proposed by TIFF Technical Note #2 -(Compression tag 7). Copies of this Note can be obtained from +The TIFF 6.0 file format specification can be obtained from +http://mirrors.ctan.org/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation +scheme found in the TIFF 6.0 spec of 3-June-92 has a number of serious +problems. IJG does not recommend use of the TIFF 6.0 design (TIFF Compression +tag 6). Instead, we recommend the JPEG design proposed by TIFF Technical Note +#2 (Compression tag 7). Copies of this Note can be obtained from http://www.ijg.org/files/. 
It is expected that the next revision of the TIFF spec will replace the 6.0 JPEG design with the Note's design. Although IJG's own code does not support TIFF/JPEG, the free libtiff library @@ -243,14 +243,8 @@ The most recent released version can always be found there in directory "files". The JPEG FAQ (Frequently Asked Questions) article is a source of some -general information about JPEG. -It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/ -and other news.answers archive sites, including the official news.answers -archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/. -If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu -with body - send usenet/news.answers/jpeg-faq/part1 - send usenet/news.answers/jpeg-faq/part2 +general information about JPEG. It is available at +http://www.faqs.org/faqs/jpeg-faq. FILE FORMAT COMPATIBILITY diff --git a/3rdparty/libjpeg-turbo/README.md b/3rdparty/libjpeg-turbo/README.md index e7ff743a47..90a4a43ee1 100644 --- a/3rdparty/libjpeg-turbo/README.md +++ b/3rdparty/libjpeg-turbo/README.md @@ -2,7 +2,7 @@ Background ========== libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate -baseline JPEG compression and decompression on x86, x86-64, ARM, PowerPC, and +baseline JPEG compression and decompression on x86, x86-64, Arm, PowerPC, and MIPS systems, as well as progressive JPEG compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. On other types of systems, libjpeg-turbo can still @@ -179,8 +179,8 @@ supported and which aren't. NOTE: As of this writing, extensive research has been conducted into the usefulness of DCT scaling as a means of data reduction and SmartScale as a -means of quality improvement. The reader is invited to peruse the research at - and draw his/her own conclusions, +means of quality improvement. 
Readers are invited to peruse the research at + and draw their own conclusions, but it is the general belief of our project that these features have not demonstrated sufficient usefulness to justify inclusion in libjpeg-turbo. @@ -287,12 +287,13 @@ following reasons: (and slightly faster) floating point IDCT algorithm introduced in libjpeg v8a as opposed to the algorithm used in libjpeg v6b. It should be noted, however, that this algorithm basically brings the accuracy of the floating - point IDCT in line with the accuracy of the slow integer IDCT. The floating - point DCT/IDCT algorithms are mainly a legacy feature, and they do not - produce significantly more accuracy than the slow integer algorithms (to put - numbers on this, the typical difference in PNSR between the two algorithms - is less than 0.10 dB, whereas changing the quality level by 1 in the upper - range of the quality scale is typically more like a 1.0 dB difference.) + point IDCT in line with the accuracy of the accurate integer IDCT. The + floating point DCT/IDCT algorithms are mainly a legacy feature, and they do + not produce significantly more accuracy than the accurate integer algorithms + (to put numbers on this, the typical difference in PNSR between the two + algorithms is less than 0.10 dB, whereas changing the quality level by 1 in + the upper range of the quality scale is typically more like a 1.0 dB + difference.) - If the floating point algorithms in libjpeg-turbo are not implemented using SIMD instructions on a particular platform, then the accuracy of the @@ -340,7 +341,7 @@ The algorithm used by the SIMD-accelerated quantization function cannot produce correct results whenever the fast integer forward DCT is used along with a JPEG quality of 98-100. Thus, libjpeg-turbo must use the non-SIMD quantization function in those cases. This causes performance to drop by as much as 40%. 
-It is therefore strongly advised that you use the slow integer forward DCT +It is therefore strongly advised that you use the accurate integer forward DCT whenever encoding images with a JPEG quality of 98 or higher. diff --git a/3rdparty/libjpeg-turbo/src/jchuff.c b/3rdparty/libjpeg-turbo/src/jchuff.c index cb05055d99..db85ce114f 100644 --- a/3rdparty/libjpeg-turbo/src/jchuff.c +++ b/3rdparty/libjpeg-turbo/src/jchuff.c @@ -34,10 +34,10 @@ * memory footprint by 64k, which is important for some mobile applications * that create many isolated instances of libjpeg-turbo (web browsers, for * instance.) This may improve performance on some mobile platforms as well. - * This feature is enabled by default only on ARM processors, because some x86 + * This feature is enabled by default only on Arm processors, because some x86 * chips have a slow implementation of bsr, and the use of clz/bsr cannot be * shown to have a significant performance impact even on the x86 chips that - * have a fast implementation of it. When building for ARMv6, you can + * have a fast implementation of it. When building for Armv6, you can * explicitly disable the use of clz/bsr by adding -mthumb to the compiler * flags (this defines __thumb__). */ diff --git a/3rdparty/libjpeg-turbo/src/jcinit.c b/3rdparty/libjpeg-turbo/src/jcinit.c index 78aa465786..157353a22e 100644 --- a/3rdparty/libjpeg-turbo/src/jcinit.c +++ b/3rdparty/libjpeg-turbo/src/jcinit.c @@ -1,8 +1,10 @@ /* * jcinit.c * + * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. + * libjpeg-turbo Modifications: + * Copyright (C) 2020, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. 
* @@ -19,6 +21,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" +#include "jpegcomp.h" /* diff --git a/3rdparty/libjpeg-turbo/src/jcphuff.c b/3rdparty/libjpeg-turbo/src/jcphuff.c index 8c4efaf16c..a8b94bed84 100644 --- a/3rdparty/libjpeg-turbo/src/jcphuff.c +++ b/3rdparty/libjpeg-turbo/src/jcphuff.c @@ -43,10 +43,10 @@ * memory footprint by 64k, which is important for some mobile applications * that create many isolated instances of libjpeg-turbo (web browsers, for * instance.) This may improve performance on some mobile platforms as well. - * This feature is enabled by default only on ARM processors, because some x86 + * This feature is enabled by default only on Arm processors, because some x86 * chips have a slow implementation of bsr, and the use of clz/bsr cannot be * shown to have a significant performance impact even on the x86 chips that - * have a fast implementation of it. When building for ARMv6, you can + * have a fast implementation of it. When building for Armv6, you can * explicitly disable the use of clz/bsr by adding -mthumb to the compiler * flags (this defines __thumb__). */ diff --git a/3rdparty/libjpeg-turbo/src/jctrans.c b/3rdparty/libjpeg-turbo/src/jctrans.c index ce70a30940..ab6a2186db 100644 --- a/3rdparty/libjpeg-turbo/src/jctrans.c +++ b/3rdparty/libjpeg-turbo/src/jctrans.c @@ -4,8 +4,8 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1998, Thomas G. Lane. * Modified 2000-2009 by Guido Vollbeding. - * It was modified by The libjpeg-turbo Project to include only code relevant - * to libjpeg-turbo. + * libjpeg-turbo Modifications: + * Copyright (C) 2020, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. 
* @@ -17,6 +17,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" +#include "jpegcomp.h" /* Forward declarations */ diff --git a/3rdparty/libjpeg-turbo/src/jdapistd.c b/3rdparty/libjpeg-turbo/src/jdapistd.c index 2c808fa564..38bd1110d9 100644 --- a/3rdparty/libjpeg-turbo/src/jdapistd.c +++ b/3rdparty/libjpeg-turbo/src/jdapistd.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2010, 2015-2018, D. R. Commander. + * Copyright (C) 2010, 2015-2018, 2020, D. R. Commander. * Copyright (C) 2015, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. @@ -21,6 +21,8 @@ #include "jinclude.h" #include "jdmainct.h" #include "jdcoefct.h" +#include "jdmaster.h" +#include "jdmerge.h" #include "jdsample.h" #include "jmemsys.h" @@ -316,6 +318,8 @@ LOCAL(void) read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) { JDIMENSION n; + my_master_ptr master = (my_master_ptr)cinfo->master; + JSAMPARRAY scanlines = NULL; void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) = NULL; @@ -332,8 +336,13 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) cinfo->cquantize->color_quantize = noop_quantize; } + if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) { + my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample; + scanlines = &upsample->spare_row; + } + for (n = 0; n < num_lines; n++) - jpeg_read_scanlines(cinfo, NULL, 1); + jpeg_read_scanlines(cinfo, scanlines, 1); if (color_convert) cinfo->cconvert->color_convert = color_convert; @@ -353,6 +362,12 @@ increment_simple_rowgroup_ctr(j_decompress_ptr cinfo, JDIMENSION rows) { JDIMENSION rows_left; my_main_ptr main_ptr = (my_main_ptr)cinfo->main; + my_master_ptr master = (my_master_ptr)cinfo->master; + + if 
(master->using_merged_upsample && cinfo->max_v_samp_factor == 2) { + read_and_discard_scanlines(cinfo, rows); + return; + } /* Increment the counter to the next row group after the skipped rows. */ main_ptr->rowgroup_ctr += rows / cinfo->max_v_samp_factor; @@ -382,21 +397,27 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) { my_main_ptr main_ptr = (my_main_ptr)cinfo->main; my_coef_ptr coef = (my_coef_ptr)cinfo->coef; + my_master_ptr master = (my_master_ptr)cinfo->master; my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; JDIMENSION i, x; int y; JDIMENSION lines_per_iMCU_row, lines_left_in_iMCU_row, lines_after_iMCU_row; JDIMENSION lines_to_skip, lines_to_read; + /* Two-pass color quantization is not supported. */ + if (cinfo->quantize_colors && cinfo->two_pass_quantize) + ERREXIT(cinfo, JERR_NOTIMPL); + if (cinfo->global_state != DSTATE_SCANNING) ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); /* Do not skip past the bottom of the image. */ if (cinfo->output_scanline + num_lines >= cinfo->output_height) { + num_lines = cinfo->output_height - cinfo->output_scanline; cinfo->output_scanline = cinfo->output_height; (*cinfo->inputctl->finish_input_pass) (cinfo); cinfo->inputctl->eoi_reached = TRUE; - return cinfo->output_height - cinfo->output_scanline; + return num_lines; } if (num_lines == 0) @@ -445,8 +466,10 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) main_ptr->buffer_full = FALSE; main_ptr->rowgroup_ctr = 0; main_ptr->context_state = CTX_PREPARE_FOR_IMCU; - upsample->next_row_out = cinfo->max_v_samp_factor; - upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + if (!master->using_merged_upsample) { + upsample->next_row_out = cinfo->max_v_samp_factor; + upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + } } /* Skipping is much simpler when context rows are not required. 
*/ @@ -458,8 +481,10 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) cinfo->output_scanline += lines_left_in_iMCU_row; main_ptr->buffer_full = FALSE; main_ptr->rowgroup_ctr = 0; - upsample->next_row_out = cinfo->max_v_samp_factor; - upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + if (!master->using_merged_upsample) { + upsample->next_row_out = cinfo->max_v_samp_factor; + upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + } } } @@ -494,7 +519,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row; increment_simple_rowgroup_ctr(cinfo, lines_to_read); } - upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + if (!master->using_merged_upsample) + upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; return num_lines; } @@ -535,7 +561,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) * bit odd, since "rows_to_go" seems to be redundantly keeping track of * output_scanline. */ - upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; + if (!master->using_merged_upsample) + upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; /* Always skip the requested number of lines. */ return num_lines; diff --git a/3rdparty/libjpeg-turbo/src/jdcoefct.c b/3rdparty/libjpeg-turbo/src/jdcoefct.c index 723a9ac2be..2ba6aa11e4 100644 --- a/3rdparty/libjpeg-turbo/src/jdcoefct.c +++ b/3rdparty/libjpeg-turbo/src/jdcoefct.c @@ -6,7 +6,7 @@ * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB * Copyright (C) 2010, 2015-2016, D. R. Commander. - * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, 2020, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. 
* @@ -495,11 +495,13 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) if (first_row && block_row == 0) prev_block_row = buffer_ptr; else - prev_block_row = buffer[block_row - 1]; + prev_block_row = buffer[block_row - 1] + + cinfo->master->first_MCU_col[ci]; if (last_row && block_row == block_rows - 1) next_block_row = buffer_ptr; else - next_block_row = buffer[block_row + 1]; + next_block_row = buffer[block_row + 1] + + cinfo->master->first_MCU_col[ci]; /* We fetch the surrounding DC values using a sliding-register approach. * Initialize all nine here so as to do the right thing on narrow pics. */ diff --git a/3rdparty/libjpeg-turbo/src/jdcolor.c b/3rdparty/libjpeg-turbo/src/jdcolor.c index dc0e3b6c0e..d3ae40c7da 100644 --- a/3rdparty/libjpeg-turbo/src/jdcolor.c +++ b/3rdparty/libjpeg-turbo/src/jdcolor.c @@ -571,11 +571,10 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, * RGB565 conversion */ -#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \ - (((g) << 3) & 0x7E0) | ((b) >> 3)) -#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \ - (((g) << 11) & 0xE000) | \ - (((b) << 5) & 0x1F00)) +#define PACK_SHORT_565_LE(r, g, b) \ + ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3)) +#define PACK_SHORT_565_BE(r, g, b) \ + (((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00)) #define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l) #define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r) diff --git a/3rdparty/libjpeg-turbo/src/jdmerge.c b/3rdparty/libjpeg-turbo/src/jdmerge.c index dff5a35087..3a456d6581 100644 --- a/3rdparty/libjpeg-turbo/src/jdmerge.c +++ b/3rdparty/libjpeg-turbo/src/jdmerge.c @@ -5,7 +5,7 @@ * Copyright (C) 1994-1996, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009, 2011, 2014-2015, D. R. Commander. + * Copyright (C) 2009, 2011, 2014-2015, 2020, D. R. Commander. * Copyright (C) 2013, Linaro Limited. 
* For conditions of distribution and use, see the accompanying README.ijg * file. @@ -40,41 +40,13 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" +#include "jdmerge.h" #include "jsimd.h" #include "jconfigint.h" #ifdef UPSAMPLE_MERGING_SUPPORTED -/* Private subobject */ - -typedef struct { - struct jpeg_upsampler pub; /* public fields */ - - /* Pointer to routine to do actual upsampling/conversion of one row group */ - void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); - - /* Private state for YCC->RGB conversion */ - int *Cr_r_tab; /* => table for Cr to R conversion */ - int *Cb_b_tab; /* => table for Cb to B conversion */ - JLONG *Cr_g_tab; /* => table for Cr to G conversion */ - JLONG *Cb_g_tab; /* => table for Cb to G conversion */ - - /* For 2:1 vertical sampling, we produce two output rows at a time. - * We need a "spare" row buffer to hold the second output row if the - * application provides just a one-row buffer; we also use the spare - * to discard the dummy last row if the image height is odd. 
- */ - JSAMPROW spare_row; - boolean spare_full; /* T if spare buffer is occupied */ - - JDIMENSION out_row_width; /* samples per output row */ - JDIMENSION rows_to_go; /* counts rows remaining in image */ -} my_upsampler; - -typedef my_upsampler *my_upsample_ptr; - #define SCALEBITS 16 /* speediest right-shift on some machines */ #define ONE_HALF ((JLONG)1 << (SCALEBITS - 1)) #define FIX(x) ((JLONG)((x) * (1L << SCALEBITS) + 0.5)) @@ -189,7 +161,7 @@ typedef my_upsampler *my_upsample_ptr; LOCAL(void) build_ycc_rgb_table(j_decompress_ptr cinfo) { - my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; + my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample; int i; JLONG x; SHIFT_TEMPS @@ -232,7 +204,7 @@ build_ycc_rgb_table(j_decompress_ptr cinfo) METHODDEF(void) start_pass_merged_upsample(j_decompress_ptr cinfo) { - my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; + my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample; /* Mark the spare buffer empty */ upsample->spare_full = FALSE; @@ -254,7 +226,7 @@ merged_2v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail) /* 2:1 vertical sampling case: may need a spare row. */ { - my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; + my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample; JSAMPROW work_ptrs[2]; JDIMENSION num_rows; /* number of rows returned to caller */ @@ -305,7 +277,7 @@ merged_1v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail) /* 1:1 vertical sampling case: much easier, never need a spare row. */ { - my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; + my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample; /* Just do the upsampling. 
*/ (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, @@ -420,11 +392,10 @@ h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, * RGB565 conversion */ -#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \ - (((g) << 3) & 0x7E0) | ((b) >> 3)) -#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \ - (((g) << 11) & 0xE000) | \ - (((b) << 5) & 0x1F00)) +#define PACK_SHORT_565_LE(r, g, b) \ + ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3)) +#define PACK_SHORT_565_BE(r, g, b) \ + (((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00)) #define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l) #define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r) @@ -566,11 +537,11 @@ h2v2_merged_upsample_565D(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, GLOBAL(void) jinit_merged_upsampler(j_decompress_ptr cinfo) { - my_upsample_ptr upsample; + my_merged_upsample_ptr upsample; - upsample = (my_upsample_ptr) + upsample = (my_merged_upsample_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, - sizeof(my_upsampler)); + sizeof(my_merged_upsampler)); cinfo->upsample = (struct jpeg_upsampler *)upsample; upsample->pub.start_pass = start_pass_merged_upsample; upsample->pub.need_context_rows = FALSE; diff --git a/3rdparty/libjpeg-turbo/src/jdmerge.h b/3rdparty/libjpeg-turbo/src/jdmerge.h new file mode 100644 index 0000000000..b583396b10 --- /dev/null +++ b/3rdparty/libjpeg-turbo/src/jdmerge.h @@ -0,0 +1,47 @@ +/* + * jdmerge.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2020, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. 
+ */ + +#define JPEG_INTERNALS +#include "jpeglib.h" + +#ifdef UPSAMPLE_MERGING_SUPPORTED + + +/* Private subobject */ + +typedef struct { + struct jpeg_upsampler pub; /* public fields */ + + /* Pointer to routine to do actual upsampling/conversion of one row group */ + void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + + /* Private state for YCC->RGB conversion */ + int *Cr_r_tab; /* => table for Cr to R conversion */ + int *Cb_b_tab; /* => table for Cb to B conversion */ + JLONG *Cr_g_tab; /* => table for Cr to G conversion */ + JLONG *Cb_g_tab; /* => table for Cb to G conversion */ + + /* For 2:1 vertical sampling, we produce two output rows at a time. + * We need a "spare" row buffer to hold the second output row if the + * application provides just a one-row buffer; we also use the spare + * to discard the dummy last row if the image height is odd. + */ + JSAMPROW spare_row; + boolean spare_full; /* T if spare buffer is occupied */ + + JDIMENSION out_row_width; /* samples per output row */ + JDIMENSION rows_to_go; /* counts rows remaining in image */ +} my_merged_upsampler; + +typedef my_merged_upsampler *my_merged_upsample_ptr; + +#endif /* UPSAMPLE_MERGING_SUPPORTED */ diff --git a/3rdparty/libjpeg-turbo/src/jdmrg565.c b/3rdparty/libjpeg-turbo/src/jdmrg565.c index 1b87e3718d..53f1e16700 100644 --- a/3rdparty/libjpeg-turbo/src/jdmrg565.c +++ b/3rdparty/libjpeg-turbo/src/jdmrg565.c @@ -5,7 +5,7 @@ * Copyright (C) 1994-1996, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright (C) 2013, Linaro Limited. - * Copyright (C) 2014-2015, 2018, D. R. Commander. + * Copyright (C) 2014-2015, 2018, 2020, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. 
* @@ -19,7 +19,7 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; + my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr; @@ -90,7 +90,7 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo, JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; + my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr; @@ -163,7 +163,7 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; + my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr0, outptr1; @@ -259,7 +259,7 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo, JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; + my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr0, outptr1; diff --git a/3rdparty/libjpeg-turbo/src/jdmrgext.c b/3rdparty/libjpeg-turbo/src/jdmrgext.c index b1c27df56a..c9a44d8219 100644 --- a/3rdparty/libjpeg-turbo/src/jdmrgext.c +++ b/3rdparty/libjpeg-turbo/src/jdmrgext.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2011, 2015, D. R. Commander. + * Copyright (C) 2011, 2015, 2020, D. R. Commander. 
* For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -25,7 +25,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; + my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr; @@ -97,7 +97,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; + my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr0, outptr1; diff --git a/3rdparty/libjpeg-turbo/src/jdtrans.c b/3rdparty/libjpeg-turbo/src/jdtrans.c index 56713efe64..d7ec4b83b3 100644 --- a/3rdparty/libjpeg-turbo/src/jdtrans.c +++ b/3rdparty/libjpeg-turbo/src/jdtrans.c @@ -3,8 +3,8 @@ * * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. - * It was modified by The libjpeg-turbo Project to include only code relevant - * to libjpeg-turbo. + * libjpeg-turbo Modifications: + * Copyright (C) 2020, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -16,6 +16,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" +#include "jpegcomp.h" /* Forward declarations */ diff --git a/3rdparty/libjpeg-turbo/src/jfdctint.c b/3rdparty/libjpeg-turbo/src/jfdctint.c index b47c3061ac..c95a3a7fb8 100644 --- a/3rdparty/libjpeg-turbo/src/jfdctint.c +++ b/3rdparty/libjpeg-turbo/src/jfdctint.c @@ -4,11 +4,11 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2015, D. R. Commander. + * Copyright (C) 2015, 2020, D. R. 
Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * - * This file contains a slow-but-accurate integer implementation of the + * This file contains a slower but more accurate integer implementation of the * forward DCT (Discrete Cosine Transform). * * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT diff --git a/3rdparty/libjpeg-turbo/src/jidctint.c b/3rdparty/libjpeg-turbo/src/jidctint.c index 98425d5fd0..50f385da33 100644 --- a/3rdparty/libjpeg-turbo/src/jidctint.c +++ b/3rdparty/libjpeg-turbo/src/jidctint.c @@ -5,11 +5,11 @@ * Copyright (C) 1991-1998, Thomas G. Lane. * Modification developed 2002-2009 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2015, D. R. Commander. + * Copyright (C) 2015, 2020, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * - * This file contains a slow-but-accurate integer implementation of the + * This file contains a slower but more accurate integer implementation of the * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine * must also perform dequantization of the input coefficients. * diff --git a/3rdparty/libjpeg-turbo/src/jmorecfg.h b/3rdparty/libjpeg-turbo/src/jmorecfg.h index d0b930079a..aa29f0f9f1 100644 --- a/3rdparty/libjpeg-turbo/src/jmorecfg.h +++ b/3rdparty/libjpeg-turbo/src/jmorecfg.h @@ -5,7 +5,7 @@ * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 1997-2009 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2009, 2011, 2014-2015, 2018, D. R. Commander. + * Copyright (C) 2009, 2011, 2014-2015, 2018, 2020, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. 
* @@ -273,9 +273,9 @@ typedef int boolean; /* Capability options common to encoder and decoder: */ -#define DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */ -#define DCT_IFAST_SUPPORTED /* faster, less accurate integer method */ -#define DCT_FLOAT_SUPPORTED /* floating-point: accurate, fast on fast HW */ +#define DCT_ISLOW_SUPPORTED /* accurate integer method */ +#define DCT_IFAST_SUPPORTED /* less accurate int method [legacy feature] */ +#define DCT_FLOAT_SUPPORTED /* floating-point method [legacy feature] */ /* Encoder capability options: */ diff --git a/3rdparty/libjpeg-turbo/src/jpegcomp.h b/3rdparty/libjpeg-turbo/src/jpegcomp.h index b32d544bf1..c4834ac0df 100644 --- a/3rdparty/libjpeg-turbo/src/jpegcomp.h +++ b/3rdparty/libjpeg-turbo/src/jpegcomp.h @@ -1,7 +1,7 @@ /* * jpegcomp.h * - * Copyright (C) 2010, D. R. Commander. + * Copyright (C) 2010, 2020, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -19,6 +19,7 @@ #define _min_DCT_v_scaled_size min_DCT_v_scaled_size #define _jpeg_width jpeg_width #define _jpeg_height jpeg_height +#define JERR_ARITH_NOTIMPL JERR_NOT_COMPILED #else #define _DCT_scaled_size DCT_scaled_size #define _DCT_h_scaled_size DCT_scaled_size diff --git a/3rdparty/libjpeg-turbo/src/jpeglib.h b/3rdparty/libjpeg-turbo/src/jpeglib.h index 33f8ad2791..d7664f0630 100644 --- a/3rdparty/libjpeg-turbo/src/jpeglib.h +++ b/3rdparty/libjpeg-turbo/src/jpeglib.h @@ -5,7 +5,7 @@ * Copyright (C) 1991-1998, Thomas G. Lane. * Modified 2002-2009 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2009-2011, 2013-2014, 2016-2017, D. R. Commander. + * Copyright (C) 2009-2011, 2013-2014, 2016-2017, 2020, D. R. Commander. * Copyright (C) 2015, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. @@ -244,9 +244,9 @@ typedef enum { /* DCT/IDCT algorithm options. 
*/ typedef enum { - JDCT_ISLOW, /* slow but accurate integer algorithm */ - JDCT_IFAST, /* faster, less accurate integer method */ - JDCT_FLOAT /* floating-point: accurate, fast on fast HW */ + JDCT_ISLOW, /* accurate integer method */ + JDCT_IFAST, /* less accurate integer method [legacy feature] */ + JDCT_FLOAT /* floating-point method [legacy feature] */ } J_DCT_METHOD; #ifndef JDCT_DEFAULT /* may be overridden in jconfig.h */ diff --git a/3rdparty/libjpeg-turbo/src/jquant2.c b/3rdparty/libjpeg-turbo/src/jquant2.c index 0ce0ca5472..6570613bb9 100644 --- a/3rdparty/libjpeg-turbo/src/jquant2.c +++ b/3rdparty/libjpeg-turbo/src/jquant2.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2009, 2014-2015, D. R. Commander. + * Copyright (C) 2009, 2014-2015, 2020, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -1145,7 +1145,7 @@ start_pass_2_quant(j_decompress_ptr cinfo, boolean is_pre_scan) int i; /* Only F-S dithering or no dithering is supported. */ - /* If user asks for ordered dither, give him F-S. */ + /* If user asks for ordered dither, give them F-S. */ if (cinfo->dither_mode != JDITHER_NONE) cinfo->dither_mode = JDITHER_FS; @@ -1263,7 +1263,7 @@ jinit_2pass_quantizer(j_decompress_ptr cinfo) cquantize->sv_colormap = NULL; /* Only F-S dithering or no dithering is supported. */ - /* If user asks for ordered dither, give him F-S. */ + /* If user asks for ordered dither, give them F-S. 
*/ if (cinfo->dither_mode != JDITHER_NONE) cinfo->dither_mode = JDITHER_FS; diff --git a/3rdparty/libjpeg-turbo/src/jversion.h b/3rdparty/libjpeg-turbo/src/jversion.h index ab4a2c5703..4462b94104 100644 --- a/3rdparty/libjpeg-turbo/src/jversion.h +++ b/3rdparty/libjpeg-turbo/src/jversion.h @@ -30,23 +30,25 @@ * NOTE: It is our convention to place the authors in the following order: * - libjpeg-turbo authors (2009-) in descending order of the date of their * most recent contribution to the project, then in ascending order of the - * date of their first contribution to the project + * date of their first contribution to the project, then in alphabetical + * order * - Upstream authors in descending order of the date of the first inclusion of * their code */ #define JCOPYRIGHT \ "Copyright (C) 2009-2020 D. R. Commander\n" \ - "Copyright (C) 2011-2016 Siarhei Siamashka\n" \ + "Copyright (C) 2015, 2020 Google, Inc.\n" \ + "Copyright (C) 2019 Arm Limited\n" \ "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \ + "Copyright (C) 2011-2016 Siarhei Siamashka\n" \ "Copyright (C) 2015 Intel Corporation\n" \ - "Copyright (C) 2015 Google, Inc.\n" \ + "Copyright (C) 2013-2014 Linaro Limited\n" \ "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \ - "Copyright (C) 2013 Linaro Limited\n" \ + "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \ "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \ - "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \ "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \ - "Copyright (C) 1991-2016 Thomas G. Lane, Guido Vollbeding" + "Copyright (C) 1991-2017 Thomas G. 
Lane, Guido Vollbeding" #define JCOPYRIGHT_SHORT \ "Copyright (C) 1991-2020 The libjpeg-turbo Project and many others" diff --git a/3rdparty/libjpeg/CMakeLists.txt b/3rdparty/libjpeg/CMakeLists.txt index b50fc09840..c0524cc38a 100644 --- a/3rdparty/libjpeg/CMakeLists.txt +++ b/3rdparty/libjpeg/CMakeLists.txt @@ -19,7 +19,7 @@ endif() # Define the library target: # ---------------------------------------------------------------------------------- -add_library(${JPEG_LIBRARY} STATIC ${lib_srcs} ${lib_hdrs}) +add_library(${JPEG_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${lib_srcs} ${lib_hdrs}) if(CV_GCC OR CV_CLANG) set_source_files_properties(jcdctmgr.c PROPERTIES COMPILE_FLAGS "-O1") @@ -42,7 +42,7 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(${JPEG_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(${JPEG_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL) endif() ocv_install_3rdparty_licenses(libjpeg README) diff --git a/3rdparty/libpng/CMakeLists.txt b/3rdparty/libpng/CMakeLists.txt index 31e77676e8..efa59627eb 100644 --- a/3rdparty/libpng/CMakeLists.txt +++ b/3rdparty/libpng/CMakeLists.txt @@ -74,7 +74,7 @@ if(MSVC) add_definitions(-D_CRT_SECURE_NO_DEPRECATE) endif(MSVC) -add_library(${PNG_LIBRARY} STATIC ${lib_srcs} ${lib_hdrs}) +add_library(${PNG_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${lib_srcs} ${lib_hdrs}) target_link_libraries(${PNG_LIBRARY} ${ZLIB_LIBRARIES}) ocv_warnings_disable(CMAKE_C_FLAGS -Wundef -Wcast-align -Wimplicit-fallthrough -Wunused-parameter -Wsign-compare) @@ -92,7 +92,7 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(${PNG_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(${PNG_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} 
COMPONENT dev OPTIONAL) endif() ocv_install_3rdparty_licenses(libpng LICENSE README) diff --git a/3rdparty/libtiff/CMakeLists.txt b/3rdparty/libtiff/CMakeLists.txt index 16cb598955..61e40b2885 100644 --- a/3rdparty/libtiff/CMakeLists.txt +++ b/3rdparty/libtiff/CMakeLists.txt @@ -462,7 +462,7 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4456 /wd4457 /wd4312) # vs2015 ocv_warnings_disable(CMAKE_C_FLAGS /wd4267 /wd4244 /wd4018 /wd4311 /wd4312) -add_library(${TIFF_LIBRARY} STATIC ${lib_srcs}) +add_library(${TIFF_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${lib_srcs}) target_link_libraries(${TIFF_LIBRARY} ${ZLIB_LIBRARIES}) set_target_properties(${TIFF_LIBRARY} @@ -479,7 +479,7 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(${TIFF_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(${TIFF_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL) endif() ocv_install_3rdparty_licenses(libtiff COPYRIGHT) diff --git a/3rdparty/libwebp/CMakeLists.txt b/3rdparty/libwebp/CMakeLists.txt index 83884c9d4d..80ab0b86ab 100644 --- a/3rdparty/libwebp/CMakeLists.txt +++ b/3rdparty/libwebp/CMakeLists.txt @@ -34,7 +34,7 @@ endif() add_definitions(-DWEBP_USE_THREAD) -add_library(${WEBP_LIBRARY} STATIC ${lib_srcs} ${lib_hdrs}) +add_library(${WEBP_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${lib_srcs} ${lib_hdrs}) if(ANDROID) target_link_libraries(${WEBP_LIBRARY} ${CPUFEATURES_LIBRARIES}) endif() @@ -59,6 +59,6 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(${WEBP_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(${WEBP_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL) endif() diff --git a/3rdparty/openexr/CMakeLists.txt b/3rdparty/openexr/CMakeLists.txt index 
2ee5146a3d..8d10e7d968 100644 --- a/3rdparty/openexr/CMakeLists.txt +++ b/3rdparty/openexr/CMakeLists.txt @@ -109,6 +109,7 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow -Wunused -Wsign-compare -Wundef -W -Wmissing-prototypes # gcc/clang -Wreorder -Wunused-result + -Wimplicit-const-int-float-conversion # clang ) if(CV_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0) ocv_warnings_disable(CMAKE_CXX_FLAGS -Wclass-memaccess) @@ -125,7 +126,7 @@ if(MSVC AND CV_ICC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qrestrict") endif() -add_library(IlmImf STATIC ${lib_hdrs} ${lib_srcs}) +add_library(IlmImf STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${lib_hdrs} ${lib_srcs}) target_link_libraries(IlmImf ${ZLIB_LIBRARIES}) set_target_properties(IlmImf @@ -142,7 +143,7 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(IlmImf EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(IlmImf EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL) endif() ocv_install_3rdparty_licenses(openexr LICENSE AUTHORS.ilmbase AUTHORS.openexr) diff --git a/3rdparty/openjpeg/CMakeLists.txt b/3rdparty/openjpeg/CMakeLists.txt index ec15bba850..b38bf28f05 100644 --- a/3rdparty/openjpeg/CMakeLists.txt +++ b/3rdparty/openjpeg/CMakeLists.txt @@ -11,6 +11,10 @@ set(OPENJPEG_LIBRARY_NAME libopenjp2) project(openjpeg C) +ocv_warnings_disable(CMAKE_C_FLAGS + -Wimplicit-const-int-float-conversion # clang +) + #----------------------------------------------------------------------------- # OPENJPEG version number, useful for packaging and doxygen doc: set(OPENJPEG_VERSION_MAJOR 2) diff --git a/3rdparty/protobuf/CMakeLists.txt b/3rdparty/protobuf/CMakeLists.txt index 26d6523988..c71bf9faff 100644 --- a/3rdparty/protobuf/CMakeLists.txt +++ b/3rdparty/protobuf/CMakeLists.txt @@ -140,7 +140,8 @@ append_if_exist(Protobuf_SRCS ${PROTOBUF_ROOT}/src/google/protobuf/wrappers.pb.cc ) 
-add_library(libprotobuf STATIC ${Protobuf_SRCS}) +include_directories(BEFORE "${PROTOBUF_ROOT}/src") # ensure using if own headers: https://github.com/opencv/opencv/issues/13328 +add_library(libprotobuf STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${Protobuf_SRCS}) target_include_directories(libprotobuf SYSTEM PUBLIC $) set_target_properties(libprotobuf PROPERTIES @@ -156,7 +157,7 @@ get_protobuf_version(Protobuf_VERSION "${PROTOBUF_ROOT}/src") set(Protobuf_VERSION ${Protobuf_VERSION} CACHE INTERNAL "" FORCE) if(NOT BUILD_SHARED_LIBS) - ocv_install_target(libprotobuf EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(libprotobuf EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL) endif() ocv_install_3rdparty_licenses(protobuf LICENSE README.md) diff --git a/3rdparty/quirc/CMakeLists.txt b/3rdparty/quirc/CMakeLists.txt index 7a6b2bb222..c0464c16ae 100644 --- a/3rdparty/quirc/CMakeLists.txt +++ b/3rdparty/quirc/CMakeLists.txt @@ -8,7 +8,7 @@ ocv_include_directories(${CURR_INCLUDE_DIR}) file(GLOB_RECURSE quirc_headers RELATIVE "${CMAKE_CURRENT_LIST_DIR}" "include/*.h") file(GLOB_RECURSE quirc_sources RELATIVE "${CMAKE_CURRENT_LIST_DIR}" "src/*.c") -add_library(${PROJECT_NAME} STATIC ${quirc_headers} ${quirc_sources}) +add_library(${PROJECT_NAME} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${quirc_headers} ${quirc_sources}) ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-variable -Wshadow) set_target_properties(${PROJECT_NAME} @@ -24,7 +24,7 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(${PROJECT_NAME} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(${PROJECT_NAME} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL) endif() ocv_install_3rdparty_licenses(${PROJECT_NAME} LICENSE) diff --git a/3rdparty/tbb/CMakeLists.txt 
b/3rdparty/tbb/CMakeLists.txt index 2aa9127da0..a085b0f3ca 100644 --- a/3rdparty/tbb/CMakeLists.txt +++ b/3rdparty/tbb/CMakeLists.txt @@ -108,7 +108,7 @@ set(tbb_version_file "version_string.ver") configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${tbb_version_file}.cmakein" "${CMAKE_CURRENT_BINARY_DIR}/${tbb_version_file}" @ONLY) list(APPEND TBB_SOURCE_FILES "${CMAKE_CURRENT_BINARY_DIR}/${tbb_version_file}") -add_library(tbb ${TBB_SOURCE_FILES}) +add_library(tbb ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${TBB_SOURCE_FILES}) target_compile_definitions(tbb PUBLIC TBB_USE_GCC_BUILTINS=1 __TBB_GCC_BUILTIN_ATOMICS_PRESENT=1 @@ -165,6 +165,7 @@ ocv_install_target(tbb EXPORT OpenCVModules RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT libs LIBRARY DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT libs ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev + OPTIONAL ) ocv_install_3rdparty_licenses(tbb "${tbb_src_dir}/LICENSE" "${tbb_src_dir}/README") diff --git a/3rdparty/zlib/CMakeLists.txt b/3rdparty/zlib/CMakeLists.txt index 553700bacc..9758861a6b 100644 --- a/3rdparty/zlib/CMakeLists.txt +++ b/3rdparty/zlib/CMakeLists.txt @@ -76,7 +76,7 @@ set(ZLIB_SRCS zutil.c ) -add_library(${ZLIB_LIBRARY} STATIC ${ZLIB_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS}) +add_library(${ZLIB_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${ZLIB_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS}) set_target_properties(${ZLIB_LIBRARY} PROPERTIES DEFINE_SYMBOL ZLIB_DLL) ocv_warnings_disable(CMAKE_C_FLAGS -Wshorten-64-to-32 -Wattributes -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wshift-negative-value diff --git a/CMakeLists.txt b/CMakeLists.txt index cd0341aea2..487efd5f7e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -368,6 +368,9 @@ OCV_OPTION(WITH_MSMF_DXVA "Enable hardware acceleration in Media Foundation back OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF VISIBLE_IF NOT ANDROID AND NOT WINRT VERIFY HAVE_XIMEA) +OCV_OPTION(WITH_UEYE "Include 
UEYE camera support" OFF + VISIBLE_IF NOT ANDROID AND NOT APPLE AND NOT WINRT + VERIFY HAVE_UEYE) OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF VISIBLE_IF UNIX AND NOT APPLE AND NOT ANDROID VERIFY HAVE_XINE) @@ -440,6 +443,9 @@ OCV_OPTION(WITH_ANDROID_MEDIANDK "Use Android Media NDK for Video I/O (Android)" OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF VISIBLE_IF (ARM OR AARCH64) AND (UNIX OR ANDROID) AND NOT IOS VERIFY HAVE_TENGINE) +OCV_OPTION(WITH_ONNX "Include Microsoft ONNX Runtime support" OFF + VISIBLE_IF TRUE + VERIFY HAVE_ONNX) # OpenCV build components # =================================================== @@ -782,6 +788,11 @@ if(WITH_QUIRC) add_subdirectory(3rdparty/quirc) set(HAVE_QUIRC TRUE) endif() + +if(WITH_ONNX) + include(cmake/FindONNX.cmake) +endif() + # ---------------------------------------------------------------------------- # OpenCV HAL # ---------------------------------------------------------------------------- @@ -1383,6 +1394,10 @@ if(WITH_XIMEA OR HAVE_XIMEA) status(" XIMEA:" HAVE_XIMEA THEN YES ELSE NO) endif() +if(WITH_UEYE OR HAVE_UEYE) + status(" uEye:" HAVE_UEYE THEN YES ELSE NO) +endif() + if(WITH_XINE OR HAVE_XINE) status(" Xine:" HAVE_XINE THEN "YES (ver ${XINE_VERSION})" ELSE NO) endif() @@ -1567,6 +1582,15 @@ if(WITH_OPENCL OR HAVE_OPENCL) endif() endif() +if(WITH_ONNX OR HAVE_ONNX) + status("") + status(" ONNX:" HAVE_ONNX THEN "YES" ELSE "NO") + if(HAVE_ONNX) + status(" Include path:" ONNX_INCLUDE_DIR THEN "${ONNX_INCLUDE_DIR}" ELSE "NO") + status(" Link libraries:" ONNX_LIBRARIES THEN "${ONNX_LIBRARIES}" ELSE "NO") + endif() +endif() + # ========================== python ========================== if(BUILD_opencv_python2) status("") diff --git a/apps/interactive-calibration/parametersController.cpp b/apps/interactive-calibration/parametersController.cpp index c76b915c63..3bcf5b86e9 100644 --- a/apps/interactive-calibration/parametersController.cpp +++ 
b/apps/interactive-calibration/parametersController.cpp @@ -32,7 +32,7 @@ bool calib::parametersController::loadFromFile(const std::string &inputFileName) if(!reader.isOpened()) { std::cerr << "Warning: Unable to open " << inputFileName << - " Applicatioin stated with default advanced parameters" << std::endl; + " Application started with default advanced parameters" << std::endl; return true; } diff --git a/cmake/FindONNX.cmake b/cmake/FindONNX.cmake new file mode 100644 index 0000000000..56dd6d5098 --- /dev/null +++ b/cmake/FindONNX.cmake @@ -0,0 +1,36 @@ +ocv_clear_vars(HAVE_ONNX) + +set(ONNXRT_ROOT_DIR "" CACHE PATH "ONNX Runtime install directory") + +# For now, check the old name ORT_INSTALL_DIR +if(ORT_INSTALL_DIR AND NOT ONNXRT_ROOT_DIR) + set(ONNXRT_ROOT_DIR ${ORT_INSTALL_DIR}) +endif() + +if(ONNXRT_ROOT_DIR) + find_library(ORT_LIB onnxruntime + ${ONNXRT_ROOT_DIR}/lib + CMAKE_FIND_ROOT_PATH_BOTH) + find_path(ORT_INCLUDE onnxruntime_cxx_api.h + ${ONNXRT_ROOT_DIR}/include/onnxruntime/core/session + CMAKE_FIND_ROOT_PATH_BOTH) +endif() + +if(ORT_LIB AND ORT_INCLUDE) + set(HAVE_ONNX TRUE) + # For CMake output only + set(ONNX_LIBRARIES "${ORT_LIB}" CACHE STRING "ONNX Runtime libraries") + set(ONNX_INCLUDE_DIR "${ORT_INCLUDE}" CACHE STRING "ONNX Runtime include path") + + # Link target with associated interface headers + set(ONNX_LIBRARY "onnxruntime" CACHE STRING "ONNX Link Target") + ocv_add_library(${ONNX_LIBRARY} SHARED IMPORTED) + set_target_properties(${ONNX_LIBRARY} PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES ${ORT_INCLUDE} + IMPORTED_LOCATION ${ORT_LIB} + IMPORTED_IMPLIB ${ORT_LIB}) +endif() + +if(NOT HAVE_ONNX) + ocv_clear_vars(HAVE_ONNX ORT_LIB ORT_INCLUDE_DIR) +endif() diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 080c78c547..929c5b5e51 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -153,7 +153,7 @@ if(CV_GCC OR CV_CLANG) if(CV_GCC AND CMAKE_CXX_COMPILER_VERSION 
VERSION_LESS 5.0) add_extra_compiler_option(-Wno-missing-field-initializers) # GCC 4.x emits warnings about {}, fixed in GCC 5+ endif() - if(CV_CLANG AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10.0) + if(CV_CLANG AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10.0) add_extra_compiler_option(-Wno-deprecated-enum-enum-conversion) add_extra_compiler_option(-Wno-deprecated-anon-enum-enum-conversion) endif() diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index a120ca0cef..d12a9e68ea 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -208,7 +208,7 @@ if(CUDA_FOUND) if(${status} EQUAL 0) # cache detected values - set(OPENCV_CACHE_CUDA_ACTIVE_CC ${${result_list}} CACHE INTERNAL "") + set(OPENCV_CACHE_CUDA_ACTIVE_CC ${${output}} CACHE INTERNAL "") set(OPENCV_CACHE_CUDA_ACTIVE_CC_check "${__cache_key_check}" CACHE INTERNAL "") endif() endif() diff --git a/cmake/OpenCVDetectInferenceEngine.cmake b/cmake/OpenCVDetectInferenceEngine.cmake index 63ef8f0b8f..c838a40409 100644 --- a/cmake/OpenCVDetectInferenceEngine.cmake +++ b/cmake/OpenCVDetectInferenceEngine.cmake @@ -129,9 +129,9 @@ endif() if(INF_ENGINE_TARGET) if(NOT INF_ENGINE_RELEASE) - message(WARNING "InferenceEngine version has not been set, 2020.4 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.") + message(WARNING "InferenceEngine version has not been set, 2021.1 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.") endif() - set(INF_ENGINE_RELEASE "2020040000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)") + set(INF_ENGINE_RELEASE "2021010000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 
2020.1.0.2 -> 2020010002)") set_target_properties(${INF_ENGINE_TARGET} PROPERTIES INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}" ) diff --git a/cmake/OpenCVFindLibsGrfmt.cmake b/cmake/OpenCVFindLibsGrfmt.cmake index 22e20b6b79..28aa47ba9c 100644 --- a/cmake/OpenCVFindLibsGrfmt.cmake +++ b/cmake/OpenCVFindLibsGrfmt.cmake @@ -15,11 +15,12 @@ else() endif() if(NOT ZLIB_FOUND) - ocv_clear_vars(ZLIB_LIBRARY ZLIB_LIBRARIES ZLIB_INCLUDE_DIRS) + ocv_clear_vars(ZLIB_LIBRARY ZLIB_LIBRARIES ZLIB_INCLUDE_DIR) - set(ZLIB_LIBRARY zlib) + set(ZLIB_LIBRARY zlib CACHE INTERNAL "") add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/zlib") - set(ZLIB_INCLUDE_DIRS "${${ZLIB_LIBRARY}_SOURCE_DIR}" "${${ZLIB_LIBRARY}_BINARY_DIR}") + set(ZLIB_INCLUDE_DIR "${${ZLIB_LIBRARY}_SOURCE_DIR}" "${${ZLIB_LIBRARY}_BINARY_DIR}" CACHE INTERNAL "") + set(ZLIB_INCLUDE_DIRS ${ZLIB_INCLUDE_DIR}) set(ZLIB_LIBRARIES ${ZLIB_LIBRARY}) ocv_parse_header2(ZLIB "${${ZLIB_LIBRARY}_SOURCE_DIR}/zlib.h" ZLIB_VERSION) @@ -37,16 +38,17 @@ if(WITH_JPEG) ocv_clear_vars(JPEG_LIBRARY JPEG_LIBRARIES JPEG_INCLUDE_DIR) if(NOT BUILD_JPEG_TURBO_DISABLE) - set(JPEG_LIBRARY libjpeg-turbo) + set(JPEG_LIBRARY libjpeg-turbo CACHE INTERNAL "") set(JPEG_LIBRARIES ${JPEG_LIBRARY}) add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libjpeg-turbo") - set(JPEG_INCLUDE_DIR "${${JPEG_LIBRARY}_SOURCE_DIR}/src") + set(JPEG_INCLUDE_DIR "${${JPEG_LIBRARY}_SOURCE_DIR}/src" CACHE INTERNAL "") else() - set(JPEG_LIBRARY libjpeg) + set(JPEG_LIBRARY libjpeg CACHE INTERNAL "") set(JPEG_LIBRARIES ${JPEG_LIBRARY}) add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libjpeg") - set(JPEG_INCLUDE_DIR "${${JPEG_LIBRARY}_SOURCE_DIR}") + set(JPEG_INCLUDE_DIR "${${JPEG_LIBRARY}_SOURCE_DIR}" CACHE INTERNAL "") endif() + set(JPEG_INCLUDE_DIRS "${JPEG_INCLUDE_DIR}") endif() macro(ocv_detect_jpeg_version header_file) @@ -83,10 +85,10 @@ if(WITH_TIFF) if(NOT TIFF_FOUND) ocv_clear_vars(TIFF_LIBRARY TIFF_LIBRARIES TIFF_INCLUDE_DIR) 
- set(TIFF_LIBRARY libtiff) + set(TIFF_LIBRARY libtiff CACHE INTERNAL "") set(TIFF_LIBRARIES ${TIFF_LIBRARY}) add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libtiff") - set(TIFF_INCLUDE_DIR "${${TIFF_LIBRARY}_SOURCE_DIR}" "${${TIFF_LIBRARY}_BINARY_DIR}") + set(TIFF_INCLUDE_DIR "${${TIFF_LIBRARY}_SOURCE_DIR}" "${${TIFF_LIBRARY}_BINARY_DIR}" CACHE INTERNAL "") ocv_parse_header("${${TIFF_LIBRARY}_SOURCE_DIR}/tiff.h" TIFF_VERSION_LINES TIFF_VERSION_CLASSIC TIFF_VERSION_BIG TIFF_VERSION TIFF_BIGTIFF_VERSION) endif() @@ -128,12 +130,12 @@ endif() if(WITH_WEBP AND NOT WEBP_FOUND AND (NOT ANDROID OR HAVE_CPUFEATURES) ) - - set(WEBP_LIBRARY libwebp) + ocv_clear_vars(WEBP_LIBRARY WEBP_INCLUDE_DIR) + set(WEBP_LIBRARY libwebp CACHE INTERNAL "") set(WEBP_LIBRARIES ${WEBP_LIBRARY}) add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libwebp") - set(WEBP_INCLUDE_DIR "${${WEBP_LIBRARY}_SOURCE_DIR}/src") + set(WEBP_INCLUDE_DIR "${${WEBP_LIBRARY}_SOURCE_DIR}/src" CACHE INTERNAL "") set(HAVE_WEBP 1) endif() @@ -192,10 +194,10 @@ if(WITH_JASPER AND NOT HAVE_OPENJPEG) if(NOT JASPER_FOUND) ocv_clear_vars(JASPER_LIBRARY JASPER_LIBRARIES JASPER_INCLUDE_DIR) - set(JASPER_LIBRARY libjasper) + set(JASPER_LIBRARY libjasper CACHE INTERNAL "") set(JASPER_LIBRARIES ${JASPER_LIBRARY}) add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libjasper") - set(JASPER_INCLUDE_DIR "${${JASPER_LIBRARY}_SOURCE_DIR}") + set(JASPER_INCLUDE_DIR "${${JASPER_LIBRARY}_SOURCE_DIR}" CACHE INTERNAL "") endif() set(HAVE_JASPER YES) @@ -225,10 +227,10 @@ if(WITH_PNG) if(NOT PNG_FOUND) ocv_clear_vars(PNG_LIBRARY PNG_LIBRARIES PNG_INCLUDE_DIR PNG_PNG_INCLUDE_DIR HAVE_LIBPNG_PNG_H PNG_DEFINITIONS) - set(PNG_LIBRARY libpng) + set(PNG_LIBRARY libpng CACHE INTERNAL "") set(PNG_LIBRARIES ${PNG_LIBRARY}) add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libpng") - set(PNG_INCLUDE_DIR "${${PNG_LIBRARY}_SOURCE_DIR}") + set(PNG_INCLUDE_DIR "${${PNG_LIBRARY}_SOURCE_DIR}" CACHE INTERNAL "") set(PNG_DEFINITIONS "") 
ocv_parse_header("${PNG_INCLUDE_DIR}/png.h" PNG_VERSION_LINES PNG_LIBPNG_VER_MAJOR PNG_LIBPNG_VER_MINOR PNG_LIBPNG_VER_RELEASE) endif() @@ -270,7 +272,7 @@ if(WITH_GDAL) endif() endif() -if (WITH_GDCM) +if(WITH_GDCM) find_package(GDCM QUIET) if(NOT GDCM_FOUND) set(HAVE_GDCM NO) diff --git a/cmake/OpenCVFindLibsPerf.cmake b/cmake/OpenCVFindLibsPerf.cmake index b9b1a95799..3753084d28 100644 --- a/cmake/OpenCVFindLibsPerf.cmake +++ b/cmake/OpenCVFindLibsPerf.cmake @@ -51,7 +51,15 @@ endif(WITH_CUDA) # --- Eigen --- if(WITH_EIGEN AND NOT HAVE_EIGEN) - find_package(Eigen3 QUIET) + if((OPENCV_FORCE_EIGEN_FIND_PACKAGE_CONFIG + OR NOT (CMAKE_VERSION VERSION_LESS "3.0.0") # Eigen3Targets.cmake required CMake 3.0.0+ + ) AND NOT OPENCV_SKIP_EIGEN_FIND_PACKAGE_CONFIG + ) + find_package(Eigen3 CONFIG QUIET) # Ceres 2.0.0 CMake scripts doesn't work with CMake's FindEigen3.cmake module (due to missing EIGEN3_VERSION_STRING) + endif() + if(NOT Eigen3_FOUND) + find_package(Eigen3 QUIET) + endif() if(Eigen3_FOUND) if(TARGET Eigen3::Eigen) diff --git a/cmake/OpenCVFindOpenBLAS.cmake b/cmake/OpenCVFindOpenBLAS.cmake index 6cb486d95d..d1db034908 100644 --- a/cmake/OpenCVFindOpenBLAS.cmake +++ b/cmake/OpenCVFindOpenBLAS.cmake @@ -57,7 +57,7 @@ SET(Open_BLAS_INCLUDE_SEARCH_PATHS ) SET(Open_BLAS_LIB_SEARCH_PATHS - $ENV{OpenBLAS}cd + $ENV{OpenBLAS} $ENV{OpenBLAS}/lib $ENV{OpenBLAS_HOME} $ENV{OpenBLAS_HOME}/lib diff --git a/cmake/OpenCVGenInfoPlist.cmake b/cmake/OpenCVGenInfoPlist.cmake index 90dd85479f..105087907f 100644 --- a/cmake/OpenCVGenInfoPlist.cmake +++ b/cmake/OpenCVGenInfoPlist.cmake @@ -2,7 +2,11 @@ set(OPENCV_APPLE_BUNDLE_NAME "OpenCV") set(OPENCV_APPLE_BUNDLE_ID "org.opencv") if(IOS) - if (APPLE_FRAMEWORK AND DYNAMIC_PLIST) + if(MAC_CATALYST) + # Copy the iOS plist over to the OSX directory if building iOS library for Catalyst + configure_file("${OpenCV_SOURCE_DIR}/platforms/ios/Info.plist.in" + "${CMAKE_BINARY_DIR}/osx/Info.plist") + elseif(APPLE_FRAMEWORK AND DYNAMIC_PLIST) 
configure_file("${OpenCV_SOURCE_DIR}/platforms/ios/Info.Dynamic.plist.in" "${CMAKE_BINARY_DIR}/ios/Info.plist") else() diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index a63861e987..2ad380236c 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -1512,10 +1512,16 @@ function(ocv_add_library target) set(CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG 1) + if(IOS AND NOT MAC_CATALYST) + set(OPENCV_APPLE_INFO_PLIST "${CMAKE_BINARY_DIR}/ios/Info.plist") + else() + set(OPENCV_APPLE_INFO_PLIST "${CMAKE_BINARY_DIR}/osx/Info.plist") + endif() + set_target_properties(${target} PROPERTIES FRAMEWORK TRUE MACOSX_FRAMEWORK_IDENTIFIER org.opencv - MACOSX_FRAMEWORK_INFO_PLIST ${CMAKE_BINARY_DIR}/ios/Info.plist + MACOSX_FRAMEWORK_INFO_PLIST ${OPENCV_APPLE_INFO_PLIST} # "current version" in semantic format in Mach-O binary file VERSION ${OPENCV_LIBVERSION} # "compatibility version" in semantic format in Mach-O binary file @@ -1882,6 +1888,13 @@ function(ocv_update_file filepath content) endif() endfunction() +if(NOT BUILD_SHARED_LIBS AND (CMAKE_VERSION VERSION_LESS "3.14.0")) + ocv_update(OPENCV_3RDPARTY_EXCLUDE_FROM_ALL "") # avoid CMake warnings: https://gitlab.kitware.com/cmake/cmake/-/issues/18938 +else() + ocv_update(OPENCV_3RDPARTY_EXCLUDE_FROM_ALL "EXCLUDE_FROM_ALL") +endif() + + # adopted from https://gist.github.com/amir-saniyan/de99cee82fa9d8d615bb69f3f53b6004 function(ocv_blob2hdr blob_filename hdr_filename cpp_variable) if(EXISTS "${hdr_filename}") diff --git a/cmake/templates/opencv_abi.xml.in b/cmake/templates/opencv_abi.xml.in index 212b6d67d4..711c4e99ee 100644 --- a/cmake/templates/opencv_abi.xml.in +++ b/cmake/templates/opencv_abi.xml.in @@ -32,6 +32,7 @@ opencv2/flann/hdf5.h opencv2/imgcodecs/imgcodecs_c.h opencv2/imgcodecs/ios.h + opencv2/imgcodecs/macosx.h opencv2/videoio/videoio_c.h opencv2/videoio/cap_ios.h opencv2/xobjdetect/private.hpp diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index e89a89daef..c19fe967b7 100644 --- 
a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -145,9 +145,23 @@ if(DOXYGEN_FOUND) set(tutorial_js_path "${CMAKE_CURRENT_SOURCE_DIR}/js_tutorials") set(example_path "${CMAKE_SOURCE_DIR}/samples") + set(doxygen_image_path + ${CMAKE_CURRENT_SOURCE_DIR}/images + ${paths_doc} + ${tutorial_path} + ${tutorial_py_path} + ${tutorial_js_path} + ${paths_tutorial} + #${OpenCV_SOURCE_DIR}/samples/data # TODO: need to resolve ambiguous conflicts first + ${OpenCV_SOURCE_DIR} + ${OpenCV_SOURCE_DIR}/modules # /modules + ${OPENCV_EXTRA_MODULES_PATH} # /modules + ${OPENCV_DOCS_EXTRA_IMAGE_PATH} # custom variable for user modules + ) + # set export variables string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INPUT_LIST "${rootfile} ; ${faqfile} ; ${paths_include} ; ${paths_hal_interface} ; ${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path} ; ${tutorial_js_path} ; ${paths_tutorial} ; ${tutorial_contrib_root}") - string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_IMAGE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/images ; ${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path} ; ${tutorial_js_path} ; ${paths_tutorial}") + string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_IMAGE_PATH "${doxygen_image_path}") string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_EXCLUDE_LIST "${CMAKE_DOXYGEN_EXCLUDE_LIST}") string(REPLACE ";" " " CMAKE_DOXYGEN_ENABLED_SECTIONS "${CMAKE_DOXYGEN_ENABLED_SECTIONS}") # TODO: remove paths_doc from EXAMPLE_PATH after face module tutorials/samples moved to separate folders diff --git a/doc/js_tutorials/js_setup/js_setup/js_setup.markdown b/doc/js_tutorials/js_setup/js_setup/js_setup.markdown index 87167cd219..435f06fe02 100644 --- a/doc/js_tutorials/js_setup/js_setup/js_setup.markdown +++ b/doc/js_tutorials/js_setup/js_setup/js_setup.markdown @@ -32,6 +32,15 @@ source ./emsdk_env.sh echo ${EMSCRIPTEN} @endcode +The version 1.39.16 of emscripten is verified for latest WebAssembly. Please check the version of emscripten to use the newest features of WebAssembly. 
+ +For example: +@code{.bash} +./emsdk update +./emsdk install 1.39.16 +./emsdk activate 1.39.16 +@endcode + Obtaining OpenCV Source Code -------------------------- @@ -76,6 +85,31 @@ Building OpenCV.js from Source python ./platforms/js/build_js.py build_wasm --build_wasm @endcode +-# [Optional] To build the OpenCV.js loader, append `--build_loader`. + + For example: + @code{.bash} + python ./platforms/js/build_js.py build_js --build_loader + @endcode + + @note + The loader is implemented as a js file in the path `/bin/loader.js`. The loader utilizes the [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) to detect the features of the browser and load corresponding OpenCV.js automatically. To use it, you need to use the UMD version of [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) and introduce the `loader.js` in your Web application. + + Example Code: + @code{.javascript} + // Set paths configuration + let pathsConfig = { + wasm: "../../build_wasm/opencv.js", + threads: "../../build_mt/opencv.js", + simd: "../../build_simd/opencv.js", + threadsSimd: "../../build_mtSIMD/opencv.js", + } + + // Load OpenCV.js and use the pathsConfiguration and main function as the params. + loadOpenCV(pathsConfig, main); + @endcode + + -# [optional] To build documents, append `--build_doc` option.
For example: diff --git a/doc/pattern_tools/gen_pattern.py b/doc/pattern_tools/gen_pattern.py index 1f90615736..a6ffc7ca7e 100755 --- a/doc/pattern_tools/gen_pattern.py +++ b/doc/pattern_tools/gen_pattern.py @@ -92,11 +92,11 @@ def main(): dest="square_size", type=float) parser.add_argument("-R", "--radius_rate", help="circles_radius = square_size/radius_rate", default="5.0", action="store", dest="radius_rate", type=float) - parser.add_argument("-w", "--page_width", help="page width in units", default="216", action="store", + parser.add_argument("-w", "--page_width", help="page width in units", default=argparse.SUPPRESS, action="store", dest="page_width", type=float) - parser.add_argument("-h", "--page_height", help="page height in units", default="279", action="store", - dest="page_width", type=float) - parser.add_argument("-a", "--page_size", help="page size, supersedes -h -w arguments", default="A4", action="store", + parser.add_argument("-h", "--page_height", help="page height in units", default=argparse.SUPPRESS, action="store", + dest="page_height", type=float) + parser.add_argument("-a", "--page_size", help="page size, superseded if -h and -w are set", default="A4", action="store", dest="page_size", choices=["A0", "A1", "A2", "A3", "A4", "A5"]) args = parser.parse_args() @@ -111,12 +111,16 @@ def main(): units = args.units square_size = args.square_size radius_rate = args.radius_rate - page_size = args.page_size - # page size dict (ISO standard, mm) for easy lookup. format - size: [width, height] - page_sizes = {"A0": [840, 1188], "A1": [594, 840], "A2": [420, 594], "A3": [297, 420], "A4": [210, 297], - "A5": [148, 210]} - page_width = page_sizes[page_size.upper()][0] - page_height = page_sizes[page_size.upper()][1] + if 'page_width' in args and 'page_height' in args: + page_width = args.page_width + page_height = args.page_height + else: + page_size = args.page_size + # page size dict (ISO standard, mm) for easy lookup.
format - size: [width, height] + page_sizes = {"A0": [840, 1188], "A1": [594, 840], "A2": [420, 594], "A3": [297, 420], "A4": [210, 297], + "A5": [148, 210]} + page_width = page_sizes[page_size][0] + page_height = page_sizes[page_size][1] pm = PatternMaker(columns, rows, output, units, square_size, radius_rate, page_width, page_height) # dict for easy lookup of pattern type mp = {"circles": pm.make_circles_pattern, "acircles": pm.make_acircles_pattern, diff --git a/doc/py_tutorials/py_calib3d/py_calibration/py_calibration.markdown b/doc/py_tutorials/py_calib3d/py_calibration/py_calibration.markdown index e337999efd..bba7b90b9f 100644 --- a/doc/py_tutorials/py_calib3d/py_calibration/py_calibration.markdown +++ b/doc/py_tutorials/py_calib3d/py_calibration/py_calibration.markdown @@ -209,7 +209,7 @@ find the average error, we calculate the arithmetical mean of the errors calcula calibration images. @code{.py} mean_error = 0 -for i in xrange(len(objpoints)): +for i in range(len(objpoints)): imgpoints2, _ = cv.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist) error = cv.norm(imgpoints[i], imgpoints2, cv.NORM_L2)/len(imgpoints2) mean_error += error diff --git a/doc/py_tutorials/py_calib3d/py_epipolar_geometry/py_epipolar_geometry.markdown b/doc/py_tutorials/py_calib3d/py_epipolar_geometry/py_epipolar_geometry.markdown index 3ed072c04d..6b8d90882a 100644 --- a/doc/py_tutorials/py_calib3d/py_epipolar_geometry/py_epipolar_geometry.markdown +++ b/doc/py_tutorials/py_calib3d/py_epipolar_geometry/py_epipolar_geometry.markdown @@ -79,7 +79,7 @@ from matplotlib import pyplot as plt img1 = cv.imread('myleft.jpg',0) #queryimage # left image img2 = cv.imread('myright.jpg',0) #trainimage # right image -sift = cv.SIFT() +sift = cv.SIFT_create() # find the keypoints and descriptors with SIFT kp1, des1 = sift.detectAndCompute(img1,None) @@ -93,14 +93,12 @@ search_params = dict(checks=50) flann = cv.FlannBasedMatcher(index_params,search_params) matches = 
flann.knnMatch(des1,des2,k=2) -good = [] pts1 = [] pts2 = [] # ratio test as per Lowe's paper for i,(m,n) in enumerate(matches): if m.distance < 0.8*n.distance: - good.append(m) pts2.append(kp2[m.trainIdx].pt) pts1.append(kp1[m.queryIdx].pt) @endcode diff --git a/doc/py_tutorials/py_imgproc/py_thresholding/images/threshold.jpg b/doc/py_tutorials/py_imgproc/py_thresholding/images/threshold.jpg index e203927791..c7053cc76d 100644 Binary files a/doc/py_tutorials/py_imgproc/py_thresholding/images/threshold.jpg and b/doc/py_tutorials/py_imgproc/py_thresholding/images/threshold.jpg differ diff --git a/doc/py_tutorials/py_imgproc/py_thresholding/py_thresholding.markdown b/doc/py_tutorials/py_imgproc/py_thresholding/py_thresholding.markdown index 285124d17c..0540098850 100644 --- a/doc/py_tutorials/py_imgproc/py_thresholding/py_thresholding.markdown +++ b/doc/py_tutorials/py_imgproc/py_thresholding/py_thresholding.markdown @@ -48,7 +48,7 @@ titles = ['Original Image','BINARY','BINARY_INV','TRUNC','TOZERO','TOZERO_INV'] images = [img, thresh1, thresh2, thresh3, thresh4, thresh5] for i in xrange(6): - plt.subplot(2,3,i+1),plt.imshow(images[i],'gray') + plt.subplot(2,3,i+1),plt.imshow(images[i],'gray',vmin=0,vmax=255) plt.title(titles[i]) plt.xticks([]),plt.yticks([]) diff --git a/doc/tutorials/core/how_to_use_OpenCV_parallel_for_/how_to_use_OpenCV_parallel_for_.markdown b/doc/tutorials/core/how_to_use_OpenCV_parallel_for_/how_to_use_OpenCV_parallel_for_.markdown index 8e863ec79e..80cc6c68fe 100644 --- a/doc/tutorials/core/how_to_use_OpenCV_parallel_for_/how_to_use_OpenCV_parallel_for_.markdown +++ b/doc/tutorials/core/how_to_use_OpenCV_parallel_for_/how_to_use_OpenCV_parallel_for_.markdown @@ -32,7 +32,7 @@ automatically available with the platform (e.g. APPLE GCD) but chances are that have access to a parallel framework either directly or by enabling the option in CMake and rebuild the library. 
The second (weak) precondition is more related to the task you want to achieve as not all computations -are suitable / can be adatapted to be run in a parallel way. To remain simple, tasks that can be split +are suitable / can be adapted to be run in a parallel way. To remain simple, tasks that can be split into multiple elementary operations with no memory dependency (no possible race condition) are easily parallelizable. Computer vision processing are often easily parallelizable as most of the time the processing of one pixel does not depend to the state of other pixels. diff --git a/doc/tutorials/imgproc/erosion_dilatation/erosion_dilatation.markdown b/doc/tutorials/imgproc/erosion_dilatation/erosion_dilatation.markdown index a5c6695f91..42f8c7c38f 100644 --- a/doc/tutorials/imgproc/erosion_dilatation/erosion_dilatation.markdown +++ b/doc/tutorials/imgproc/erosion_dilatation/erosion_dilatation.markdown @@ -84,57 +84,198 @@ This tutorial's code is shown below. You can also download it Explanation ----------- --# Most of the material shown here is trivial (if you have any doubt, please refer to the tutorials in - previous sections). Let's check the general structure of the C++ program: +@add_toggle_cpp +Most of the material shown here is trivial (if you have any doubt, please refer to the tutorials in +previous sections). Let's check the general structure of the C++ program: - - Load an image (can be BGR or grayscale) - - Create two windows (one for dilation output, the other for erosion) - - Create a set of two Trackbars for each operation: - - The first trackbar "Element" returns either **erosion_elem** or **dilation_elem** - - The second trackbar "Kernel size" return **erosion_size** or **dilation_size** for the - corresponding operation. - - Every time we move any slider, the user's function **Erosion** or **Dilation** will be - called and it will update the output image based on the current trackbar values. 
+@snippet cpp/tutorial_code/ImgProc/Morphology_1.cpp main - Let's analyze these two functions: +-# Load an image (can be BGR or grayscale) +-# Create two windows (one for dilation output, the other for erosion) +-# Create a set of two Trackbars for each operation: + - The first trackbar "Element" returns either **erosion_elem** or **dilation_elem** + - The second trackbar "Kernel size" return **erosion_size** or **dilation_size** for the + corresponding operation. +-# Call once erosion and dilation to show the initial image. --# **erosion:** - @snippet cpp/tutorial_code/ImgProc/Morphology_1.cpp erosion - - The function that performs the *erosion* operation is @ref cv::erode . As we can see, it - receives three arguments: - - *src*: The source image - - *erosion_dst*: The output image - - *element*: This is the kernel we will use to perform the operation. If we do not - specify, the default is a simple `3x3` matrix. Otherwise, we can specify its - shape. For this, we need to use the function cv::getStructuringElement : - @snippet cpp/tutorial_code/ImgProc/Morphology_1.cpp kernel +Every time we move any slider, the user's function **Erosion** or **Dilation** will be +called and it will update the output image based on the current trackbar values. - We can choose any of three shapes for our kernel: +Let's analyze these two functions: - - Rectangular box: MORPH_RECT - - Cross: MORPH_CROSS - - Ellipse: MORPH_ELLIPSE +#### The erosion function - Then, we just have to specify the size of our kernel and the *anchor point*. If not - specified, it is assumed to be in the center. +@snippet cpp/tutorial_code/ImgProc/Morphology_1.cpp erosion - - That is all. We are ready to perform the erosion of our image. -@note Additionally, there is another parameter that allows you to perform multiple erosions -(iterations) at once. However, We haven't used it in this simple tutorial. You can check out the -reference for more details. 
+The function that performs the *erosion* operation is @ref cv::erode . As we can see, it +receives three arguments: +- *src*: The source image +- *erosion_dst*: The output image +- *element*: This is the kernel we will use to perform the operation. If we do not + specify, the default is a simple `3x3` matrix. Otherwise, we can specify its + shape. For this, we need to use the function cv::getStructuringElement : + @snippet cpp/tutorial_code/ImgProc/Morphology_1.cpp kernel --# **dilation:** + We can choose any of three shapes for our kernel: - The code is below. As you can see, it is completely similar to the snippet of code for **erosion**. - Here we also have the option of defining our kernel, its anchor point and the size of the operator - to be used. - @snippet cpp/tutorial_code/ImgProc/Morphology_1.cpp dilation + - Rectangular box: MORPH_RECT + - Cross: MORPH_CROSS + - Ellipse: MORPH_ELLIPSE + + Then, we just have to specify the size of our kernel and the *anchor point*. If not + specified, it is assumed to be in the center. + +That is all. We are ready to perform the erosion of our image. + +#### The dilation function + +The code is below. As you can see, it is completely similar to the snippet of code for **erosion**. +Here we also have the option of defining our kernel, its anchor point and the size of the operator +to be used. +@snippet cpp/tutorial_code/ImgProc/Morphology_1.cpp dilation +@end_toggle + +@add_toggle_java +Most of the material shown here is trivial (if you have any doubt, please refer to the tutorials in +previous sections). Let's check however the general structure of the java class. 
There are 4 main +parts in the java class: + +- the class constructor which setups the window that will be filled with window components +- the `addComponentsToPane` method, which fills out the window +- the `update` method, which determines what happens when the user changes any value +- the `main` method, which is the entry point of the program + +In this tutorial we will focus on the `addComponentsToPane` and `update` methods. However, for completion the +steps followed in the constructor are: + +-# Load an image (can be BGR or grayscale) +-# Create a window +-# Add various control components with `addComponentsToPane` +-# show the window + +The components were added by the following method: + +@snippet java/tutorial_code/ImgProc/erosion_dilatation/MorphologyDemo1.java components + +In short we + +-# create a panel for the sliders +-# create a combo box for the element types +-# create a slider for the kernel size +-# create a combo box for the morphology function to use (erosion or dilation) + +The action and state changed listeners added call at the end the `update` method which updates +the image based on the current slider values. So every time we move any slider, the `update` method is triggered. + +#### Updating the image + +To update the image we used the following implementation: + +@snippet java/tutorial_code/ImgProc/erosion_dilatation/MorphologyDemo1.java update + +In other words we + +-# get the structuring element the user chose +-# execute the **erosion** or **dilation** function based on `doErosion` +-# reload the image with the morphology applied +-# repaint the frame + +Let's analyze the `erode` and `dilate` methods: + +#### The erosion method + +@snippet java/tutorial_code/ImgProc/erosion_dilatation/MorphologyDemo1.java erosion + +The function that performs the *erosion* operation is @ref cv::erode . 
As we can see, it +receives three arguments: +- *src*: The source image +- *erosion_dst*: The output image +- *element*: This is the kernel we will use to perform the operation. For specifying the shape, we need to use + the function cv::getStructuringElement : + @snippet java/tutorial_code/ImgProc/erosion_dilatation/MorphologyDemo1.java kernel + + We can choose any of three shapes for our kernel: + + - Rectangular box: CV_SHAPE_RECT + - Cross: CV_SHAPE_CROSS + - Ellipse: CV_SHAPE_ELLIPSE + + Together with the shape we specify the size of our kernel and the *anchor point*. If the anchor point is not + specified, it is assumed to be in the center. + +That is all. We are ready to perform the erosion of our image. + +#### The dilation function + +The code is below. As you can see, it is completely similar to the snippet of code for **erosion**. +Here we also have the option of defining our kernel, its anchor point and the size of the operator +to be used. +@snippet java/tutorial_code/ImgProc/erosion_dilatation/MorphologyDemo1.java dilation +@end_toggle + +@add_toggle_python +Most of the material shown here is trivial (if you have any doubt, please refer to the tutorials in +previous sections). Let's check the general structure of the python script: + +@snippet python/tutorial_code/imgProc/erosion_dilatation/morphology_1.py main + +-# Load an image (can be BGR or grayscale) +-# Create two windows (one for erosion output, the other for dilation) with a set of trackbars each + - The first trackbar "Element" returns the value for the morphological type that will be mapped + (1 = rectangle, 2 = cross, 3 = ellipse) + - The second trackbar "Kernel size" returns the size of the element for the + corresponding operation +-# Call once erosion and dilation to show the initial image + +Every time we move any slider, the user's function **erosion** or **dilation** will be +called and it will update the output image based on the current trackbar values. 
+ +Let's analyze these two functions: + +#### The erosion function + +@snippet python/tutorial_code/imgProc/erosion_dilatation/morphology_1.py erosion + +The function that performs the *erosion* operation is @ref cv::erode . As we can see, it +receives two arguments and returns the processed image: +- *src*: The source image +- *element*: The kernel we will use to perform the operation. We can specify its + shape by using the function cv::getStructuringElement : + @snippet python/tutorial_code/imgProc/erosion_dilatation/morphology_1.py kernel + + We can choose any of three shapes for our kernel: + + - Rectangular box: MORPH_RECT + - Cross: MORPH_CROSS + - Ellipse: MORPH_ELLIPSE + +Then, we just have to specify the size of our kernel and the *anchor point*. If the anchor point not +specified, it is assumed to be in the center. + +That is all. We are ready to perform the erosion of our image. + +#### The dilation function + +The code is below. As you can see, it is completely similar to the snippet of code for **erosion**. +Here we also have the option of defining our kernel, its anchor point and the size of the operator +to be used. + +@snippet python/tutorial_code/imgProc/erosion_dilatation/morphology_1.py dilation +@end_toggle + +@note Additionally, there are further parameters that allow you to perform multiple erosions/dilations +(iterations) at once and also set the border type and value. However, We haven't used those +in this simple tutorial. You can check out the reference for more details. Results ------- -Compile the code above and execute it with an image as argument. For instance, using this image: +Compile the code above and execute it (or run the script if using python) with an image as argument. +If you do not provide an image as argument the default sample image +([LinuxLogo.jpg](https://github.com/opencv/opencv/tree/master/samples/data/LinuxLogo.jpg)) will be used. 
+ +For instance, using this image: ![](images/Morphology_1_Tutorial_Original_Image.jpg) @@ -143,3 +284,4 @@ naturally. Try them out! You can even try to add a third Trackbar to control the iterations. ![](images/Morphology_1_Result.jpg) +(depending on the programming language the output might vary a little or be only 1 window) diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown index 196eb42ac0..3d65710630 100644 --- a/doc/tutorials/introduction/config_reference/config_reference.markdown +++ b/doc/tutorials/introduction/config_reference/config_reference.markdown @@ -247,15 +247,18 @@ When `WITH_` option is enabled: `WITH_CUDA` (default: _OFF_) -Many algorithms have been implemented using CUDA acceleration, these functions are located in separate modules: @ref cuda. CUDA toolkit must be installed from the official NVIDIA site as a prerequisite. For cmake versions older than 3.9 OpenCV uses own `cmake/FindCUDA.cmake` script, for newer versions - the one packaged with CMake. Additional options can be used to control build process, e.g. `CUDA_GENERATION` or `CUDA_ARCH_BIN`. These parameters are not documented yet, please consult with the `cmake/OpenCVDetectCUDA.cmake` script for details. - -Some tutorials can be found in the corresponding section: @ref tutorial_table_of_content_gpu +Many algorithms have been implemented using CUDA acceleration, these functions are located in separate modules. CUDA toolkit must be installed from the official NVIDIA site as a prerequisite. For cmake versions older than 3.9 OpenCV uses own `cmake/FindCUDA.cmake` script, for newer versions - the one packaged with CMake. Additional options can be used to control build process, e.g. `CUDA_GENERATION` or `CUDA_ARCH_BIN`. These parameters are not documented yet, please consult with the `cmake/OpenCVDetectCUDA.cmake` script for details. 
@note Since OpenCV version 4.0 all CUDA-accelerated algorithm implementations have been moved to the _opencv_contrib_ repository. To build _opencv_ and _opencv_contrib_ together check @ref tutorial_config_reference_general_contrib. +@cond CUDA_MODULES +@note Some tutorials can be found in the corresponding section: @ref tutorial_table_of_content_gpu +@see @ref cuda +@endcond + @see https://en.wikipedia.org/wiki/CUDA -TODO: other options: `WITH_CUFFT`, `WITH_CUBLAS`, WITH_NVCUVID`? +TODO: other options: `WITH_CUFFT`, `WITH_CUBLAS`, `WITH_NVCUVID`? ### OpenCL support diff --git a/doc/tutorials/video/background_subtraction/background_subtraction.markdown b/doc/tutorials/video/background_subtraction/background_subtraction.markdown index c95cd4173d..420286960d 100644 --- a/doc/tutorials/video/background_subtraction/background_subtraction.markdown +++ b/doc/tutorials/video/background_subtraction/background_subtraction.markdown @@ -32,8 +32,7 @@ In this tutorial you will learn how to: -# Create and update the background model by using @ref cv::BackgroundSubtractor class; -# Get and show the foreground mask by using @ref cv::imshow ; -Code ----- +### Code In the following you can find the source code. We will let the user choose to process either a video file or a sequence of images. 
diff --git a/doc/tutorials/videoio/intelperc.markdown b/doc/tutorials/videoio/intelperc.markdown index e27f70c7ed..6a6a5e5c9a 100644 --- a/doc/tutorials/videoio/intelperc.markdown +++ b/doc/tutorials/videoio/intelperc.markdown @@ -1,7 +1,7 @@ Using Creative Senz3D and other Intel RealSense SDK compatible depth sensors {#tutorial_intelperc} ======================================================================================= -@prev_tutorial{tutorial_kinect_openni} +@prev_tutorial{tutorial_orbbec_astra} **Note**: This tutorial is partially obsolete since PerC SDK has been replaced with RealSense SDK diff --git a/doc/tutorials/videoio/kinect_openni.markdown b/doc/tutorials/videoio/kinect_openni.markdown index dc1ee6eeaa..aadaec5e44 100644 --- a/doc/tutorials/videoio/kinect_openni.markdown +++ b/doc/tutorials/videoio/kinect_openni.markdown @@ -2,7 +2,7 @@ Using Kinect and other OpenNI compatible depth sensors {#tutorial_kinect_openni} ====================================================== @prev_tutorial{tutorial_video_write} -@next_tutorial{tutorial_intelperc} +@next_tutorial{tutorial_orbbec_astra} Depth sensors compatible with OpenNI (Kinect, XtionPRO, ...) 
are supported through VideoCapture diff --git a/doc/tutorials/videoio/orbbec-astra/images/astra_color.jpg b/doc/tutorials/videoio/orbbec-astra/images/astra_color.jpg new file mode 100644 index 0000000000..d37e2803df Binary files /dev/null and b/doc/tutorials/videoio/orbbec-astra/images/astra_color.jpg differ diff --git a/doc/tutorials/videoio/orbbec-astra/images/astra_depth.png b/doc/tutorials/videoio/orbbec-astra/images/astra_depth.png new file mode 100644 index 0000000000..6fe2c6cd38 Binary files /dev/null and b/doc/tutorials/videoio/orbbec-astra/images/astra_depth.png differ diff --git a/doc/tutorials/videoio/orbbec-astra/orbbec_astra.markdown b/doc/tutorials/videoio/orbbec-astra/orbbec_astra.markdown new file mode 100644 index 0000000000..664e4f6dfe --- /dev/null +++ b/doc/tutorials/videoio/orbbec-astra/orbbec_astra.markdown @@ -0,0 +1,150 @@ +Using Orbbec Astra 3D cameras {#tutorial_orbbec_astra} +====================================================== + +@prev_tutorial{tutorial_kinect_openni} +@next_tutorial{tutorial_intelperc} + + +### Introduction + +This tutorial is devoted to the Astra Series of Orbbec 3D cameras (https://orbbec3d.com/product-astra-pro/). +That cameras have a depth sensor in addition to a common color sensor. The depth sensors can be read using +the OpenNI interface with @ref cv::VideoCapture class. The video stream is provided through the regular camera +interface. + +### Installation Instructions + +In order to use a depth sensor with OpenCV you should do the following steps: + +-# Download the latest version of Orbbec OpenNI SDK (from here ). + Unzip the archive, choose the build according to your operating system and follow installation + steps provided in the Readme file. For instance, if you use 64bit GNU/Linux run: + @code{.bash} + $ cd Linux/OpenNI-Linux-x64-2.3.0.63/ + $ sudo ./install.sh + @endcode + When you are done with the installation, make sure to replug your device for udev rules to take + effect. 
The camera should now work as a general camera device. Note that your current user should + belong to group `video` to have access to the camera. Also, make sure to source `OpenNIDevEnvironment` file: + @code{.bash} + $ source OpenNIDevEnvironment + @endcode + +-# Run the following commands to verify that OpenNI library and header files can be found. You should see + something similar in your terminal: + @code{.bash} + $ echo $OPENNI2_INCLUDE + /home/user/OpenNI_2.3.0.63/Linux/OpenNI-Linux-x64-2.3.0.63/Include + $ echo $OPENNI2_REDIST + /home/user/OpenNI_2.3.0.63/Linux/OpenNI-Linux-x64-2.3.0.63/Redist + @endcode + If the above two variables are empty, then you need to source `OpenNIDevEnvironment` again. Now you can + configure OpenCV with OpenNI support enabled by setting the `WITH_OPENNI2` flag in CMake. + You may also like to enable the `BUILD_EXAMPLES` flag to get a code sample working with your Astra camera. + Run the following commands in the directory containing OpenCV source code to enable OpenNI support: + @code{.bash} + $ mkdir build + $ cd build + $ cmake -DWITH_OPENNI2=ON .. + @endcode + If the OpenNI library is found, OpenCV will be built with OpenNI2 support. You can see the status of OpenNI2 + support in the CMake log: + @code{.text} + -- Video I/O: + -- DC1394: YES (2.2.6) + -- FFMPEG: YES + -- avcodec: YES (58.91.100) + -- avformat: YES (58.45.100) + -- avutil: YES (56.51.100) + -- swscale: YES (5.7.100) + -- avresample: NO + -- GStreamer: YES (1.18.1) + -- OpenNI2: YES (2.3.0) + -- v4l/v4l2: YES (linux/videodev2.h) + @endcode + +-# Build OpenCV: + @code{.bash} + $ make + @endcode + +### Code + +To get both depth and color frames, two @ref cv::VideoCapture objects should be created: + +@snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Open streams + +The first object will use the regular Video4Linux2 interface to access the color sensor. The second one +is using OpenNI2 API to retrieve depth data. 
+ +Before using the created VideoCapture objects you may want to setup stream parameters by setting +objects' properties. The most important parameters are frame width, frame height and fps: + +@snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Setup streams + +For setting and getting some property of sensor data generators use @ref cv::VideoCapture::set and +@ref cv::VideoCapture::get methods respectively, e.g. : + +@snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Get properties + +The following properties of cameras available through OpenNI interfaces are supported for the depth +generator: + +- @ref cv::CAP_PROP_FRAME_WIDTH -- Frame width in pixels. +- @ref cv::CAP_PROP_FRAME_HEIGHT -- Frame height in pixels. +- @ref cv::CAP_PROP_FPS -- Frame rate in FPS. +- @ref cv::CAP_PROP_OPENNI_REGISTRATION -- Flag that registers the remapping depth map to image map + by changing the depth generator's viewpoint (if the flag is "on") or sets this view point to + its normal one (if the flag is "off"). The registration process’ resulting images are + pixel-aligned, which means that every pixel in the image is aligned to a pixel in the depth + image. +- @ref cv::CAP_PROP_OPENNI2_MIRROR -- Flag to enable or disable mirroring for this stream. Set to 0 + to disable mirroring + + Next properties are available for getting only: + +- @ref cv::CAP_PROP_OPENNI_FRAME_MAX_DEPTH -- A maximum supported depth of the camera in mm. +- @ref cv::CAP_PROP_OPENNI_BASELINE -- Baseline value in mm. + +After the VideoCapture objects are set up you can start reading frames from them. + +@note + OpenCV's VideoCapture provides synchronous API, so you have to grab frames in a new thread + to avoid one stream blocking while another stream is being read. VideoCapture is not a + thread-safe class, so you need to be careful to avoid any possible deadlocks or data races. 
+ +Example implementation that gets frames from each sensor in a new thread and stores them +in a list along with their timestamps: + +@snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Read streams + +VideoCapture can retrieve the following data: + +-# data given from the depth generator: + - @ref cv::CAP_OPENNI_DEPTH_MAP - depth values in mm (CV_16UC1) + - @ref cv::CAP_OPENNI_POINT_CLOUD_MAP - XYZ in meters (CV_32FC3) + - @ref cv::CAP_OPENNI_DISPARITY_MAP - disparity in pixels (CV_8UC1) + - @ref cv::CAP_OPENNI_DISPARITY_MAP_32F - disparity in pixels (CV_32FC1) + - @ref cv::CAP_OPENNI_VALID_DEPTH_MASK - mask of valid pixels (not occluded, not shaded, etc.) + (CV_8UC1) + +-# data given from the color sensor is a regular BGR image (CV_8UC3). + +When new data is available a reading thread notifies the main thread. A frame is stored in the +ordered list -- the first frame is the latest one: + +@snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Show color frame + +Depth frames can be picked the same way from the `depthFrames` list. + +After that, you'll have two frames: one containing color information and another one -- depth +information. In the sample images below you can see the color frame and the depth frame showing +the same scene. Looking at the color frame it's hard to distinguish plant leaves from leaves painted +on a wall, but the depth data makes it easy. + +![Color frame](images/astra_color.jpg) +![Depth frame](images/astra_depth.png) + +The complete implementation can be found in +[orbbec_astra.cpp](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp) +in `samples/cpp/tutorial_code/videoio` directory. 
diff --git a/doc/tutorials/videoio/table_of_content_videoio.markdown b/doc/tutorials/videoio/table_of_content_videoio.markdown index b27726bd87..393a0fc236 100644 --- a/doc/tutorials/videoio/table_of_content_videoio.markdown +++ b/doc/tutorials/videoio/table_of_content_videoio.markdown @@ -26,6 +26,10 @@ This section contains tutorials about how to read/save your video files. *Languages:* C++ +- @subpage tutorial_orbbec_astra + + *Languages:* C++ + - @subpage tutorial_intelperc - *Languages:* C++ \ No newline at end of file + *Languages:* C++ diff --git a/doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown b/doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown index e24f3cb71b..76cfa3751d 100644 --- a/doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown +++ b/doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown @@ -126,13 +126,12 @@ captRefrnc.set(CAP_PROP_POS_FRAMES, 10); // go to the 10th frame of the video For properties you can read and change look into the documentation of the @ref cv::VideoCapture::get and @ref cv::VideoCapture::set functions. -Image similarity - PSNR and SSIM --------------------------------- +### Image similarity - PSNR and SSIM We want to check just how imperceptible our video converting operation went, therefore we need a system to check frame by frame the similarity or differences. The most common algorithm used for this is the PSNR (aka **Peak signal-to-noise ratio**). The simplest definition of this starts out -from the *mean squad error*. Let there be two images: I1 and I2; with a two dimensional size i and +from the *mean squared error*. Let there be two images: I1 and I2; with a two dimensional size i and j, composed of c number of channels. \f[MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}\f] @@ -145,15 +144,15 @@ Here the \f$MAX_I\f$ is the maximum valid value for a pixel. In case of the simp per pixel per channel this is 255. 
When two images are the same the MSE will give zero, resulting in an invalid divide by zero operation in the PSNR formula. In this case the PSNR is undefined and as we'll need to handle this case separately. The transition to a logarithmic scale is made because the -pixel values have a very wide dynamic range. All this translated to OpenCV and a C++ function looks +pixel values have a very wide dynamic range. All this translated to OpenCV and a function looks like: @add_toggle_cpp -@include cpp/tutorial_code/videoio/video-input-psnr-ssim/video-input-psnr-ssim.cpp get-psnr +@snippet cpp/tutorial_code/videoio/video-input-psnr-ssim/video-input-psnr-ssim.cpp get-psnr @end_toggle @add_toggle_python -@include samples/python/tutorial_code/videoio/video-input-psnr-ssim.py get-psnr +@snippet samples/python/tutorial_code/videoio/video-input-psnr-ssim.py get-psnr @end_toggle Typically result values are anywhere between 30 and 50 for video compression, where higher is @@ -172,11 +171,11 @@ implementation below. Transactions on Image Processing, vol. 13, no. 4, pp. 600-612, Apr. 2004." article. @add_toggle_cpp -@include cpp/tutorial_code/videoio/video-input-psnr-ssim/video-input-psnr-ssim.cpp get-mssim +@snippet samples/cpp/tutorial_code/videoio/video-input-psnr-ssim/video-input-psnr-ssim.cpp get-mssim @end_toggle @add_toggle_python -@include samples/python/tutorial_code/videoio/video-input-psnr-ssim.py get-mssim +@snippet samples/python/tutorial_code/videoio/video-input-psnr-ssim.py get-mssim @end_toggle This will return a similarity index for each channel of the image. 
This value is between zero and diff --git a/doc/tutorials/videoio/video-write/video_write.markdown b/doc/tutorials/videoio/video-write/video_write.markdown index 624b2d1db6..0100f8cfc4 100644 --- a/doc/tutorials/videoio/video-write/video_write.markdown +++ b/doc/tutorials/videoio/video-write/video_write.markdown @@ -63,7 +63,7 @@ specialized video writing libraries such as *FFMpeg* or codecs as *HuffYUV*, *Co an alternative, create the video track with OpenCV and expand it with sound tracks or convert it to other formats by using video manipulation programs such as *VirtualDub* or *AviSynth*. -The *VideoWriter* class +The VideoWriter class ----------------------- The content written here builds on the assumption you diff --git a/modules/calib3d/doc/calib3d.bib b/modules/calib3d/doc/calib3d.bib index 47dcf93788..86f2277b16 100644 --- a/modules/calib3d/doc/calib3d.bib +++ b/modules/calib3d/doc/calib3d.bib @@ -40,6 +40,14 @@ publisher={IEEE} } +@inproceedings{Terzakis20, + author = {Terzakis, George and Lourakis, Manolis}, + year = {2020}, + month = {09}, + pages = {}, + title = {A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem} +} + @inproceedings{strobl2011iccv, title={More accurate pinhole camera calibration with imperfect planar target}, author={Strobl, Klaus H. 
and Hirzinger, Gerd}, diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index cc5fdbfe1c..228a4f3f44 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -471,6 +471,7 @@ enum SolvePnPMethod { //!< - point 1: [ squareLength / 2, squareLength / 2, 0] //!< - point 2: [ squareLength / 2, -squareLength / 2, 0] //!< - point 3: [-squareLength / 2, -squareLength / 2, 0] + SOLVEPNP_SQPNP = 8, //!< SQPnP: A Consistently Fast and Globally OptimalSolution to the Perspective-n-Point Problem @cite Terzakis20 #ifndef CV_DOXYGEN SOLVEPNP_MAX_COUNT //!< Used for count #endif @@ -550,17 +551,18 @@ enum NeighborSearchMethod { NEIGH_FLANN_KNN, NEIGH_GRID, NEIGH_FLANN_RADIUS }; struct CV_EXPORTS_W_SIMPLE UsacParams { // in alphabetical order - double confidence = 0.99; - bool isParallel = false; - int loIterations = 5; - LocalOptimMethod loMethod = LocalOptimMethod::LOCAL_OPTIM_INNER_LO; - int loSampleSize = 14; - int maxIterations = 5000; - NeighborSearchMethod neighborsSearch = NeighborSearchMethod::NEIGH_GRID; - int randomGeneratorState = 0; - SamplingMethod sampler = SamplingMethod::SAMPLING_UNIFORM; - ScoreMethod score = ScoreMethod::SCORE_METHOD_MSAC; - double threshold = 1.5; + CV_WRAP UsacParams(); + CV_PROP_RW double confidence; + CV_PROP_RW bool isParallel; + CV_PROP_RW int loIterations; + CV_PROP_RW LocalOptimMethod loMethod; + CV_PROP_RW int loSampleSize; + CV_PROP_RW int maxIterations; + CV_PROP_RW NeighborSearchMethod neighborsSearch; + CV_PROP_RW int randomGeneratorState; + CV_PROP_RW SamplingMethod sampler; + CV_PROP_RW ScoreMethod score; + CV_PROP_RW double threshold; }; /** @brief Converts a rotation matrix to a rotation vector or vice versa. 
@@ -945,6 +947,9 @@ It requires 4 coplanar object points defined in the following order: - point 1: [ squareLength / 2, squareLength / 2, 0] - point 2: [ squareLength / 2, -squareLength / 2, 0] - point 3: [-squareLength / 2, -squareLength / 2, 0] +- **SOLVEPNP_SQPNP** Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the +Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis20). It requires 3 or more points. + The function estimates the object pose given a set of object points, their corresponding image projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below @@ -1068,6 +1073,7 @@ a 3D point expressed in the world frame into the camera frame: - point 1: [ squareLength / 2, squareLength / 2, 0] - point 2: [ squareLength / 2, -squareLength / 2, 0] - point 3: [-squareLength / 2, -squareLength / 2, 0] + - With **SOLVEPNP_SQPNP** input points must be >= 3 */ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, @@ -2597,10 +2603,10 @@ CV_EXPORTS void convertPointsHomogeneous( InputArray src, OutputArray dst ); floating-point (single or double precision). @param points2 Array of the second image points of the same size and format as points1 . @param method Method for computing a fundamental matrix. -- **CV_FM_7POINT** for a 7-point algorithm. \f$N = 7\f$ -- **CV_FM_8POINT** for an 8-point algorithm. \f$N \ge 8\f$ -- **CV_FM_RANSAC** for the RANSAC algorithm. \f$N \ge 8\f$ -- **CV_FM_LMEDS** for the LMedS algorithm. \f$N \ge 8\f$ +- @ref FM_7POINT for a 7-point algorithm. \f$N = 7\f$ +- @ref FM_8POINT for an 8-point algorithm. \f$N \ge 8\f$ +- @ref FM_RANSAC for the RANSAC algorithm. \f$N \ge 8\f$ +- @ref FM_LMEDS for the LMedS algorithm. \f$N \ge 8\f$ @param ransacReprojThreshold Parameter used only for RANSAC. 
It is the maximum distance from a point to an epipolar line in pixels, beyond which the point is considered an outlier and is not used for computing the final fundamental matrix. It can be set to something like 1-3, depending on the accuracy of the diff --git a/modules/calib3d/src/solvepnp.cpp b/modules/calib3d/src/solvepnp.cpp index 5c04662489..0f12333eb9 100644 --- a/modules/calib3d/src/solvepnp.cpp +++ b/modules/calib3d/src/solvepnp.cpp @@ -47,6 +47,7 @@ #include "p3p.h" #include "ap3p.h" #include "ippe.hpp" +#include "sqpnp.hpp" #include "calib3d_c_api.h" #include "usac.hpp" @@ -197,6 +198,21 @@ public: Mat tvec; }; +UsacParams::UsacParams() +{ + confidence = 0.99; + isParallel = false; + loIterations = 5; + loMethod = LocalOptimMethod::LOCAL_OPTIM_INNER_LO; + loSampleSize = 14; + maxIterations = 5000; + neighborsSearch = NeighborSearchMethod::NEIGH_GRID; + randomGeneratorState = 0; + sampler = SamplingMethod::SAMPLING_UNIFORM; + score = ScoreMethod::SCORE_METHOD_MSAC; + threshold = 1.5; +} + bool solvePnPRansac(InputArray _opoints, InputArray _ipoints, InputArray _cameraMatrix, InputArray _distCoeffs, OutputArray _rvec, OutputArray _tvec, bool useExtrinsicGuess, @@ -781,7 +797,8 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints, Mat opoints = _opoints.getMat(), ipoints = _ipoints.getMat(); int npoints = std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F)); - CV_Assert( ( (npoints >= 4) || (npoints == 3 && flags == SOLVEPNP_ITERATIVE && useExtrinsicGuess) ) + CV_Assert( ( (npoints >= 4) || (npoints == 3 && flags == SOLVEPNP_ITERATIVE && useExtrinsicGuess) + || (npoints >= 3 && flags == SOLVEPNP_SQPNP) ) && npoints == std::max(ipoints.checkVector(2, CV_32F), ipoints.checkVector(2, CV_64F)) ); opoints = opoints.reshape(3, npoints); @@ -966,6 +983,14 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints, } } catch (...) 
{ } } + else if (flags == SOLVEPNP_SQPNP) + { + Mat undistortedPoints; + undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs); + + sqpnp::PoseSolver solver; + solver.solve(opoints, undistortedPoints, vec_rvecs, vec_tvecs); + } /*else if (flags == SOLVEPNP_DLS) { Mat undistortedPoints; @@ -993,7 +1018,8 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints, vec_tvecs.push_back(tvec); }*/ else - CV_Error(CV_StsBadArg, "The flags argument must be one of SOLVEPNP_ITERATIVE, SOLVEPNP_P3P, SOLVEPNP_EPNP or SOLVEPNP_DLS"); + CV_Error(CV_StsBadArg, "The flags argument must be one of SOLVEPNP_ITERATIVE, SOLVEPNP_P3P, " + "SOLVEPNP_EPNP, SOLVEPNP_DLS, SOLVEPNP_UPNP, SOLVEPNP_AP3P, SOLVEPNP_IPPE, SOLVEPNP_IPPE_SQUARE or SOLVEPNP_SQPNP"); CV_Assert(vec_rvecs.size() == vec_tvecs.size()); diff --git a/modules/calib3d/src/sqpnp.cpp b/modules/calib3d/src/sqpnp.cpp new file mode 100644 index 0000000000..7117e61c96 --- /dev/null +++ b/modules/calib3d/src/sqpnp.cpp @@ -0,0 +1,775 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This file is based on file issued with the following license: + +/* +BSD 3-Clause License + +Copyright (c) 2020, George Terzakis +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "precomp.hpp" +#include "sqpnp.hpp" + +#include + +namespace cv { +namespace sqpnp { + +const double PoseSolver::RANK_TOLERANCE = 1e-7; +const double PoseSolver::SQP_SQUARED_TOLERANCE = 1e-10; +const double PoseSolver::SQP_DET_THRESHOLD = 1.001; +const double PoseSolver::ORTHOGONALITY_SQUARED_ERROR_THRESHOLD = 1e-8; +const double PoseSolver::EQUAL_VECTORS_SQUARED_DIFF = 1e-10; +const double PoseSolver::EQUAL_SQUARED_ERRORS_DIFF = 1e-6; +const double PoseSolver::POINT_VARIANCE_THRESHOLD = 1e-5; +const double PoseSolver::SQRT3 = std::sqrt(3); +const int PoseSolver::SQP_MAX_ITERATION = 15; + +//No checking done here for overflow, since this is not public all call instances +//are assumed to be valid +template + void set(int row, int col, cv::Matx& dest, + const cv::Matx& source) +{ + for (int y = 0; y < snrows; y++) + { + for (int x = 0; x < sncols; x++) + { + dest(row + y, col + x) = source(y, x); + } + } +} + +PoseSolver::PoseSolver() + : num_null_vectors_(-1), + num_solutions_(0) +{ +} + + +void 
PoseSolver::solve(InputArray objectPoints, InputArray imagePoints, OutputArrayOfArrays rvecs, + OutputArrayOfArrays tvecs) +{ + //Input checking + int objType = objectPoints.getMat().type(); + CV_CheckType(objType, objType == CV_32FC3 || objType == CV_64FC3, + "Type of objectPoints must be CV_32FC3 or CV_64FC3"); + + int imgType = imagePoints.getMat().type(); + CV_CheckType(imgType, imgType == CV_32FC2 || imgType == CV_64FC2, + "Type of imagePoints must be CV_32FC2 or CV_64FC2"); + + CV_Assert(objectPoints.rows() == 1 || objectPoints.cols() == 1); + CV_Assert(objectPoints.rows() >= 3 || objectPoints.cols() >= 3); + CV_Assert(imagePoints.rows() == 1 || imagePoints.cols() == 1); + CV_Assert(imagePoints.rows() * imagePoints.cols() == objectPoints.rows() * objectPoints.cols()); + + Mat _imagePoints; + if (imgType == CV_32FC2) + { + imagePoints.getMat().convertTo(_imagePoints, CV_64F); + } + else + { + _imagePoints = imagePoints.getMat(); + } + + Mat _objectPoints; + if (objType == CV_32FC3) + { + objectPoints.getMat().convertTo(_objectPoints, CV_64F); + } + else + { + _objectPoints = objectPoints.getMat(); + } + + num_null_vectors_ = -1; + num_solutions_ = 0; + + computeOmega(_objectPoints, _imagePoints); + solveInternal(); + + int depthRot = rvecs.fixedType() ? rvecs.depth() : CV_64F; + int depthTrans = tvecs.fixedType() ? tvecs.depth() : CV_64F; + + rvecs.create(num_solutions_, 1, CV_MAKETYPE(depthRot, rvecs.fixedType() && rvecs.kind() == _InputArray::STD_VECTOR ? 3 : 1)); + tvecs.create(num_solutions_, 1, CV_MAKETYPE(depthTrans, tvecs.fixedType() && tvecs.kind() == _InputArray::STD_VECTOR ? 
3 : 1)); + + for (int i = 0; i < num_solutions_; i++) + { + + Mat rvec; + Mat rotation = Mat(solutions_[i].r_hat).reshape(1, 3); + Rodrigues(rotation, rvec); + + rvecs.getMatRef(i) = rvec; + tvecs.getMatRef(i) = Mat(solutions_[i].t); + } +} + +void PoseSolver::computeOmega(InputArray objectPoints, InputArray imagePoints) +{ + omega_ = cv::Matx::zeros(); + cv::Matx qa_sum = cv::Matx::zeros(); + + cv::Point2d sum_img(0, 0); + cv::Point3d sum_obj(0, 0, 0); + double sq_norm_sum = 0; + + Mat _imagePoints = imagePoints.getMat(); + Mat _objectPoints = objectPoints.getMat(); + + int n = _objectPoints.cols * _objectPoints.rows; + + for (int i = 0; i < n; i++) + { + const cv::Point2d& img_pt = _imagePoints.at(i); + const cv::Point3d& obj_pt = _objectPoints.at(i); + + sum_img += img_pt; + sum_obj += obj_pt; + + const double& x = img_pt.x, & y = img_pt.y; + const double& X = obj_pt.x, & Y = obj_pt.y, & Z = obj_pt.z; + double sq_norm = x * x + y * y; + sq_norm_sum += sq_norm; + + double X2 = X * X, + XY = X * Y, + XZ = X * Z, + Y2 = Y * Y, + YZ = Y * Z, + Z2 = Z * Z; + + omega_(0, 0) += X2; + omega_(0, 1) += XY; + omega_(0, 2) += XZ; + omega_(1, 1) += Y2; + omega_(1, 2) += YZ; + omega_(2, 2) += Z2; + + + //Populating this manually saves operations by only calculating upper triangle + omega_(0, 6) += -x * X2; omega_(0, 7) += -x * XY; omega_(0, 8) += -x * XZ; + omega_(1, 7) += -x * Y2; omega_(1, 8) += -x * YZ; + omega_(2, 8) += -x * Z2; + + omega_(3, 6) += -y * X2; omega_(3, 7) += -y * XY; omega_(3, 8) += -y * XZ; + omega_(4, 7) += -y * Y2; omega_(4, 8) += -y * YZ; + omega_(5, 8) += -y * Z2; + + + omega_(6, 6) += sq_norm * X2; omega_(6, 7) += sq_norm * XY; omega_(6, 8) += sq_norm * XZ; + omega_(7, 7) += sq_norm * Y2; omega_(7, 8) += sq_norm * YZ; + omega_(8, 8) += sq_norm * Z2; + + //Compute qa_sum + qa_sum(0, 0) += X; qa_sum(0, 1) += Y; qa_sum(0, 2) += Z; + qa_sum(1, 3) += X; qa_sum(1, 4) += Y; qa_sum(1, 5) += Z; + + qa_sum(0, 6) += -x * X; qa_sum(0, 7) += -x * Y; qa_sum(0, 8) 
+= -x * Z; + qa_sum(1, 6) += -y * X; qa_sum(1, 7) += -y * Y; qa_sum(1, 8) += -y * Z; + + qa_sum(2, 0) += -x * X; qa_sum(2, 1) += -x * Y; qa_sum(2, 2) += -x * Z; + qa_sum(2, 3) += -y * X; qa_sum(2, 4) += -y * Y; qa_sum(2, 5) += -y * Z; + + qa_sum(2, 6) += sq_norm * X; qa_sum(2, 7) += sq_norm * Y; qa_sum(2, 8) += sq_norm * Z; + } + + + omega_(1, 6) = omega_(0, 7); omega_(2, 6) = omega_(0, 8); omega_(2, 7) = omega_(1, 8); + omega_(4, 6) = omega_(3, 7); omega_(5, 6) = omega_(3, 8); omega_(5, 7) = omega_(4, 8); + omega_(7, 6) = omega_(6, 7); omega_(8, 6) = omega_(6, 8); omega_(8, 7) = omega_(7, 8); + + + omega_(3, 3) = omega_(0, 0); omega_(3, 4) = omega_(0, 1); omega_(3, 5) = omega_(0, 2); + omega_(4, 4) = omega_(1, 1); omega_(4, 5) = omega_(1, 2); + omega_(5, 5) = omega_(2, 2); + + //Mirror upper triangle to lower triangle + for (int r = 0; r < 9; r++) + { + for (int c = 0; c < r; c++) + { + omega_(r, c) = omega_(c, r); + } + } + + cv::Matx q; + q(0, 0) = n; q(0, 1) = 0; q(0, 2) = -sum_img.x; + q(1, 0) = 0; q(1, 1) = n; q(1, 2) = -sum_img.y; + q(2, 0) = -sum_img.x; q(2, 1) = -sum_img.y; q(2, 2) = sq_norm_sum; + + double inv_n = 1.0 / n; + double detQ = n * (n * sq_norm_sum - sum_img.y * sum_img.y - sum_img.x * sum_img.x); + double point_coordinate_variance = detQ * inv_n * inv_n * inv_n; + + CV_Assert(point_coordinate_variance >= POINT_VARIANCE_THRESHOLD); + + Matx q_inv; + analyticalInverse3x3Symm(q, q_inv); + + p_ = -q_inv * qa_sum; + + omega_ += qa_sum.t() * p_; + + cv::SVD omega_svd(omega_, cv::SVD::FULL_UV); + s_ = omega_svd.w; + u_ = cv::Mat(omega_svd.vt.t()); + + CV_Assert(s_(0) >= 1e-7); + + while (s_(7 - num_null_vectors_) < RANK_TOLERANCE) num_null_vectors_++; + + CV_Assert(++num_null_vectors_ <= 6); + + point_mean_ = cv::Vec3d(sum_obj.x / n, sum_obj.y / n, sum_obj.z / n); +} + +void PoseSolver::solveInternal() +{ + double min_sq_err = std::numeric_limits::max(); + int num_eigen_points = num_null_vectors_ > 0 ? 
num_null_vectors_ : 1; + + for (int i = 9 - num_eigen_points; i < 9; i++) + { + const cv::Matx e = SQRT3 * u_.col(i); + double orthogonality_sq_err = orthogonalityError(e); + + SQPSolution solutions[2]; + + //If e is orthogonal, we can skip SQP + if (orthogonality_sq_err < ORTHOGONALITY_SQUARED_ERROR_THRESHOLD) + { + solutions[0].r_hat = det3x3(e) * e; + solutions[0].t = p_ * solutions[0].r_hat; + checkSolution(solutions[0], min_sq_err); + } + else + { + Matx r; + nearestRotationMatrix(e, r); + solutions[0] = runSQP(r); + solutions[0].t = p_ * solutions[0].r_hat; + checkSolution(solutions[0], min_sq_err); + + nearestRotationMatrix(-e, r); + solutions[1] = runSQP(r); + solutions[1].t = p_ * solutions[1].r_hat; + checkSolution(solutions[1], min_sq_err); + } + } + + int c = 1; + + while (min_sq_err > 3 * s_[9 - num_eigen_points - c] && 9 - num_eigen_points - c > 0) + { + int index = 9 - num_eigen_points - c; + + const cv::Matx e = u_.col(index); + SQPSolution solutions[2]; + + Matx r; + nearestRotationMatrix(e, r); + solutions[0] = runSQP(r); + solutions[0].t = p_ * solutions[0].r_hat; + checkSolution(solutions[0], min_sq_err); + + nearestRotationMatrix(-e, r); + solutions[1] = runSQP(r); + solutions[1].t = p_ * solutions[1].r_hat; + checkSolution(solutions[1], min_sq_err); + + c++; + } +} + +PoseSolver::SQPSolution PoseSolver::runSQP(const cv::Matx& r0) +{ + cv::Matx r = r0; + + double delta_squared_norm = std::numeric_limits::max(); + cv::Matx delta; + + int step = 0; + while (delta_squared_norm > SQP_SQUARED_TOLERANCE && step++ < SQP_MAX_ITERATION) + { + solveSQPSystem(r, delta); + r += delta; + delta_squared_norm = cv::norm(delta, cv::NORM_L2SQR); + } + + SQPSolution solution; + + double det_r = det3x3(r); + if (det_r < 0) + { + r = -r; + det_r = -det_r; + } + + if (det_r > SQP_DET_THRESHOLD) + { + nearestRotationMatrix(r, solution.r_hat); + } + else + { + solution.r_hat = r; + } + + return solution; +} + +void PoseSolver::solveSQPSystem(const cv::Matx& r, 
cv::Matx& delta) +{ + double sqnorm_r1 = r(0) * r(0) + r(1) * r(1) + r(2) * r(2), + sqnorm_r2 = r(3) * r(3) + r(4) * r(4) + r(5) * r(5), + sqnorm_r3 = r(6) * r(6) + r(7) * r(7) + r(8) * r(8); + double dot_r1r2 = r(0) * r(3) + r(1) * r(4) + r(2) * r(5), + dot_r1r3 = r(0) * r(6) + r(1) * r(7) + r(2) * r(8), + dot_r2r3 = r(3) * r(6) + r(4) * r(7) + r(5) * r(8); + + cv::Matx N; + cv::Matx H; + cv::Matx JH; + + computeRowAndNullspace(r, H, N, JH); + + cv::Matx g; + g(0) = 1 - sqnorm_r1; g(1) = 1 - sqnorm_r2; g(2) = 1 - sqnorm_r3; g(3) = -dot_r1r2; g(4) = -dot_r2r3; g(5) = -dot_r1r3; + + cv::Matx x; + x(0) = g(0) / JH(0, 0); + x(1) = g(1) / JH(1, 1); + x(2) = g(2) / JH(2, 2); + x(3) = (g(3) - JH(3, 0) * x(0) - JH(3, 1) * x(1)) / JH(3, 3); + x(4) = (g(4) - JH(4, 1) * x(1) - JH(4, 2) * x(2) - JH(4, 3) * x(3)) / JH(4, 4); + x(5) = (g(5) - JH(5, 0) * x(0) - JH(5, 2) * x(2) - JH(5, 3) * x(3) - JH(5, 4) * x(4)) / JH(5, 5); + + delta = H * x; + + + cv::Matx nt_omega = N.t() * omega_; + cv::Matx W = nt_omega * N, W_inv; + + analyticalInverse3x3Symm(W, W_inv); + + cv::Matx y = -W_inv * nt_omega * (delta + r); + delta += N * y; +} + +bool PoseSolver::analyticalInverse3x3Symm(const cv::Matx& Q, + cv::Matx& Qinv, + const double& threshold) +{ + // 1. Get the elements of the matrix + double a = Q(0, 0), + b = Q(1, 0), d = Q(1, 1), + c = Q(2, 0), e = Q(2, 1), f = Q(2, 2); + + // 2. Determinant + double t2, t4, t7, t9, t12; + t2 = e * e; + t4 = a * d; + t7 = b * b; + t9 = b * c; + t12 = c * c; + double det = -t4 * f + a * t2 + t7 * f - 2.0 * t9 * e + t12 * d; + + if (fabs(det) < threshold) return false; + + // 3. 
Inverse + double t15, t20, t24, t30; + t15 = 1.0 / det; + t20 = (-b * f + c * e) * t15; + t24 = (b * e - c * d) * t15; + t30 = (a * e - t9) * t15; + Qinv(0, 0) = (-d * f + t2) * t15; + Qinv(0, 1) = Qinv(1, 0) = -t20; + Qinv(0, 2) = Qinv(2, 0) = -t24; + Qinv(1, 1) = -(a * f - t12) * t15; + Qinv(1, 2) = Qinv(2, 1) = t30; + Qinv(2, 2) = -(t4 - t7) * t15; + + return true; +} + +void PoseSolver::computeRowAndNullspace(const cv::Matx& r, + cv::Matx& H, + cv::Matx& N, + cv::Matx& K, + const double& norm_threshold) +{ + H = cv::Matx::zeros(); + + // 1. q1 + double norm_r1 = sqrt(r(0) * r(0) + r(1) * r(1) + r(2) * r(2)); + double inv_norm_r1 = norm_r1 > 1e-5 ? 1.0 / norm_r1 : 0.0; + H(0, 0) = r(0) * inv_norm_r1; + H(1, 0) = r(1) * inv_norm_r1; + H(2, 0) = r(2) * inv_norm_r1; + K(0, 0) = 2 * norm_r1; + + // 2. q2 + double norm_r2 = sqrt(r(3) * r(3) + r(4) * r(4) + r(5) * r(5)); + double inv_norm_r2 = 1.0 / norm_r2; + H(3, 1) = r(3) * inv_norm_r2; + H(4, 1) = r(4) * inv_norm_r2; + H(5, 1) = r(5) * inv_norm_r2; + K(1, 0) = 0; + K(1, 1) = 2 * norm_r2; + + // 3. q3 = (r3'*q2)*q2 - (r3'*q1)*q1 ; q3 = q3/norm(q3) + double norm_r3 = sqrt(r(6) * r(6) + r(7) * r(7) + r(8) * r(8)); + double inv_norm_r3 = 1.0 / norm_r3; + H(6, 2) = r(6) * inv_norm_r3; + H(7, 2) = r(7) * inv_norm_r3; + H(8, 2) = r(8) * inv_norm_r3; + K(2, 0) = K(2, 1) = 0; + K(2, 2) = 2 * norm_r3; + + // 4. 
q4 + double dot_j4q1 = r(3) * H(0, 0) + r(4) * H(1, 0) + r(5) * H(2, 0), + dot_j4q2 = r(0) * H(3, 1) + r(1) * H(4, 1) + r(2) * H(5, 1); + + H(0, 3) = r(3) - dot_j4q1 * H(0, 0); + H(1, 3) = r(4) - dot_j4q1 * H(1, 0); + H(2, 3) = r(5) - dot_j4q1 * H(2, 0); + H(3, 3) = r(0) - dot_j4q2 * H(3, 1); + H(4, 3) = r(1) - dot_j4q2 * H(4, 1); + H(5, 3) = r(2) - dot_j4q2 * H(5, 1); + double inv_norm_j4 = 1.0 / sqrt(H(0, 3) * H(0, 3) + H(1, 3) * H(1, 3) + H(2, 3) * H(2, 3) + + H(3, 3) * H(3, 3) + H(4, 3) * H(4, 3) + H(5, 3) * H(5, 3)); + + H(0, 3) *= inv_norm_j4; + H(1, 3) *= inv_norm_j4; + H(2, 3) *= inv_norm_j4; + H(3, 3) *= inv_norm_j4; + H(4, 3) *= inv_norm_j4; + H(5, 3) *= inv_norm_j4; + + K(3, 0) = r(3) * H(0, 0) + r(4) * H(1, 0) + r(5) * H(2, 0); + K(3, 1) = r(0) * H(3, 1) + r(1) * H(4, 1) + r(2) * H(5, 1); + K(3, 2) = 0; + K(3, 3) = r(3) * H(0, 3) + r(4) * H(1, 3) + r(5) * H(2, 3) + r(0) * H(3, 3) + r(1) * H(4, 3) + r(2) * H(5, 3); + + // 5. q5 + double dot_j5q2 = r(6) * H(3, 1) + r(7) * H(4, 1) + r(8) * H(5, 1); + double dot_j5q3 = r(3) * H(6, 2) + r(4) * H(7, 2) + r(5) * H(8, 2); + double dot_j5q4 = r(6) * H(3, 3) + r(7) * H(4, 3) + r(8) * H(5, 3); + + H(0, 4) = -dot_j5q4 * H(0, 3); + H(1, 4) = -dot_j5q4 * H(1, 3); + H(2, 4) = -dot_j5q4 * H(2, 3); + H(3, 4) = r(6) - dot_j5q2 * H(3, 1) - dot_j5q4 * H(3, 3); + H(4, 4) = r(7) - dot_j5q2 * H(4, 1) - dot_j5q4 * H(4, 3); + H(5, 4) = r(8) - dot_j5q2 * H(5, 1) - dot_j5q4 * H(5, 3); + H(6, 4) = r(3) - dot_j5q3 * H(6, 2); H(7, 4) = r(4) - dot_j5q3 * H(7, 2); H(8, 4) = r(5) - dot_j5q3 * H(8, 2); + + Matx q4 = H.col(4); + q4 /= cv::norm(q4); + set(0, 4, H, q4); + + K(4, 0) = 0; + K(4, 1) = r(6) * H(3, 1) + r(7) * H(4, 1) + r(8) * H(5, 1); + K(4, 2) = r(3) * H(6, 2) + r(4) * H(7, 2) + r(5) * H(8, 2); + K(4, 3) = r(6) * H(3, 3) + r(7) * H(4, 3) + r(8) * H(5, 3); + K(4, 4) = r(6) * H(3, 4) + r(7) * H(4, 4) + r(8) * H(5, 4) + r(3) * H(6, 4) + r(4) * H(7, 4) + r(5) * H(8, 4); + + + // 4. 
q6 + double dot_j6q1 = r(6) * H(0, 0) + r(7) * H(1, 0) + r(8) * H(2, 0); + double dot_j6q3 = r(0) * H(6, 2) + r(1) * H(7, 2) + r(2) * H(8, 2); + double dot_j6q4 = r(6) * H(0, 3) + r(7) * H(1, 3) + r(8) * H(2, 3); + double dot_j6q5 = r(0) * H(6, 4) + r(1) * H(7, 4) + r(2) * H(8, 4) + r(6) * H(0, 4) + r(7) * H(1, 4) + r(8) * H(2, 4); + + H(0, 5) = r(6) - dot_j6q1 * H(0, 0) - dot_j6q4 * H(0, 3) - dot_j6q5 * H(0, 4); + H(1, 5) = r(7) - dot_j6q1 * H(1, 0) - dot_j6q4 * H(1, 3) - dot_j6q5 * H(1, 4); + H(2, 5) = r(8) - dot_j6q1 * H(2, 0) - dot_j6q4 * H(2, 3) - dot_j6q5 * H(2, 4); + + H(3, 5) = -dot_j6q5 * H(3, 4) - dot_j6q4 * H(3, 3); + H(4, 5) = -dot_j6q5 * H(4, 4) - dot_j6q4 * H(4, 3); + H(5, 5) = -dot_j6q5 * H(5, 4) - dot_j6q4 * H(5, 3); + + H(6, 5) = r(0) - dot_j6q3 * H(6, 2) - dot_j6q5 * H(6, 4); + H(7, 5) = r(1) - dot_j6q3 * H(7, 2) - dot_j6q5 * H(7, 4); + H(8, 5) = r(2) - dot_j6q3 * H(8, 2) - dot_j6q5 * H(8, 4); + + Matx q5 = H.col(5); + q5 /= cv::norm(q5); + set(0, 5, H, q5); + + K(5, 0) = r(6) * H(0, 0) + r(7) * H(1, 0) + r(8) * H(2, 0); + K(5, 1) = 0; K(5, 2) = r(0) * H(6, 2) + r(1) * H(7, 2) + r(2) * H(8, 2); + K(5, 3) = r(6) * H(0, 3) + r(7) * H(1, 3) + r(8) * H(2, 3); + K(5, 4) = r(6) * H(0, 4) + r(7) * H(1, 4) + r(8) * H(2, 4) + r(0) * H(6, 4) + r(1) * H(7, 4) + r(2) * H(8, 4); + K(5, 5) = r(6) * H(0, 5) + r(7) * H(1, 5) + r(8) * H(2, 5) + r(0) * H(6, 5) + r(1) * H(7, 5) + r(2) * H(8, 5); + + // Great! Now H is an orthogonalized, sparse basis of the Jacobian row space and K is filled. + // + // Now get a projector onto the null space H: + const cv::Matx Pn = cv::Matx::eye() - (H * H.t()); + + // Now we need to pick 3 columns of P with non-zero norm (> 0.3) and some angle between them (> 0.3). 
+ // + // Find the 3 columns of Pn with largest norms + int index1 = 0, + index2 = 0, + index3 = 0; + double max_norm1 = std::numeric_limits::min(); + double min_dot12 = std::numeric_limits::max(); + double min_dot1323 = std::numeric_limits::max(); + + + double col_norms[9]; + for (int i = 0; i < 9; i++) + { + col_norms[i] = cv::norm(Pn.col(i)); + if (col_norms[i] >= norm_threshold) + { + if (max_norm1 < col_norms[i]) + { + max_norm1 = col_norms[i]; + index1 = i; + } + } + } + + Matx v1 = Pn.col(index1); + v1 /= max_norm1; + set(0, 0, N, v1); + + for (int i = 0; i < 9; i++) + { + if (i == index1) continue; + if (col_norms[i] >= norm_threshold) + { + double cos_v1_x_col = fabs(Pn.col(i).dot(v1) / col_norms[i]); + + if (cos_v1_x_col <= min_dot12) + { + index2 = i; + min_dot12 = cos_v1_x_col; + } + } + } + + Matx v2 = Pn.col(index2); + Matx n0 = N.col(0); + v2 -= v2.dot(n0) * n0; + v2 /= cv::norm(v2); + set(0, 1, N, v2); + + for (int i = 0; i < 9; i++) + { + if (i == index2 || i == index1) continue; + if (col_norms[i] >= norm_threshold) + { + double cos_v1_x_col = fabs(Pn.col(i).dot(v1) / col_norms[i]); + double cos_v2_x_col = fabs(Pn.col(i).dot(v2) / col_norms[i]); + + if (cos_v1_x_col + cos_v2_x_col <= min_dot1323) + { + index3 = i; + min_dot1323 = cos_v2_x_col + cos_v2_x_col; + } + } + } + + Matx v3 = Pn.col(index3); + Matx n1 = N.col(1); + v3 -= (v3.dot(n1)) * n1 - (v3.dot(n0)) * n0; + v3 /= cv::norm(v3); + set(0, 2, N, v3); + +} + +// faster nearest rotation computation based on FOAM (see: http://users.ics.forth.gr/~lourakis/publ/2018_iros.pdf ) +/* Solve the nearest orthogonal approximation problem + * i.e., given e, find R minimizing ||R-e||_F + * + * The computation borrows from Markley's FOAM algorithm + * "Attitude Determination Using Vector Observations: A Fast Optimal Matrix Algorithm", J. Astronaut. Sci. + * + * See also M. 
Lourakis: "An Efficient Solution to Absolute Orientation", ICPR 2016 + * + * Copyright (C) 2019 Manolis Lourakis (lourakis **at** ics forth gr) + * Institute of Computer Science, Foundation for Research & Technology - Hellas + * Heraklion, Crete, Greece. + */ +void PoseSolver::nearestRotationMatrix(const cv::Matx& e, + cv::Matx& r) +{ + int i; + double l, lprev, det_e, e_sq, adj_e_sq, adj_e[9]; + + // e's adjoint + adj_e[0] = e(4) * e(8) - e(5) * e(7); adj_e[1] = e(2) * e(7) - e(1) * e(8); adj_e[2] = e(1) * e(5) - e(2) * e(4); + adj_e[3] = e(5) * e(6) - e(3) * e(8); adj_e[4] = e(0) * e(8) - e(2) * e(6); adj_e[5] = e(2) * e(3) - e(0) * e(5); + adj_e[6] = e(3) * e(7) - e(4) * e(6); adj_e[7] = e(1) * e(6) - e(0) * e(7); adj_e[8] = e(0) * e(4) - e(1) * e(3); + + // det(e), ||e||^2, ||adj(e)||^2 + det_e = e(0) * e(4) * e(8) - e(0) * e(5) * e(7) - e(1) * e(3) * e(8) + e(2) * e(3) * e(7) + e(1) * e(6) * e(5) - e(2) * e(6) * e(4); + e_sq = e(0) * e(0) + e(1) * e(1) + e(2) * e(2) + e(3) * e(3) + e(4) * e(4) + e(5) * e(5) + e(6) * e(6) + e(7) * e(7) + e(8) * e(8); + adj_e_sq = adj_e[0] * adj_e[0] + adj_e[1] * adj_e[1] + adj_e[2] * adj_e[2] + adj_e[3] * adj_e[3] + adj_e[4] * adj_e[4] + adj_e[5] * adj_e[5] + adj_e[6] * adj_e[6] + adj_e[7] * adj_e[7] + adj_e[8] * adj_e[8]; + + // compute l_max with Newton-Raphson from FOAM's characteristic polynomial, i.e. eq.(23) - (26) + for (i = 200, l = 2.0, lprev = 0.0; fabs(l - lprev) > 1E-12 * fabs(lprev) && i > 0; --i) { + double tmp, p, pp; + + tmp = (l * l - e_sq); + p = (tmp * tmp - 8.0 * l * det_e - 4.0 * adj_e_sq); + pp = 8.0 * (0.5 * tmp * l - det_e); + + lprev = l; + l -= p / pp; + } + + // the rotation matrix equals ((l^2 + e_sq)*e + 2*l*adj(e') - 2*e*e'*e) / (l*(l*l-e_sq) - 2*det(e)), i.e. 
eq.(14) using (18), (19) + { + // compute (l^2 + e_sq)*e + double tmp[9], e_et[9], denom; + const double a = l * l + e_sq; + + // e_et=e*e' + e_et[0] = e(0) * e(0) + e(1) * e(1) + e(2) * e(2); + e_et[1] = e(0) * e(3) + e(1) * e(4) + e(2) * e(5); + e_et[2] = e(0) * e(6) + e(1) * e(7) + e(2) * e(8); + + e_et[3] = e_et[1]; + e_et[4] = e(3) * e(3) + e(4) * e(4) + e(5) * e(5); + e_et[5] = e(3) * e(6) + e(4) * e(7) + e(5) * e(8); + + e_et[6] = e_et[2]; + e_et[7] = e_et[5]; + e_et[8] = e(6) * e(6) + e(7) * e(7) + e(8) * e(8); + + // tmp=e_et*e + tmp[0] = e_et[0] * e(0) + e_et[1] * e(3) + e_et[2] * e(6); + tmp[1] = e_et[0] * e(1) + e_et[1] * e(4) + e_et[2] * e(7); + tmp[2] = e_et[0] * e(2) + e_et[1] * e(5) + e_et[2] * e(8); + + tmp[3] = e_et[3] * e(0) + e_et[4] * e(3) + e_et[5] * e(6); + tmp[4] = e_et[3] * e(1) + e_et[4] * e(4) + e_et[5] * e(7); + tmp[5] = e_et[3] * e(2) + e_et[4] * e(5) + e_et[5] * e(8); + + tmp[6] = e_et[6] * e(0) + e_et[7] * e(3) + e_et[8] * e(6); + tmp[7] = e_et[6] * e(1) + e_et[7] * e(4) + e_et[8] * e(7); + tmp[8] = e_et[6] * e(2) + e_et[7] * e(5) + e_et[8] * e(8); + + // compute R as (a*e + 2*(l*adj(e)' - tmp))*denom; note that adj(e')=adj(e)' + denom = l * (l * l - e_sq) - 2.0 * det_e; + denom = 1.0 / denom; + r(0) = (a * e(0) + 2.0 * (l * adj_e[0] - tmp[0])) * denom; + r(1) = (a * e(1) + 2.0 * (l * adj_e[3] - tmp[1])) * denom; + r(2) = (a * e(2) + 2.0 * (l * adj_e[6] - tmp[2])) * denom; + + r(3) = (a * e(3) + 2.0 * (l * adj_e[1] - tmp[3])) * denom; + r(4) = (a * e(4) + 2.0 * (l * adj_e[4] - tmp[4])) * denom; + r(5) = (a * e(5) + 2.0 * (l * adj_e[7] - tmp[5])) * denom; + + r(6) = (a * e(6) + 2.0 * (l * adj_e[2] - tmp[6])) * denom; + r(7) = (a * e(7) + 2.0 * (l * adj_e[5] - tmp[7])) * denom; + r(8) = (a * e(8) + 2.0 * (l * adj_e[8] - tmp[8])) * denom; + } +} + +double PoseSolver::det3x3(const cv::Matx& e) +{ + return e(0) * e(4) * e(8) + e(1) * e(5) * e(6) + e(2) * e(3) * e(7) + - e(6) * e(4) * e(2) - e(7) * e(5) * e(0) - e(8) * e(3) * e(1); +} + 
+inline bool PoseSolver::positiveDepth(const SQPSolution& solution) const +{ + const cv::Matx& r = solution.r_hat; + const cv::Matx& t = solution.t; + const cv::Vec3d& mean = point_mean_; + return (r(6) * mean(0) + r(7) * mean(1) + r(8) * mean(2) + t(2) > 0); +} + +void PoseSolver::checkSolution(SQPSolution& solution, double& min_error) +{ + if (positiveDepth(solution)) + { + solution.sq_error = (omega_ * solution.r_hat).ddot(solution.r_hat); + if (fabs(min_error - solution.sq_error) > EQUAL_SQUARED_ERRORS_DIFF) + { + if (min_error > solution.sq_error) + { + min_error = solution.sq_error; + solutions_[0] = solution; + num_solutions_ = 1; + } + } + else + { + bool found = false; + for (int i = 0; i < num_solutions_; i++) + { + if (cv::norm(solutions_[i].r_hat - solution.r_hat, cv::NORM_L2SQR) < EQUAL_VECTORS_SQUARED_DIFF) + { + if (solutions_[i].sq_error > solution.sq_error) + { + solutions_[i] = solution; + } + found = true; + break; + } + } + + if (!found) + { + solutions_[num_solutions_++] = solution; + } + if (min_error > solution.sq_error) min_error = solution.sq_error; + } + } +} + +double PoseSolver::orthogonalityError(const cv::Matx& e) +{ + double sq_norm_e1 = e(0) * e(0) + e(1) * e(1) + e(2) * e(2); + double sq_norm_e2 = e(3) * e(3) + e(4) * e(4) + e(5) * e(5); + double sq_norm_e3 = e(6) * e(6) + e(7) * e(7) + e(8) * e(8); + double dot_e1e2 = e(0) * e(3) + e(1) * e(4) + e(2) * e(5); + double dot_e1e3 = e(0) * e(6) + e(1) * e(7) + e(2) * e(8); + double dot_e2e3 = e(3) * e(6) + e(4) * e(7) + e(5) * e(8); + + return (sq_norm_e1 - 1) * (sq_norm_e1 - 1) + (sq_norm_e2 - 1) * (sq_norm_e2 - 1) + (sq_norm_e3 - 1) * (sq_norm_e3 - 1) + + 2 * (dot_e1e2 * dot_e1e2 + dot_e1e3 * dot_e1e3 + dot_e2e3 * dot_e2e3); +} + +} +} diff --git a/modules/calib3d/src/sqpnp.hpp b/modules/calib3d/src/sqpnp.hpp new file mode 100644 index 0000000000..f8136324c9 --- /dev/null +++ b/modules/calib3d/src/sqpnp.hpp @@ -0,0 +1,194 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This file is based on file issued with the following license: + +/* +BSD 3-Clause License + +Copyright (c) 2020, George Terzakis +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef OPENCV_CALIB3D_SQPNP_HPP +#define OPENCV_CALIB3D_SQPNP_HPP + +#include + +namespace cv { +namespace sqpnp { + + +class PoseSolver { +public: + /** + * @brief PoseSolver constructor + */ + PoseSolver(); + + /** + * @brief Finds the possible poses of a camera given a set of 3D points + * and their corresponding 2D image projections. The poses are + * sorted by lowest squared error (which corresponds to lowest + * reprojection error). + * @param objectPoints Array or vector of 3 or more 3D points defined in object coordinates. + * 1xN/Nx1 3-channel (float or double) where N is the number of points. + * @param imagePoints Array or vector of corresponding 2D points, 1xN/Nx1 2-channel. + * @param rvec The output rotation solutions (up to 18 3x1 rotation vectors) + * @param tvec The output translation solutions (up to 18 3x1 vectors) + */ + void solve(InputArray objectPoints, InputArray imagePoints, OutputArrayOfArrays rvec, + OutputArrayOfArrays tvec); + +private: + struct SQPSolution + { + cv::Matx r_hat; + cv::Matx t; + double sq_error; + }; + + /* + * @brief Computes the 9x9 PSD Omega matrix and supporting matrices. + * @param objectPoints Array or vector of 3 or more 3D points defined in object coordinates. + * 1xN/Nx1 3-channel (float or double) where N is the number of points. + * @param imagePoints Array or vector of corresponding 2D points, 1xN/Nx1 2-channel. + */ + void computeOmega(InputArray objectPoints, InputArray imagePoints); + + /* + * @brief Computes the 9x9 PSD Omega matrix and supporting matrices. + */ + void solveInternal(); + + /* + * @brief Produces the distance from being orthogonal for a given 3x3 matrix + * in row-major form. + * @param e The vector to test representing a 3x3 matrix in row major form. + * @return The distance the matrix is from being orthogonal. + */ + static double orthogonalityError(const cv::Matx& e); + + /* + * @brief Processes a solution and sorts it by error. + * @param solution The solution to evaluate. 
+ * @param min_error The current minimum error. + */ + void checkSolution(SQPSolution& solution, double& min_error); + + /* + * @brief Computes the determinant of a matrix stored in row-major format. + * @param e Vector representing a 3x3 matrix stored in row-major format. + * @return The determinant of the matrix. + */ + static double det3x3(const cv::Matx& e); + + /* + * @brief Tests the cheirality for a given solution. + * @param solution The solution to evaluate. + */ + inline bool positiveDepth(const SQPSolution& solution) const; + + /* + * @brief Determines the nearest rotation matrix to a given rotation matrix. + * Input and output are 9x1 vectors representing a matrix stored in row-major + * form. + * @param e The input 3x3 matrix stored in a vector in row-major form. + * @param r The nearest rotation matrix to the input e (again in row-major form). + */ + static void nearestRotationMatrix(const cv::Matx& e, + cv::Matx& r); + + /* + * @brief Runs the sequential quadratic programming on orthogonal matrices. + * @param r0 The start point of the solver. + */ + SQPSolution runSQP(const cv::Matx& r0); + + /* + * @brief Steps down the gradient for the given matrix r to solve the SQP system. + * @param r The current matrix step. + * @param delta The next step down the gradient. + */ + void solveSQPSystem(const cv::Matx& r, cv::Matx& delta); + + /* + * @brief Analytically computes the inverse of a symmetric 3x3 matrix using the + * lower triangle. + * @param Q The matrix to invert. + * @param Qinv The inverse of Q. + * @param threshold The threshold to determine if Q is singular and non-invertible. + */ + bool analyticalInverse3x3Symm(const cv::Matx& Q, + cv::Matx& Qinv, + const double& threshold = 1e-8); + + /* + * @brief Computes the 3D null space and 6D normal space of the constraint Jacobian + * at a 9D vector r (representing a rank-3 matrix). Note that K is lower + * triangular so upper triangle is undefined. + * @param r 9D vector representing a rank-3 matrix. 
+ * @param H 6D row space of the constraint Jacobian at r. + * @param N 3D null space of the constraint Jacobian at r. + * @param K The constraint Jacobian at r. + * @param norm_threshold Threshold for column vector norm of Pn (the projection onto the null space + * of the constraint Jacobian). + */ + void computeRowAndNullspace(const cv::Matx& r, + cv::Matx& H, + cv::Matx& N, + cv::Matx& K, + const double& norm_threshold = 0.1); + + static const double RANK_TOLERANCE; + static const double SQP_SQUARED_TOLERANCE; + static const double SQP_DET_THRESHOLD; + static const double ORTHOGONALITY_SQUARED_ERROR_THRESHOLD; + static const double EQUAL_VECTORS_SQUARED_DIFF; + static const double EQUAL_SQUARED_ERRORS_DIFF; + static const double POINT_VARIANCE_THRESHOLD; + static const int SQP_MAX_ITERATION; + static const double SQRT3; + + cv::Matx omega_; + cv::Vec s_; + cv::Matx u_; + cv::Matx p_; + cv::Vec3d point_mean_; + int num_null_vectors_; + + SQPSolution solutions_[18]; + int num_solutions_; + +}; + +} +} + +#endif diff --git a/modules/calib3d/src/usac.hpp b/modules/calib3d/src/usac.hpp index c18de92479..06a0ff2056 100644 --- a/modules/calib3d/src/usac.hpp +++ b/modules/calib3d/src/usac.hpp @@ -421,7 +421,7 @@ struct SPRT_history { double epsilon, delta, A; // number of samples processed by test int tested_samples; // k - SPRT_history () { + SPRT_history () : epsilon(0), delta(0), A(0) { tested_samples = 0; } }; diff --git a/modules/calib3d/src/usac/ransac_solvers.cpp b/modules/calib3d/src/usac/ransac_solvers.cpp index 65fa2d3b9f..0c7637d582 100644 --- a/modules/calib3d/src/usac/ransac_solvers.cpp +++ b/modules/calib3d/src/usac/ransac_solvers.cpp @@ -286,7 +286,7 @@ public: current_score = quality->getScore(models[i]); } else { if (is_magsac && iters % repeat_magsac == 0) { - if (!local_optimization->refineModel + if (local_optimization && !local_optimization->refineModel (models[i], best_score_thread, models[i], current_score)) continue; } else if 
(model_verifier->isModelGood(models[i])) { @@ -1028,4 +1028,4 @@ bool run (const Ptr ¶ms, InputArray points1, InputArray points2 } return false; } -}} \ No newline at end of file +}} diff --git a/modules/calib3d/test/test_solvepnp_ransac.cpp b/modules/calib3d/test/test_solvepnp_ransac.cpp index 0d35fa7126..fb0e2965e6 100644 --- a/modules/calib3d/test/test_solvepnp_ransac.cpp +++ b/modules/calib3d/test/test_solvepnp_ransac.cpp @@ -190,6 +190,8 @@ static std::string printMethod(int method) return "SOLVEPNP_IPPE"; case 7: return "SOLVEPNP_IPPE_SQUARE"; + case 8: + return "SOLVEPNP_SQPNP"; default: return "Unknown value"; } @@ -206,6 +208,7 @@ public: eps[SOLVEPNP_AP3P] = 1.0e-2; eps[SOLVEPNP_DLS] = 1.0e-2; eps[SOLVEPNP_UPNP] = 1.0e-2; + eps[SOLVEPNP_SQPNP] = 1.0e-2; totalTestsCount = 10; pointsCount = 500; } @@ -436,6 +439,7 @@ public: eps[SOLVEPNP_UPNP] = 1.0e-6; //UPnP is remapped to EPnP, so we use the same threshold eps[SOLVEPNP_IPPE] = 1.0e-6; eps[SOLVEPNP_IPPE_SQUARE] = 1.0e-6; + eps[SOLVEPNP_SQPNP] = 1.0e-6; totalTestsCount = 1000; diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index adbe3727a4..50af505968 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -203,6 +203,9 @@ enum CovarFlags { COVAR_COLS = 16 }; +//! @addtogroup core_cluster +//! @{ + //! k-Means flags enum KmeansFlags { /** Select random initial centers in each attempt.*/ @@ -216,12 +219,18 @@ enum KmeansFlags { KMEANS_USE_INITIAL_LABELS = 1 }; +//! @} core_cluster + +//! @addtogroup core_array +//! @{ + enum ReduceTypes { REDUCE_SUM = 0, //!< the output is the sum of all rows/columns of the matrix. REDUCE_AVG = 1, //!< the output is the mean vector of all rows/columns of the matrix. REDUCE_MAX = 2, //!< the output is the maximum (column/row-wise) of all rows/columns of the matrix. REDUCE_MIN = 3 //!< the output is the minimum (column/row-wise) of all rows/columns of the matrix. }; +//! 
@} core_array /** @brief Swaps two matrices */ diff --git a/modules/core/include/opencv2/core/affine.hpp b/modules/core/include/opencv2/core/affine.hpp index 7e2ed30785..1806382e99 100644 --- a/modules/core/include/opencv2/core/affine.hpp +++ b/modules/core/include/opencv2/core/affine.hpp @@ -499,7 +499,7 @@ typename cv::Affine3::Vec3 cv::Affine3::rvec() const double s = std::sqrt((rx*rx + ry*ry + rz*rz)*0.25); double c = (R.val[0] + R.val[4] + R.val[8] - 1) * 0.5; c = c > 1.0 ? 1.0 : c < -1.0 ? -1.0 : c; - double theta = acos(c); + double theta = std::acos(c); if( s < 1e-5 ) { diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index eb3f8693c2..ef2b31ac18 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -220,6 +220,11 @@ struct VZeroUpperGuard { # define CV_VSX 1 #endif +#ifdef __F16C__ +# include +# define CV_FP16 1 +#endif + #endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code) diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index f7a11a29fd..1bfaa82c34 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -90,7 +90,7 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard // keep current value (through OpenCV port file) #elif defined __GNUC__ || (defined (__cpluscplus) && (__cpluscplus >= 201103)) #define CV_Func __func__ -#elif defined __clang__ && (__clang_minor__ * 100 + __clang_major >= 305) +#elif defined __clang__ && (__clang_minor__ * 100 + __clang_major__ >= 305) #define CV_Func __func__ #elif defined(__STDC_VERSION__) && (__STDC_VERSION >= 199901) #define CV_Func __func__ @@ -844,7 +844,7 @@ protected: float16_t() : w(0) {} explicit float16_t(float x) { - #if CV_AVX2 + #if CV_FP16 __m128 v = _mm_load_ss(&x); w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0)); #else @@ -875,7 
+875,7 @@ protected: operator float() const { - #if CV_AVX2 + #if CV_FP16 float f; _mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w))); return f; diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index 5dc5bb567d..54e8927192 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -3121,18 +3121,39 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float32x8, float, f32, v_uint32x8, un OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int64x4, int64, s64, v_uint64x4, uint64, u64) OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, uint64, u64) +// // FP16 +// + inline v_float32x8 v256_load_expand(const float16_t* ptr) { +#if CV_FP16 return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr))); +#else + float CV_DECL_ALIGNED(32) buf[8]; + for (int i = 0; i < 8; i++) + buf[i] = (float)ptr[i]; + return v256_load_aligned(buf); +#endif } inline void v_pack_store(float16_t* ptr, const v_float32x8& a) { +#if CV_FP16 __m128i ah = _mm256_cvtps_ph(a.val, 0); _mm_storeu_si128((__m128i*)ptr, ah); +#else + float CV_DECL_ALIGNED(32) buf[8]; + v_store_aligned(buf, a); + for (int i = 0; i < 8; i++) + ptr[i] = float16_t(buf[i]); +#endif } +// +// end of FP16 +// + inline void v256_cleanup() { _mm256_zeroall(); } CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp index d1bfb6da6d..ef928f6a5c 100644 --- a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp @@ -207,13 +207,7 @@ struct v_uint64x2 uint64 get0() const { -#ifdef __wasm_unimplemented_simd128__ return (uint64)wasm_i64x2_extract_lane(val, 0); -#else - uint64 des[2]; - wasm_v128_store(des, val); - return des[0]; -#endif } v128_t val; @@ -235,13 +229,7 @@ struct v_int64x2 int64 get0() const 
{ -#ifdef __wasm_unimplemented_simd128__ return wasm_i64x2_extract_lane(val, 0); -#else - int64 des[2]; - wasm_v128_store(des, val); - return des[0]; -#endif } v128_t val; @@ -263,13 +251,7 @@ struct v_float64x2 double get0() const { -#ifdef __wasm_unimplemented_simd128__ return wasm_f64x2_extract_lane(val, 0); -#else - double des[2]; - wasm_v128_store(des, val); - return des[0]; -#endif } v128_t val; @@ -1797,22 +1779,9 @@ OPENCV_HAL_IMPL_WASM_INITVEC(v_int16x8, short, s16, i16x8, short) OPENCV_HAL_IMPL_WASM_INITVEC(v_uint32x4, unsigned, u32, i32x4, int) OPENCV_HAL_IMPL_WASM_INITVEC(v_int32x4, int, s32, i32x4, int) OPENCV_HAL_IMPL_WASM_INITVEC(v_float32x4, float, f32, f32x4, float) - -#ifdef __wasm_unimplemented_simd128__ OPENCV_HAL_IMPL_WASM_INITVEC(v_uint64x2, uint64, u64, i64x2, int64) OPENCV_HAL_IMPL_WASM_INITVEC(v_int64x2, int64, s64, i64x2, int64) OPENCV_HAL_IMPL_WASM_INITVEC(v_float64x2, double, f64, f64x2, double) -#else -#define OPENCV_HAL_IMPL_FALLBACK_INITVEC(_Tpvec, _Tp, suffix, _Tps) \ -inline _Tpvec v_setzero_##suffix() { return _Tpvec((_Tps)0, (_Tps)0); } \ -inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec((_Tps)v, (_Tps)v); } \ -template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \ -{ return _Tpvec(a.val); } - -OPENCV_HAL_IMPL_FALLBACK_INITVEC(v_uint64x2, uint64, u64, int64) -OPENCV_HAL_IMPL_FALLBACK_INITVEC(v_int64x2, int64, s64, int64) -OPENCV_HAL_IMPL_FALLBACK_INITVEC(v_float64x2, double, f64, double) -#endif //////////////// PACK /////////////// inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b) @@ -1931,28 +1900,18 @@ inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b) template inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b) { -#ifdef __wasm_unimplemented_simd128__ v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n); v128_t b1 = wasm_u64x2_shr(wasm_i64x2_add(b.val, delta), n); return 
v_uint32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); -#else - fallback::v_uint64x2 a_(a), b_(b); - return fallback::v_rshr_pack(a_, b_); -#endif } template inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b) { -#ifdef __wasm_unimplemented_simd128__ v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n); v128_t b1 = wasm_i64x2_shr(wasm_i64x2_add(b.val, delta), n); return v_int32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); -#else - fallback::v_int64x2 a_(a), b_(b); - return fallback::v_rshr_pack(a_, b_); -#endif } template inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b) @@ -2139,7 +2098,6 @@ inline void v_rshr_pack_store(short* ptr, const v_int32x4& a) template inline void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a) { -#ifdef __wasm_unimplemented_simd128__ v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n); v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); @@ -2148,15 +2106,10 @@ inline void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a) for (int i=0; i<2; ++i) { ptr[i] = t_ptr[i]; } -#else - fallback::v_uint64x2 _a(a); - fallback::v_rshr_pack_store(ptr, _a); -#endif } template inline void v_rshr_pack_store(int* ptr, const v_int64x2& a) { -#ifdef __wasm_unimplemented_simd128__ v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n); v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); @@ -2165,10 +2118,6 @@ inline void v_rshr_pack_store(int* ptr, const v_int64x2& a) for (int i=0; i<2; ++i) { ptr[i] = t_ptr[i]; } -#else - fallback::v_int64x2 _a(a); - fallback::v_rshr_pack_store(ptr, _a); -#endif } template inline void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a) @@ -2228,7 +2177,6 @@ inline v_uint8x16 v_pack_b(const 
v_uint64x2& a, const v_uint64x2& b, const v_uin const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, const v_uint64x2& g, const v_uint64x2& h) { -#ifdef __wasm_unimplemented_simd128__ v128_t maxval = wasm_i32x4_splat(255); v128_t a1 = wasm_v128_bitselect(maxval, a.val, ((__u64x2)(a.val) > (__u64x2)maxval)); v128_t b1 = wasm_v128_bitselect(maxval, b.val, ((__u64x2)(b.val) > (__u64x2)maxval)); @@ -2245,10 +2193,6 @@ inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uin v128_t abcd = wasm_v8x16_shuffle(ab, cd, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19); v128_t efgh = wasm_v8x16_shuffle(ef, gh, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19); return v_uint8x16(wasm_v8x16_shuffle(abcd, efgh, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23)); -#else - fallback::v_uint64x2 a_(a), b_(b), c_(c), d_(d), e_(e), f_(f), g_(g), h_(h); - return fallback::v_pack_b(a_, b_, c_, d_, e_, f_, g_, h_); -#endif } inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, @@ -2310,8 +2254,6 @@ OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_float32x4, wasm_f32x4_add) OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_float32x4, wasm_f32x4_sub) OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_float32x4, wasm_f32x4_mul) OPENCV_HAL_IMPL_WASM_BIN_OP(/, v_float32x4, wasm_f32x4_div) - -#ifdef __wasm_unimplemented_simd128__ OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint64x2, wasm_i64x2_add) OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint64x2, wasm_i64x2_sub) OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int64x2, wasm_i64x2_add) @@ -2320,30 +2262,6 @@ OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_float64x2, wasm_f64x2_add) OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_float64x2, wasm_f64x2_sub) OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_float64x2, wasm_f64x2_mul) OPENCV_HAL_IMPL_WASM_BIN_OP(/, v_float64x2, wasm_f64x2_div) -#else -#define OPENCV_HAL_IMPL_FALLBACK_BIN_OP(bin_op, _Tpvec) \ -inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ -{ \ - fallback::_Tpvec a_(a), b_(b); \ - return _Tpvec((a_) bin_op (b_)); \ -} \ -inline _Tpvec& operator bin_op##= (_Tpvec& 
a, const _Tpvec& b) \ -{ \ - fallback::_Tpvec a_(a), b_(b); \ - a_ bin_op##= b_; \ - a = _Tpvec(a_); \ - return a; \ -} - -OPENCV_HAL_IMPL_FALLBACK_BIN_OP(+, v_uint64x2) -OPENCV_HAL_IMPL_FALLBACK_BIN_OP(-, v_uint64x2) -OPENCV_HAL_IMPL_FALLBACK_BIN_OP(+, v_int64x2) -OPENCV_HAL_IMPL_FALLBACK_BIN_OP(-, v_int64x2) -OPENCV_HAL_IMPL_FALLBACK_BIN_OP(+, v_float64x2) -OPENCV_HAL_IMPL_FALLBACK_BIN_OP(-, v_float64x2) -OPENCV_HAL_IMPL_FALLBACK_BIN_OP(*, v_float64x2) -OPENCV_HAL_IMPL_FALLBACK_BIN_OP(/, v_float64x2) -#endif // saturating multiply 8-bit, 16-bit #define OPENCV_HAL_IMPL_WASM_MUL_SAT(_Tpvec, _Tpwvec) \ @@ -2405,19 +2323,11 @@ inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, v_uint64x2& c, v_uint64x2& d) { -#ifdef __wasm_unimplemented_simd128__ v_uint64x2 a0, a1, b0, b1; v_expand(a, a0, a1); v_expand(b, b0, b1); c.val = ((__u64x2)(a0.val) * (__u64x2)(b0.val)); d.val = ((__u64x2)(a1.val) * (__u64x2)(b1.val)); -#else - fallback::v_uint32x4 a_(a), b_(b); - fallback::v_uint64x2 c_, d_; - fallback::v_mul_expand(a_, b_, c_, d_); - c = v_uint64x2(c_); - d = v_uint64x2(d_); -#endif } inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) @@ -2457,7 +2367,6 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32 inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) { -#ifdef __wasm_unimplemented_simd128__ v128_t a0 = wasm_i64x2_shr(wasm_i64x2_shl(a.val, 32), 32); v128_t a1 = wasm_i64x2_shr(a.val, 32); v128_t b0 = wasm_i64x2_shr(wasm_i64x2_shl(b.val, 32), 32); @@ -2465,22 +2374,10 @@ inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) v128_t c = (v128_t)((__i64x2)a0 * (__i64x2)b0); v128_t d = (v128_t)((__i64x2)a1 * (__i64x2)b1); return v_int64x2(wasm_i64x2_add(c, d)); -#else - fallback::v_int32x4 a_(a); - fallback::v_int32x4 b_(b); - return fallback::v_dotprod(a_, b_); -#endif } inline v_int64x2 v_dotprod(const v_int32x4& a, 
const v_int32x4& b, const v_int64x2& c) { -#ifdef __wasm_unimplemented_simd128__ return v_dotprod(a, b) + c; -#else - fallback::v_int32x4 a_(a); - fallback::v_int32x4 b_(b); - fallback::v_int64x2 c_(c); - return fallback::v_dotprod(a_, b_, c_); -#endif } // 8 >> 32 @@ -2515,32 +2412,32 @@ inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const // 16 >> 64 inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) { - fallback::v_uint16x8 a_(a); - fallback::v_uint16x8 b_(b); - return fallback::v_dotprod_expand(a_, b_); + v128_t a0 = wasm_u32x4_shr(wasm_i32x4_shl(a.val, 16), 16); + v128_t a1 = wasm_u32x4_shr(a.val, 16); + v128_t b0 = wasm_u32x4_shr(wasm_i32x4_shl(b.val, 16), 16); + v128_t b1 = wasm_u32x4_shr(b.val, 16); + return v_uint64x2(( + v_dotprod(v_int32x4(a0), v_int32x4(b0)) + + v_dotprod(v_int32x4(a1), v_int32x4(b1))).val + ); } inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) -{ - fallback::v_uint16x8 a_(a); - fallback::v_uint16x8 b_(b); - fallback::v_uint64x2 c_(c); - return fallback::v_dotprod_expand(a_, b_, c_); -} +{ return v_dotprod_expand(a, b) + c; } inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) { - fallback::v_int16x8 a_(a); - fallback::v_int16x8 b_(b); - return fallback::v_dotprod_expand(a_, b_); + v128_t a0 = wasm_i32x4_shr(wasm_i32x4_shl(a.val, 16), 16); + v128_t a1 = wasm_i32x4_shr(a.val, 16); + v128_t b0 = wasm_i32x4_shr(wasm_i32x4_shl(b.val, 16), 16); + v128_t b1 = wasm_i32x4_shr(b.val, 16); + return v_int64x2(( + v_dotprod(v_int32x4(a0), v_int32x4(b0)) + + v_dotprod(v_int32x4(a1), v_int32x4(b1))) + ); } inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) -{ - fallback::v_int16x8 a_(a); - fallback::v_int16x8 b_(b); - fallback::v_int64x2 c_(c); - return fallback::v_dotprod_expand(a_, b_, c_); -} +{ return v_dotprod_expand(a, b) + c; } // 32 >> 64f inline v_float64x2 
v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) @@ -2610,44 +2507,24 @@ OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_float64x2) inline v_float32x4 v_sqrt(const v_float32x4& x) { -#ifdef __wasm_unimplemented_simd128__ return v_float32x4(wasm_f32x4_sqrt(x.val)); -#else - fallback::v_float32x4 x_(x); - return fallback::v_sqrt(x_); -#endif } inline v_float32x4 v_invsqrt(const v_float32x4& x) { -#ifdef __wasm_unimplemented_simd128__ const v128_t _1_0 = wasm_f32x4_splat(1.0); return v_float32x4(wasm_f32x4_div(_1_0, wasm_f32x4_sqrt(x.val))); -#else - fallback::v_float32x4 x_(x); - return fallback::v_invsqrt(x_); -#endif } inline v_float64x2 v_sqrt(const v_float64x2& x) { -#ifdef __wasm_unimplemented_simd128__ return v_float64x2(wasm_f64x2_sqrt(x.val)); -#else - fallback::v_float64x2 x_(x); - return fallback::v_sqrt(x_); -#endif } inline v_float64x2 v_invsqrt(const v_float64x2& x) { -#ifdef __wasm_unimplemented_simd128__ const v128_t _1_0 = wasm_f64x2_splat(1.0); return v_float64x2(wasm_f64x2_div(_1_0, wasm_f64x2_sqrt(x.val))); -#else - fallback::v_float64x2 x_(x); - return fallback::v_invsqrt(x_); -#endif } #define OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(_Tpuvec, _Tpsvec, suffix, zsuffix, shiftWidth) \ @@ -2666,12 +2543,7 @@ inline v_float32x4 v_abs(const v_float32x4& x) { return v_float32x4(wasm_f32x4_abs(x.val)); } inline v_float64x2 v_abs(const v_float64x2& x) { -#ifdef __wasm_unimplemented_simd128__ return v_float64x2(wasm_f64x2_abs(x.val)); -#else - fallback::v_float64x2 x_(x); - return fallback::v_abs(x_); -#endif } // TODO: exp, log, sin, cos @@ -2684,21 +2556,8 @@ inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_min, wasm_f32x4_min) OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_max, wasm_f32x4_max) - -#ifdef __wasm_unimplemented_simd128__ OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, v_min, wasm_f64x2_min) OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, v_max, wasm_f64x2_max) -#else -#define 
OPENCV_HAL_IMPL_WASM_MINMAX_64f_FUNC(func) \ -inline v_float64x2 func(const v_float64x2& a, const v_float64x2& b) \ -{ \ - fallback::v_float64x2 a_(a), b_(b); \ - return fallback::func(a_, b_); \ -} - -OPENCV_HAL_IMPL_WASM_MINMAX_64f_FUNC(v_min) -OPENCV_HAL_IMPL_WASM_MINMAX_64f_FUNC(v_max) -#endif #define OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(_Tpvec, suffix) \ inline _Tpvec v_min(const _Tpvec& a, const _Tpvec& b) \ @@ -2753,24 +2612,7 @@ OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int16x8, i16x8, i16x8) OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint32x4, u32x4, i32x4) OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int32x4, i32x4, i32x4) OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_float32x4, f32x4, f32x4) - -#ifdef __wasm_unimplemented_simd128__ OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_float64x2, f64x2, f64x2) -#else -#define OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(_Tpvec, bin_op) \ -inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ -{ \ - fallback::_Tpvec a_(a), b_(b); \ - return _Tpvec((a_) bin_op (b_));\ -} \ - -OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, ==) -OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, !=) -OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, <) -OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, >) -OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, <=) -OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, >=) -#endif #define OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(_Tpvec, cast) \ inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ @@ -2789,14 +2631,9 @@ inline v_float32x4 v_not_nan(const v_float32x4& a) } inline v_float64x2 v_not_nan(const v_float64x2& a) { -#ifdef __wasm_unimplemented_simd128__ v128_t z = wasm_i64x2_splat(0x7fffffffffffffff); v128_t t = wasm_i64x2_splat(0x7ff0000000000000); return v_float64x2((__u64x2)(wasm_v128_and(a.val, z)) < (__u64x2)t); -#else - fallback::v_float64x2 a_(a); - return fallback::v_not_nan(a_); -#endif } OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_add_wrap, wasm_i8x16_add) @@ -2877,32 +2714,30 @@ inline v_float32x4 
v_absdiff(const v_float32x4& a, const v_float32x4& b) } inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) { -#ifdef __wasm_unimplemented_simd128__ v128_t absmask_vec = wasm_u64x2_shr(wasm_i32x4_splat(-1), 1); return v_float64x2(wasm_v128_and(wasm_f64x2_sub(a.val, b.val), absmask_vec)); -#else - fallback::v_float64x2 a_(a), b_(b); - return fallback::v_absdiff(a_, b_); -#endif } -#define OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(_Tpvec) \ +#define OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(_Tpvec, suffix) \ inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ { \ - fallback::_Tpvec a_(a), b_(b); \ - return fallback::v_magnitude(a_, b_); \ + v128_t a_Square = wasm_##suffix##_mul(a.val, a.val); \ + v128_t b_Square = wasm_##suffix##_mul(b.val, b.val); \ + return _Tpvec(wasm_##suffix##_sqrt(wasm_##suffix##_add(a_Square, b_Square))); \ } \ inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ { \ - return v_fma(a, a, b*b); \ + v128_t a_Square = wasm_##suffix##_mul(a.val, a.val); \ + v128_t b_Square = wasm_##suffix##_mul(b.val, b.val); \ + return _Tpvec(wasm_##suffix##_add(a_Square, b_Square)); \ } \ inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ { \ - return v_fma(a, b, c); \ + return _Tpvec(wasm_##suffix##_add(wasm_##suffix##_mul(a.val, b.val), c.val)); \ } -OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float32x4) -OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float64x2) +OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float32x4, f32x4) +OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float64x2, f64x2) #define OPENCV_HAL_IMPL_WASM_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, ssuffix) \ inline _Tpuvec operator << (const _Tpuvec& a, int imm) \ @@ -2945,37 +2780,7 @@ inline _Tpsvec v_shr(const _Tpsvec& a) \ OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint8x16, v_int8x16, i8x16, u8x16) OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint16x8, v_int16x8, i16x8, u16x8) OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint32x4, v_int32x4, i32x4, u32x4) - -#ifdef __wasm_unimplemented_simd128__ 
OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint64x2, v_int64x2, i64x2, u64x2) -#else -#define OPENCV_HAL_IMPL_FALLBACK_SHIFT_OP(_Tpvec) \ -inline _Tpvec operator << (const _Tpvec& a, int imm) \ -{ \ - fallback::_Tpvec a_(a); \ - return a_ << imm; \ -} \ -inline _Tpvec operator >> (const _Tpvec& a, int imm) \ -{ \ - fallback::_Tpvec a_(a); \ - return a_ >> imm; \ -} \ -template \ -inline _Tpvec v_shl(const _Tpvec& a) \ -{ \ - fallback::_Tpvec a_(a); \ - return fallback::v_shl(a_); \ -} \ -template \ -inline _Tpvec v_shr(const _Tpvec& a) \ -{ \ - fallback::_Tpvec a_(a); \ - return fallback::v_shr(a_); \ -} \ - -OPENCV_HAL_IMPL_FALLBACK_SHIFT_OP(v_uint64x2) -OPENCV_HAL_IMPL_FALLBACK_SHIFT_OP(v_int64x2) -#endif namespace hal_wasm_internal { @@ -3180,9 +2985,18 @@ OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint8x16, unsigned) OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int8x16, int) OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint16x8, unsigned) OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int16x8, int) -OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint64x2, uint64) -OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int64x2, int64) -OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_float64x2, double) + + +#define OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(_Tpvec, scalartype, regtype, suffix, esuffix) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + regtype val = a.val; \ + val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \ + return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \ +} +OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_uint64x2, uint64, v128_t, i64x2, i64x2) +OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_int64x2, int64, v128_t, i64x2, i64x2) +OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_float64x2, double, v128_t, f64x2,f64x2) inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c, const v_float32x4& d) @@ -3318,30 +3132,27 @@ OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int16x8, i16x8, short) 
OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint32x4, i32x4, int) OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int32x4, i32x4, int) OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_float32x4, i32x4, float) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_float64x2, f64x2, double) + +#define OPENCV_HAL_IMPL_WASM_CHECK_ALL_ANY(_Tpvec, suffix, esuffix) \ +inline bool v_check_all(const _Tpvec& a) \ +{ \ + v128_t masked = v_reinterpret_as_##esuffix(a).val; \ + masked = wasm_i32x4_replace_lane(masked, 0, 0xffffffff); \ + masked = wasm_i32x4_replace_lane(masked, 2, 0xffffffff); \ + return wasm_i8x16_all_true(wasm_##suffix##_lt(masked, wasm_##suffix##_splat(0))); \ +} \ +inline bool v_check_any(const _Tpvec& a) \ +{ \ + v128_t masked = v_reinterpret_as_##esuffix(a).val; \ + masked = wasm_i32x4_replace_lane(masked, 0, 0x0); \ + masked = wasm_i32x4_replace_lane(masked, 2, 0x0); \ + return wasm_i8x16_any_true(wasm_##suffix##_lt(masked, wasm_##suffix##_splat(0))); \ +} \ + +OPENCV_HAL_IMPL_WASM_CHECK_ALL_ANY(v_int64x2, i32x4, s32) +OPENCV_HAL_IMPL_WASM_CHECK_ALL_ANY(v_uint64x2, i32x4, u32) -inline int v_signmask(const v_float64x2& a) -{ - fallback::v_float64x2 a_(a); - return fallback::v_signmask(a_); -} -inline bool v_check_all(const v_float64x2& a) -{ -#ifdef __wasm_unimplemented_simd128__ - return wasm_i8x16_all_true((__i64x2)(a.val) < (__i64x2)(wasm_i64x2_splat(0))); -#else - fallback::v_float64x2 a_(a); - return fallback::v_check_all(a_); -#endif -} -inline bool v_check_any(const v_float64x2& a) -{ -#ifdef __wasm_unimplemented_simd128__ - return wasm_i8x16_any_true((__i64x2)(a.val) < (__i64x2)(wasm_i64x2_splat(0)));; -#else - fallback::v_float64x2 a_(a); - return fallback::v_check_any(a_); -#endif -} inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } @@ -3366,8 +3177,8 @@ OPENCV_HAL_IMPL_WASM_SELECT(v_uint16x8) OPENCV_HAL_IMPL_WASM_SELECT(v_int16x8) 
OPENCV_HAL_IMPL_WASM_SELECT(v_uint32x4) OPENCV_HAL_IMPL_WASM_SELECT(v_int32x4) -// OPENCV_HAL_IMPL_WASM_SELECT(v_uint64x2) -// OPENCV_HAL_IMPL_WASM_SELECT(v_int64x2) +OPENCV_HAL_IMPL_WASM_SELECT(v_uint64x2) +OPENCV_HAL_IMPL_WASM_SELECT(v_int64x2) OPENCV_HAL_IMPL_WASM_SELECT(v_float32x4) OPENCV_HAL_IMPL_WASM_SELECT(v_float64x2) diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp index 8a7eebbe22..d6296f8e2e 100644 --- a/modules/core/include/opencv2/core/mat.inl.hpp +++ b/modules/core/include/opencv2/core/mat.inl.hpp @@ -458,158 +458,6 @@ CV__DEBUG_NS_END //////////////////////////////////////////// Mat ////////////////////////////////////////// -inline -Mat::Mat() - : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows), step(0) -{} - -inline -Mat::Mat(int _rows, int _cols, int _type) - : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows), step(0) -{ - create(_rows, _cols, _type); -} - -inline -Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s) - : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows), step(0) -{ - create(_rows, _cols, _type); - *this = _s; -} - -inline -Mat::Mat(Size _sz, int _type) - : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows), step(0) -{ - create( _sz.height, _sz.width, _type ); -} - -inline -Mat::Mat(Size _sz, int _type, const Scalar& _s) - : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows), step(0) -{ - create(_sz.height, _sz.width, _type); - *this = _s; -} - -inline -Mat::Mat(int _dims, const int* _sz, int _type) - : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - 
datalimit(0), allocator(0), u(0), size(&rows), step(0) -{ - create(_dims, _sz, _type); -} - -inline -Mat::Mat(int _dims, const int* _sz, int _type, const Scalar& _s) - : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows), step(0) -{ - create(_dims, _sz, _type); - *this = _s; -} - -inline -Mat::Mat(const std::vector& _sz, int _type) - : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows), step(0) -{ - create(_sz, _type); -} - -inline -Mat::Mat(const std::vector& _sz, int _type, const Scalar& _s) - : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows), step(0) -{ - create(_sz, _type); - *this = _s; -} - -inline -Mat::Mat(const Mat& m) - : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data), - datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator), - u(m.u), size(&rows), step(0) -{ - if( u ) - CV_XADD(&u->refcount, 1); - if( m.dims <= 2 ) - { - step[0] = m.step[0]; step[1] = m.step[1]; - } - else - { - dims = 0; - copySize(m); - } -} - -inline -Mat::Mat(int _rows, int _cols, int _type, void* _data, size_t _step) - : flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_rows), cols(_cols), - data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0), - allocator(0), u(0), size(&rows) -{ - CV_Assert(total() == 0 || data != NULL); - - size_t esz = CV_ELEM_SIZE(_type), esz1 = CV_ELEM_SIZE1(_type); - size_t minstep = cols * esz; - if( _step == AUTO_STEP ) - { - _step = minstep; - } - else - { - CV_DbgAssert( _step >= minstep ); - if (_step % esz1 != 0) - { - CV_Error(Error::BadStep, "Step must be a multiple of esz1"); - } - } - step[0] = _step; - step[1] = esz; - datalimit = datastart + _step * rows; - dataend = datalimit - _step + minstep; - updateContinuityFlag(); -} - -inline 
-Mat::Mat(Size _sz, int _type, void* _data, size_t _step) - : flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_sz.height), cols(_sz.width), - data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0), - allocator(0), u(0), size(&rows) -{ - CV_Assert(total() == 0 || data != NULL); - - size_t esz = CV_ELEM_SIZE(_type), esz1 = CV_ELEM_SIZE1(_type); - size_t minstep = cols*esz; - if( _step == AUTO_STEP ) - { - _step = minstep; - } - else - { - CV_DbgAssert( _step >= minstep ); - - if (_step % esz1 != 0) - { - CV_Error(Error::BadStep, "Step must be a multiple of esz1"); - } - } - step[0] = _step; - step[1] = esz; - datalimit = datastart + _step*rows; - dataend = datalimit - _step + minstep; - updateContinuityFlag(); -} - template inline Mat::Mat(const std::vector<_Tp>& vec, bool copyData) : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()), @@ -743,43 +591,6 @@ Mat::Mat(const MatCommaInitializer_<_Tp>& commaInitializer) *this = commaInitializer.operator Mat_<_Tp>(); } -inline -Mat::~Mat() -{ - release(); - if( step.p != step.buf ) - fastFree(step.p); -} - -inline -Mat& Mat::operator = (const Mat& m) -{ - if( this != &m ) - { - if( m.u ) - CV_XADD(&m.u->refcount, 1); - release(); - flags = m.flags; - if( dims <= 2 && m.dims <= 2 ) - { - dims = m.dims; - rows = m.rows; - cols = m.cols; - step[0] = m.step[0]; - step[1] = m.step[1]; - } - else - copySize(m); - data = m.data; - datastart = m.datastart; - dataend = m.dataend; - datalimit = m.datalimit; - allocator = m.allocator; - u = m.u; - } - return *this; -} - inline Mat Mat::row(int y) const { @@ -816,67 +627,6 @@ Mat Mat::colRange(const Range& r) const return Mat(*this, Range::all(), r); } -inline -Mat Mat::clone() const -{ - Mat m; - copyTo(m); - return m; -} - -inline -void Mat::assignTo( Mat& m, int _type ) const -{ - if( _type < 0 ) - m = *this; - else - convertTo(m, _type); -} - -inline -void Mat::create(int _rows, int _cols, int _type) -{ - _type &= 
TYPE_MASK; - if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && data ) - return; - int sz[] = {_rows, _cols}; - create(2, sz, _type); -} - -inline -void Mat::create(Size _sz, int _type) -{ - create(_sz.height, _sz.width, _type); -} - -inline -void Mat::addref() -{ - if( u ) - CV_XADD(&u->refcount, 1); -} - -inline -void Mat::release() -{ - if( u && CV_XADD(&u->refcount, -1) == 1 ) - deallocate(); - u = NULL; - datastart = dataend = datalimit = data = 0; - for(int i = 0; i < dims; i++) - size.p[i] = 0; -#ifdef _DEBUG - flags = MAGIC_VAL; - dims = rows = cols = 0; - if(step.p != step.buf) - { - fastFree(step.p); - step.p = step.buf; - size.p = &rows; - } -#endif -} - inline Mat Mat::operator()( Range _rowRange, Range _colRange ) const { @@ -945,40 +695,6 @@ int Mat::channels() const return CV_MAT_CN(flags); } -inline -size_t Mat::step1(int i) const -{ - return step.p[i] / elemSize1(); -} - -inline -bool Mat::empty() const -{ - return data == 0 || total() == 0 || dims == 0; -} - -inline -size_t Mat::total() const -{ - if( dims <= 2 ) - return (size_t)rows * cols; - size_t p = 1; - for( int i = 0; i < dims; i++ ) - p *= size[i]; - return p; -} - -inline -size_t Mat::total(int startDim, int endDim) const -{ - CV_Assert( 0 <= startDim && startDim <= endDim); - size_t p = 1; - int endDim_ = endDim <= dims ? 
endDim : dims; - for( int i = startDim; i < endDim_; i++ ) - p *= size[i]; - return p; -} - inline uchar* Mat::ptr(int y) { @@ -1396,67 +1112,6 @@ void Mat::push_back(const std::vector<_Tp>& v) push_back(Mat(v)); } -inline -Mat::Mat(Mat&& m) - : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data), - datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator), - u(m.u), size(&rows) -{ - if (m.dims <= 2) // move new step/size info - { - step[0] = m.step[0]; - step[1] = m.step[1]; - } - else - { - CV_DbgAssert(m.step.p != m.step.buf); - step.p = m.step.p; - size.p = m.size.p; - m.step.p = m.step.buf; - m.size.p = &m.rows; - } - m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0; - m.data = NULL; m.datastart = NULL; m.dataend = NULL; m.datalimit = NULL; - m.allocator = NULL; - m.u = NULL; -} - -inline -Mat& Mat::operator = (Mat&& m) -{ - if (this == &m) - return *this; - - release(); - flags = m.flags; dims = m.dims; rows = m.rows; cols = m.cols; data = m.data; - datastart = m.datastart; dataend = m.dataend; datalimit = m.datalimit; allocator = m.allocator; - u = m.u; - if (step.p != step.buf) // release self step/size - { - fastFree(step.p); - step.p = step.buf; - size.p = &rows; - } - if (m.dims <= 2) // move new step/size info - { - step[0] = m.step[0]; - step[1] = m.step[1]; - } - else - { - CV_DbgAssert(m.step.p != m.step.buf); - step.p = m.step.p; - size.p = m.size.p; - m.step.p = m.step.buf; - m.size.p = &m.rows; - } - m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0; - m.data = NULL; m.datastart = NULL; m.dataend = NULL; m.datalimit = NULL; - m.allocator = NULL; - m.u = NULL; - return *this; -} - ///////////////////////////// MatSize //////////////////////////// @@ -1503,22 +1158,6 @@ MatSize::operator const int*() const return p; } -inline -bool MatSize::operator == (const MatSize& sz) const -{ - int d = dims(); - int dsz = sz.dims(); - if( d != dsz ) - return false; - if( d == 2 ) - return p[0] == sz.p[0] && 
p[1] == sz.p[1]; - - for( int i = 0; i < d; i++ ) - if( p[i] != sz.p[i] ) - return false; - return true; -} - inline bool MatSize::operator != (const MatSize& sz) const { @@ -1775,9 +1414,7 @@ template inline void Mat_<_Tp>::release() { Mat::release(); -#ifdef _DEBUG flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; -#endif } template inline @@ -2132,51 +1769,6 @@ Mat_<_Tp>::Mat_(MatExpr&& e) ///////////////////////////// SparseMat ///////////////////////////// -inline -SparseMat::SparseMat() - : flags(MAGIC_VAL), hdr(0) -{} - -inline -SparseMat::SparseMat(int _dims, const int* _sizes, int _type) - : flags(MAGIC_VAL), hdr(0) -{ - create(_dims, _sizes, _type); -} - -inline -SparseMat::SparseMat(const SparseMat& m) - : flags(m.flags), hdr(m.hdr) -{ - addref(); -} - -inline -SparseMat::~SparseMat() -{ - release(); -} - -inline -SparseMat& SparseMat::operator = (const SparseMat& m) -{ - if( this != &m ) - { - if( m.hdr ) - CV_XADD(&m.hdr->refcount, 1); - release(); - flags = m.flags; - hdr = m.hdr; - } - return *this; -} - -inline -SparseMat& SparseMat::operator = (const Mat& m) -{ - return (*this = SparseMat(m)); -} - inline SparseMat SparseMat::clone() const { @@ -2185,30 +1777,6 @@ SparseMat SparseMat::clone() const return temp; } -inline -void SparseMat::assignTo( SparseMat& m, int _type ) const -{ - if( _type < 0 ) - m = *this; - else - convertTo(m, _type); -} - -inline -void SparseMat::addref() -{ - if( hdr ) - CV_XADD(&hdr->refcount, 1); -} - -inline -void SparseMat::release() -{ - if( hdr && CV_XADD(&hdr->refcount, -1) == 1 ) - delete hdr; - hdr = 0; -} - inline size_t SparseMat::elemSize() const { @@ -2268,36 +1836,6 @@ size_t SparseMat::nzcount() const return hdr ? 
hdr->nodeCount : 0; } -inline -size_t SparseMat::hash(int i0) const -{ - return (size_t)i0; -} - -inline -size_t SparseMat::hash(int i0, int i1) const -{ - return (size_t)(unsigned)i0 * HASH_SCALE + (unsigned)i1; -} - -inline -size_t SparseMat::hash(int i0, int i1, int i2) const -{ - return ((size_t)(unsigned)i0 * HASH_SCALE + (unsigned)i1) * HASH_SCALE + (unsigned)i2; -} - -inline -size_t SparseMat::hash(const int* idx) const -{ - size_t h = (unsigned)idx[0]; - if( !hdr ) - return 0; - int d = hdr->dims; - for(int i = 1; i < d; i++ ) - h = h * HASH_SCALE + (unsigned)idx[i]; - return h; -} - template inline _Tp& SparseMat::ref(int i0, size_t* hashval) { @@ -3617,74 +3155,6 @@ const Mat_<_Tp>& operator /= (const Mat_<_Tp>& a, const MatExpr& b) //////////////////////////////// UMat //////////////////////////////// -inline -UMat::UMat(UMatUsageFlags _usageFlags) -: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) -{} - -inline -UMat::UMat(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags) -: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) -{ - create(_rows, _cols, _type); -} - -inline -UMat::UMat(int _rows, int _cols, int _type, const Scalar& _s, UMatUsageFlags _usageFlags) -: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) -{ - create(_rows, _cols, _type); - *this = _s; -} - -inline -UMat::UMat(Size _sz, int _type, UMatUsageFlags _usageFlags) -: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) -{ - create( _sz.height, _sz.width, _type ); -} - -inline -UMat::UMat(Size _sz, int _type, const Scalar& _s, UMatUsageFlags _usageFlags) -: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) -{ - create(_sz.height, _sz.width, _type); - *this = _s; -} - 
-inline -UMat::UMat(int _dims, const int* _sz, int _type, UMatUsageFlags _usageFlags) -: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) -{ - create(_dims, _sz, _type); -} - -inline -UMat::UMat(int _dims, const int* _sz, int _type, const Scalar& _s, UMatUsageFlags _usageFlags) -: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) -{ - create(_dims, _sz, _type); - *this = _s; -} - -inline -UMat::UMat(const UMat& m) -: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator), - usageFlags(m.usageFlags), u(m.u), offset(m.offset), size(&rows) -{ - addref(); - if( m.dims <= 2 ) - { - step[0] = m.step[0]; step[1] = m.step[1]; - } - else - { - dims = 0; - copySize(m); - } -} - - template inline UMat::UMat(const std::vector<_Tp>& vec, bool copyData) : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()), @@ -3701,33 +3171,6 @@ cols(1), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows) Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this); } -inline -UMat& UMat::operator = (const UMat& m) -{ - if( this != &m ) - { - const_cast(m).addref(); - release(); - flags = m.flags; - if( dims <= 2 && m.dims <= 2 ) - { - dims = m.dims; - rows = m.rows; - cols = m.cols; - step[0] = m.step[0]; - step[1] = m.step[1]; - } - else - copySize(m); - allocator = m.allocator; - if (usageFlags == USAGE_DEFAULT) - usageFlags = m.usageFlags; - u = m.u; - offset = m.offset; - } - return *this; -} - inline UMat UMat::row(int y) const { @@ -3764,55 +3207,6 @@ UMat UMat::colRange(const Range& r) const return UMat(*this, Range::all(), r); } -inline -UMat UMat::clone() const -{ - UMat m; - copyTo(m); - return m; -} - -inline -void UMat::assignTo( UMat& m, int _type ) const -{ - if( _type < 0 ) - m = *this; - else - convertTo(m, _type); -} - -inline -void 
UMat::create(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags) -{ - _type &= TYPE_MASK; - if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && u ) - return; - int sz[] = {_rows, _cols}; - create(2, sz, _type, _usageFlags); -} - -inline -void UMat::create(Size _sz, int _type, UMatUsageFlags _usageFlags) -{ - create(_sz.height, _sz.width, _type, _usageFlags); -} - -inline -void UMat::addref() -{ - if( u ) - CV_XADD(&(u->urefcount), 1); -} - -inline void UMat::release() -{ - if( u && CV_XADD(&(u->urefcount), -1) == 1 ) - deallocate(); - for(int i = 0; i < dims; i++) - size.p[i] = 0; - u = 0; -} - inline UMat UMat::operator()( Range _rowRange, Range _colRange ) const { @@ -3887,83 +3281,6 @@ size_t UMat::step1(int i) const return step.p[i] / elemSize1(); } -inline -bool UMat::empty() const -{ - return u == 0 || total() == 0 || dims == 0; -} - -inline -size_t UMat::total() const -{ - if( dims <= 2 ) - return (size_t)rows * cols; - size_t p = 1; - for( int i = 0; i < dims; i++ ) - p *= size[i]; - return p; -} - -inline -UMat::UMat(UMat&& m) -: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator), - usageFlags(m.usageFlags), u(m.u), offset(m.offset), size(&rows) -{ - if (m.dims <= 2) // move new step/size info - { - step[0] = m.step[0]; - step[1] = m.step[1]; - } - else - { - CV_DbgAssert(m.step.p != m.step.buf); - step.p = m.step.p; - size.p = m.size.p; - m.step.p = m.step.buf; - m.size.p = &m.rows; - } - m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0; - m.allocator = NULL; - m.u = NULL; - m.offset = 0; -} - -inline -UMat& UMat::operator = (UMat&& m) -{ - if (this == &m) - return *this; - release(); - flags = m.flags; dims = m.dims; rows = m.rows; cols = m.cols; - allocator = m.allocator; usageFlags = m.usageFlags; - u = m.u; - offset = m.offset; - if (step.p != step.buf) // release self step/size - { - fastFree(step.p); - step.p = step.buf; - size.p = &rows; - } - if (m.dims <= 2) // move new step/size info - { 
- step[0] = m.step[0]; - step[1] = m.step[1]; - } - else - { - CV_DbgAssert(m.step.p != m.step.buf); - step.p = m.step.p; - size.p = m.size.p; - m.step.p = m.step.buf; - m.size.p = &m.rows; - } - m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0; - m.allocator = NULL; - m.u = NULL; - m.offset = 0; - return *this; -} - inline bool UMatData::hostCopyObsolete() const { return (flags & HOST_COPY_OBSOLETE) != 0; } inline bool UMatData::deviceCopyObsolete() const { return (flags & DEVICE_COPY_OBSOLETE) != 0; } diff --git a/modules/core/include/opencv2/core/persistence.hpp b/modules/core/include/opencv2/core/persistence.hpp index 9e3f8f6914..276f640323 100644 --- a/modules/core/include/opencv2/core/persistence.hpp +++ b/modules/core/include/opencv2/core/persistence.hpp @@ -403,8 +403,8 @@ public: /** * @brief Simplified writing API to use with bindings. - * @param name Name of the written object - * @param val Value of the written object + * @param name Name of the written object. When writing to sequences (a.k.a. "arrays"), pass an empty string. + * @param val Value of the written object. */ CV_WRAP void write(const String& name, int val); /// @overload @@ -437,9 +437,10 @@ public: CV_WRAP void writeComment(const String& comment, bool append = false); /** @brief Starts to write a nested structure (sequence or a mapping). - @param name name of the structure (if it's a member of parent mapping, otherwise it should be empty + @param name name of the structure. When writing to sequences (a.k.a. "arrays"), pass an empty string. @param flags type of the structure (FileNode::MAP or FileNode::SEQ (both with optional FileNode::FLOW)). - @param typeName usually an empty string + @param typeName optional name of the type you store. The effect of setting this depends on the storage format. + I.e. if the format has a specification for storing type information, this parameter is used. 
*/ CV_WRAP void startWriteStruct(const String& name, int flags, const String& typeName=String()); diff --git a/modules/core/include/opencv2/core/quaternion.hpp b/modules/core/include/opencv2/core/quaternion.hpp new file mode 100644 index 0000000000..c72ee8c37f --- /dev/null +++ b/modules/core/include/opencv2/core/quaternion.hpp @@ -0,0 +1,1194 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved. +// Third party copyrights are property of their respective owners. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: Liangqian Kong +// Longbu Wang +#ifndef OPENCV_CORE_QUATERNION_HPP +#define OPENCV_CORE_QUATERNION_HPP + +#include +#include +namespace cv +{ +//! @addtogroup core +//! @{ + +//! Unit quaternion flag +enum QuatAssumeType +{ + /** + * This flag is specified by default. + * If this flag is specified, the input quaternions are assumed to be not unit quaternions. + * It can guarantee the correctness of the calculations, + * although the calculation speed will be slower than the flag QUAT_ASSUME_UNIT. 
+ */ + QUAT_ASSUME_NOT_UNIT, + /** + * If this flag is specified, the input quaternions are assumed to be unit quaternions which + * will save some computations. However, if this flag is specified without unit quaternion, + * the program correctness of the result will not be guaranteed. + */ + QUAT_ASSUME_UNIT +}; + +template class Quat; +template std::ostream& operator<<(std::ostream&, const Quat<_Tp>&); + +/** + * Quaternion is a number system that extends the complex numbers. It can be expressed as a + * rotation in three-dimensional space. + * A quaternion is generally represented in the form: + * \f[q = w + x\boldsymbol{i} + y\boldsymbol{j} + z\boldsymbol{k}\f] + * \f[q = [w, x, y, z]\f] + * \f[q = [w, \boldsymbol{v}] \f] + * \f[q = ||q||[\cos\psi, u_x\sin\psi,u_y\sin\psi, u_z\sin\psi].\f] + * \f[q = ||q||[\cos\psi, \boldsymbol{u}\sin\psi]\f] + * where \f$\psi = \frac{\theta}{2}\f$, \f$\theta\f$ represents rotation angle, + * \f$\boldsymbol{u} = [u_x, u_y, u_z]\f$ represents normalized rotation axis, + * and \f$||q||\f$ represents the norm of \f$q\f$. + * + * A unit quaternion is usually represents rotation, which has the form: + * \f[q = [\cos\psi, u_x\sin\psi,u_y\sin\psi, u_z\sin\psi].\f] + * + * To create a quaternion representing the rotation around the axis \f$\boldsymbol{u}\f$ + * with angle \f$\theta\f$, you can use + * ``` + * using namespace cv; + * double angle = CV_PI; + * Vec3d axis = {0, 0, 1}; + * Quatd q = Quatd::createFromAngleAxis(angle, axis); + * ``` + * + * You can simply use four same type number to create a quaternion + * ``` + * Quatd q(1, 2, 3, 4); + * ``` + * Or use a Vec4d or Vec4f vector. 
+ * ``` + * Vec4d vec{1, 2, 3, 4}; + * Quatd q(vec); + * ``` + * + * ``` + * Vec4f vec{1, 2, 3, 4}; + * Quatf q(vec); + * ``` + * + * If you already have a 3x3 rotation matrix R, then you can use + * ``` + * Quatd q = Quatd::createFromRotMat(R); + * ``` + * + * If you already have a rotation vector rvec which has the form of `angle * axis`, then you can use + * ``` + * Quatd q = Quatd::createFromRvec(rvec); + * ``` + * + * To extract the rotation matrix from quaternion, see toRotMat3x3() + * + * To extract the Vec4d or Vec4f, see toVec() + * + * To extract the rotation vector, see toRotVec() + * + * If there are two quaternions \f$q_0, q_1\f$ are needed to interpolate, you can use nlerp(), slerp() or spline() + * ``` + * Quatd::nlerp(q0, q1, t) + * + * Quatd::slerp(q0, q1, t) + * + * Quatd::spline(q0, q0, q1, q1, t) + * ``` + * spline can smoothly connect rotations of multiple quaternions + * + * Three ways to get an element in Quaternion + * ``` + * Quatf q(1,2,3,4); + * std::cout << q.w << std::endl; // w=1, x=2, y=3, z=4 + * std::cout << q[0] << std::endl; // q[0]=1, q[1]=2, q[2]=3, q[3]=4 + * std::cout << q.at(0) << std::endl; + * ``` + */ +template +class Quat +{ + static_assert(std::is_floating_point<_Tp>::value, "Quaternion only make sense with type of float or double"); + using value_type = _Tp; + +public: + static constexpr _Tp CV_QUAT_EPS = (_Tp)1.e-6; + + Quat(); + + /** + * @brief From Vec4d or Vec4f. + */ + explicit Quat(const Vec<_Tp, 4> &coeff); + + /** + * @brief from four numbers. + */ + Quat(_Tp w, _Tp x, _Tp y, _Tp z); + + /** + * @brief from an angle, axis. Axis will be normalized in this function. And + * it generates + * \f[q = [\cos\psi, u_x\sin\psi,u_y\sin\psi, u_z\sin\psi].\f] + * where \f$\psi = \frac{\theta}{2}\f$, \f$\theta\f$ is the rotation angle. + */ + static Quat<_Tp> createFromAngleAxis(const _Tp angle, const Vec<_Tp, 3> &axis); + + /** + * @brief from a 3x3 rotation matrix. 
+ */ + static Quat<_Tp> createFromRotMat(InputArray R); + + /** + * @brief from a rotation vector + * \f$r\f$ has the form \f$\theta \cdot \boldsymbol{u}\f$, where \f$\theta\f$ + * represents rotation angle and \f$\boldsymbol{u}\f$ represents normalized rotation axis. + * + * Angle and axis could be easily derived as: + * \f[ + * \begin{equation} + * \begin{split} + * \psi &= ||r||\\ + * \boldsymbol{u} &= \frac{r}{\theta} + * \end{split} + * \end{equation} + * \f] + * Then a quaternion can be calculated by + * \f[q = [\cos\psi, \boldsymbol{u}\sin\psi]\f] + * where \f$\psi = \theta / 2 \f$ + */ + static Quat<_Tp> createFromRvec(InputArray rvec); + + /** + * @brief a way to get element. + * @param index over a range [0, 3]. + * + * A quaternion q + * + * q.at(0) is equivalent to q.w, + * + * q.at(1) is equivalent to q.x, + * + * q.at(2) is equivalent to q.y, + * + * q.at(3) is equivalent to q.z. + */ + _Tp at(size_t index) const; + + /** + * @brief return the conjugate of this quaternion. + * \f[q.conjugate() = (w, -x, -y, -z).\f] + */ + Quat<_Tp> conjugate() const; + + /** + * + * @brief return the value of exponential value. + * \f[\exp(q) = e^w (\cos||\boldsymbol{v}||+ \frac{v}{||\boldsymbol{v}||})\sin||\boldsymbol{v}||\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example: + * ``` + * Quatd q{1,2,3,4}; + * cout << exp(q) << endl; + * ``` + */ + template + friend Quat exp(const Quat &q); + + /** + * @brief return the value of exponential value. + * \f[\exp(q) = e^w (\cos||\boldsymbol{v}||+ \frac{v}{||\boldsymbol{v}||}\sin||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q{1,2,3,4}; + * cout << q.exp() << endl; + * ``` + */ + Quat<_Tp> exp() const; + + /** + * @brief return the value of logarithm function. + * \f[\ln(q) = \ln||q|| + \frac{\boldsymbol{v}}{||\boldsymbol{v}||}\arccos\frac{w}{||q||}.\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. 
+ * @param assumeUnit if QUAT_ASSUME_UNIT, q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q1{1,2,3,4}; + * cout << log(q1) << endl; + * ``` + */ + template + friend Quat log(const Quat &q, QuatAssumeType assumeUnit); + + /** + * @brief return the value of logarithm function. + * \f[\ln(q) = \ln||q|| + \frac{\boldsymbol{v}}{||\boldsymbol{v}||}\arccos\frac{w}{||q||}\f]. + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.log(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * Quatd q1(1,2,3,4); + * q1.normalize().log(assumeUnit); + * ``` + */ + Quat<_Tp> log(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the value of power function with index \f$x\f$. + * \f[q^x = ||q||(cos(x\theta) + \boldsymbol{u}sin(x\theta))).\f] + * @param q a quaternion. + * @param x index of exponentiation. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * power(q, 2); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * double angle = CV_PI; + * Vec3d axis{0, 0, 1}; + * Quatd q1 = Quatd::createFromAngleAxis(angle, axis); //generate a unit quat by axis and angle + * power(q1, 2, assumeUnit);//This assumeUnit means q1 is a unit quaternion. + * ``` + */ + template + friend Quat power(const Quat &q, _T x, QuatAssumeType assumeUnit); + + /** + * @brief return the value of power function with index \f$x\f$. + * \f[q^x = ||q||(\cos(x\theta) + \boldsymbol{u}\sin(x\theta))).\f] + * @param x index of exponentiation. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and this function will save some computations. 
+ * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.power(2); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * double angle = CV_PI; + * Vec3d axis{0, 0, 1}; + * Quatd q1 = Quatd::createFromAngleAxis(angle, axis); //generate a unit quat by axis and angle + * q1.power(2, assumeUnit); //This assumeUnt means q1 is a unit quaternion + * ``` + */ + template + Quat<_Tp> power(_T x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return \f$\sqrt{q}\f$. + * @param q a quaternion. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatf q(1,2,3,4); + * sqrt(q); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q = {1,0,0,0}; + * sqrt(q, assumeUnit); //This assumeUnit means q is a unit quaternion. + * ``` + */ + template + friend Quat sqrt(const Quat &q, QuatAssumeType assumeUnit); + + /** + * @brief return \f$\sqrt{q}\f$. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatf q(1,2,3,4); + * q.sqrt(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q = {1,0,0,0}; + * q.sqrt(assumeUnit); //This assumeUnit means q is a unit quaternion + * ``` + */ + Quat<_Tp> sqrt(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the value of power function with quaternion \f$q\f$. + * \f[p^q = e^{q\ln(p)}.\f] + * @param p base quaternion of power function. + * @param q index quaternion of power function. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion \f$p\f$ assume to be a unit quaternion and this function will save some computations. 
+ * + * For example + * ``` + * Quatd p(1,2,3,4); + * Quatd q(5,6,7,8); + * power(p, q); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * p = p.normalize(); + * power(p, q, assumeUnit); //This assumeUnit means p is a unit quaternion + * ``` + */ + template + friend Quat power(const Quat &p, const Quat &q, QuatAssumeType assumeUnit); + + /** + * @brief return the value of power function with quaternion \f$q\f$. + * \f[p^q = e^{q\ln(p)}.\f] + * @param q index quaternion of power function. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd p(1,2,3,4); + * Quatd q(5,6,7,8); + * p.power(q); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * p = p.normalize(); + * p.power(q, assumeUnit); //This assumeUnit means p is a unit quaternion + * ``` + */ + Quat<_Tp> power(const Quat<_Tp> &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the crossProduct between \f$p = (a, b, c, d) = (a, \boldsymbol{u})\f$ and \f$q = (w, x, y, z) = (w, \boldsymbol{v})\f$. + * \f[p \times q = \frac{pq- qp}{2}\f] + * \f[p \times q = \boldsymbol{u} \times \boldsymbol{v}\f] + * \f[p \times q = (cz-dy)i + (dx-bz)j + (by-xc)k \f] + * + * For example + * ``` + * Quatd q{1,2,3,4}; + * Quatd p{5,6,7,8}; + * crossProduct(p, q); + * ``` + */ + template + friend Quat crossProduct(const Quat &p, const Quat &q); + + /** + * @brief return the crossProduct between \f$p = (a, b, c, d) = (a, \boldsymbol{u})\f$ and \f$q = (w, x, y, z) = (w, \boldsymbol{v})\f$. + * \f[p \times q = \frac{pq- qp}{2}.\f] + * \f[p \times q = \boldsymbol{u} \times \boldsymbol{v}.\f] + * \f[p \times q = (cz-dy)i + (dx-bz)j + (by-xc)k. \f] + * + * For example + * ``` + * Quatd q{1,2,3,4}; + * Quatd p{5,6,7,8}; + * p.crossProduct(q) + * ``` + */ + Quat<_Tp> crossProduct(const Quat<_Tp> &q) const; + + /** + * @brief return the norm of quaternion. 
+ * \f[||q|| = \sqrt{w^2 + x^2 + y^2 + z^2}.\f] + */ + _Tp norm() const; + + /** + * @brief return a normalized \f$p\f$. + * \f[p = \frac{q}{||q||}\f] + * where \f$p\f$ satisfies \f$(p.x)^2 + (p.y)^2 + (p.z)^2 + (p.w)^2 = 1.\f$ + */ + Quat<_Tp> normalize() const; + + /** + * @brief return \f$q^{-1}\f$ which is an inverse of \f$q\f$ + * which satisfies \f$q * q^{-1} = 1\f$. + * @param q a quaternion. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * inv(q); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q = q.normalize(); + * inv(q, assumeUnit);//This assumeUnit means p is a unit quaternion + * ``` + */ + template + friend Quat inv(const Quat &q, QuatAssumeType assumeUnit); + + /** + * @brief return \f$q^{-1}\f$ which is an inverse of \f$q\f$ + * satisfying \f$q * q^{-1} = 1\f$. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.inv(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q = q.normalize(); + * q.inv(assumeUnit); //assumeUnit means p is a unit quaternion + * ``` + */ + Quat<_Tp> inv(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return sinh value of quaternion q, sinh could be calculated as: + * \f[\sinh(p) = \sin(w)\cos(||\boldsymbol{v}||) + \cosh(w)\frac{v}{||\boldsymbol{v}||}\sin||\boldsymbol{v}||\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. 
+ * + * For example + * ``` + * Quatd q(1,2,3,4); + * sinh(q); + * ``` + */ + template + friend Quat sinh(const Quat &q); + + /** + * @brief return sinh value of this quaternion, sinh could be calculated as: + * \f$\sinh(p) = \sin(w)\cos(||\boldsymbol{v}||) + \cosh(w)\frac{v}{||\boldsymbol{v}||}\sin||\boldsymbol{v}||\f$ + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.sinh(); + * ``` + */ + Quat<_Tp> sinh() const; + + /** + * @brief return cosh value of quaternion q, cosh could be calculated as: + * \f[\cosh(p) = \cosh(w) * \cos(||\boldsymbol{v}||) + \sinh(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sin(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * cosh(q); + * ``` + */ + template + friend Quat cosh(const Quat &q); + + /** + * @brief return cosh value of this quaternion, cosh could be calculated as: + * \f[\cosh(p) = \cosh(w) * \cos(||\boldsymbol{v}||) + \sinh(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}sin(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.cosh(); + * ``` + */ + Quat<_Tp> cosh() const; + + /** + * @brief return tanh value of quaternion q, tanh could be calculated as: + * \f[ \tanh(q) = \frac{\sinh(q)}{\cosh(q)}.\f] + * @param q a quaternion. 
+ * + * For example + * ``` + * Quatd q(1,2,3,4); + * tanh(q); + * ``` + * @sa sinh, cosh + */ + template + friend Quat tanh(const Quat &q); + + /** + * @brief return tanh value of this quaternion, tanh could be calculated as: + * \f[ \tanh(q) = \frac{\sinh(q)}{\cosh(q)}.\f] + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.tanh(); + * ``` + * @sa sinh, cosh + */ + Quat<_Tp> tanh() const; + + /** + * @brief return tanh value of quaternion q, sin could be calculated as: + * \f[\sin(p) = \sin(w) * \cosh(||\boldsymbol{v}||) + \cos(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sinh(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * sin(q); + * ``` + */ + template + friend Quat sin(const Quat &q); + + /** + * @brief return sin value of this quaternion, sin could be calculated as: + * \f[\sin(p) = \sin(w) * \cosh(||\boldsymbol{v}||) + \cos(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sinh(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.sin(); + * ``` + */ + Quat<_Tp> sin() const; + + /** + * @brief return sin value of quaternion q, cos could be calculated as: + * \f[\cos(p) = \cos(w) * \cosh(||\boldsymbol{v}||) - \sin(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sinh(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. 
+ * + * For example + * ``` + * Quatd q(1,2,3,4); + * cos(q); + * ``` + */ + template + friend Quat cos(const Quat &q); + + /** + * @brief return cos value of this quaternion, cos could be calculated as: + * \f[\cos(p) = \cos(w) * \cosh(||\boldsymbol{v}||) - \sin(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sinh(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.cos(); + * ``` + */ + Quat<_Tp> cos() const; + + /** + * @brief return tan value of quaternion q, tan could be calculated as: + * \f[\tan(q) = \frac{\sin(q)}{\cos(q)}.\f] + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * tan(q); + * ``` + */ + template + friend Quat tan(const Quat &q); + + /** + * @brief return tan value of this quaternion, tan could be calculated as: + * \f[\tan(q) = \frac{\sin(q)}{\cos(q)}.\f] + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.tan(); + * ``` + */ + Quat<_Tp> tan() const; + + /** + * @brief return arcsin value of quaternion q, arcsin could be calculated as: + * \f[\arcsin(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arcsinh(q\frac{\boldsymbol{v}}{||\boldsymbol{v}||})\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * asin(q); + * ``` + */ + template + friend Quat asin(const Quat &q); + + /** + * @brief return arcsin value of this quaternion, arcsin could be calculated as: + * \f[\arcsin(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arcsinh(q\frac{\boldsymbol{v}}{||\boldsymbol{v}||})\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.asin(); + * ``` + */ + Quat<_Tp> asin() const; + + /** + * @brief return arccos value of quaternion q, arccos could be calculated as: + * \f[\arccos(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arccosh(q)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. 
+ * + * For example + * ``` + * Quatd q(1,2,3,4); + * acos(q); + * ``` + */ + template + friend Quat acos(const Quat &q); + + /** + * @brief return arccos value of this quaternion, arccos could be calculated as: + * \f[\arccos(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arccosh(q)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.acos(); + * ``` + */ + Quat<_Tp> acos() const; + + /** + * @brief return arctan value of quaternion q, arctan could be calculated as: + * \f[\arctan(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arctanh(q\frac{\boldsymbol{v}}{||\boldsymbol{v}||})\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * atan(q); + * ``` + */ + template + friend Quat atan(const Quat &q); + + /** + * @brief return arctan value of this quaternion, arctan could be calculated as: + * \f[\arctan(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arctanh(q\frac{\boldsymbol{v}}{||\boldsymbol{v}||})\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.atan(); + * ``` + */ + Quat<_Tp> atan() const; + + /** + * @brief return arcsinh value of quaternion q, arcsinh could be calculated as: + * \f[arcsinh(q) = \ln(q + \sqrt{q^2 + 1})\f]. + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * asinh(q); + * ``` + */ + template + friend Quat asinh(const Quat &q); + + /** + * @brief return arcsinh value of this quaternion, arcsinh could be calculated as: + * \f[arcsinh(q) = \ln(q + \sqrt{q^2 + 1})\f]. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.asinh(); + * ``` + */ + Quat<_Tp> asinh() const; + + /** + * @brief return arccosh value of quaternion q, arccosh could be calculated as: + * \f[arccosh(q) = \ln(q + \sqrt{q^2 - 1})\f]. + * @param q a quaternion. 
+     *
+     * For example
+     * ```
+     * Quatd q(1,2,3,4);
+     * acosh(q);
+     * ```
+     */
+    template <typename T>
+    friend Quat<T> acosh(const Quat<T> &q);
+
+    /**
+     * @brief return arccosh value of this quaternion, arccosh could be calculated as:
+     * \f[arccosh(q) = \ln(q + \sqrt{q^2 - 1})\f].
+     *
+     * For example
+     * ```
+     * Quatd q(1,2,3,4);
+     * q.acosh();
+     * ```
+     */
+    Quat<_Tp> acosh() const;
+
+    /**
+     * @brief return arctanh value of quaternion q, arctanh could be calculated as:
+     * \f[arctanh(q) = \frac{\ln(q + 1) - \ln(1 - q)}{2}\f].
+     * @param q a quaternion.
+     *
+     * For example
+     * ```
+     * Quatd q(1,2,3,4);
+     * atanh(q);
+     * ```
+     */
+    template <typename T>
+    friend Quat<T> atanh(const Quat<T> &q);
+
+    /**
+     * @brief return arctanh value of this quaternion, arctanh could be calculated as:
+     * \f[arctanh(q) = \frac{\ln(q + 1) - \ln(1 - q)}{2}\f].
+     *
+     * For example
+     * ```
+     * Quatd q(1,2,3,4);
+     * q.atanh();
+     * ```
+     */
+    Quat<_Tp> atanh() const;
+
+    /**
+     * @brief return true if this quaternion is a unit quaternion.
+     * @param eps tolerance scope of normalization. The eps could be defined as
+     *
+     * \f[eps = |1 - dotValue|\f] where \f[dotValue = (this.w^2 + this.x^2 + this.y^2 + this.z^2).\f]
+     * And this function will consider it is normalized when the dotValue over a range \f$[1-eps, 1+eps]\f$.
+     */
+    bool isNormal(_Tp eps=CV_QUAT_EPS) const;
+
+    /**
+     * @brief to throw an error if this quaternion is not a unit quaternion.
+     * @param eps tolerance scope of normalization.
+     * @sa isNormal
+     */
+    void assertNormal(_Tp eps=CV_QUAT_EPS) const;
+
+    /**
+     * @brief transform a quaternion to a 3x3 rotation matrix.
+     * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and
+     * this function will save some computations. Otherwise, this function will normalize this
+     * quaternion first and then do the transformation.
+ * + * @note Matrix A which is to be rotated should have the form + * \f[\begin{bmatrix} + * x_0& x_1& x_2&...&x_n\\ + * y_0& y_1& y_2&...&y_n\\ + * z_0& z_1& z_2&...&z_n + * \end{bmatrix}\f] + * where the same subscript represents a point. The shape of A assume to be [3, n] + * The points matrix A can be rotated by toRotMat3x3() * A. + * The result has 3 rows and n columns too. + + * For example + * ``` + * double angle = CV_PI; + * Vec3d axis{0,0,1}; + * Quatd q_unit = Quatd::createFromAngleAxis(angle, axis); //quaternion could also be get by interpolation by two or more quaternions. + * + * //assume there is two points (1,0,0) and (1,0,1) to be rotated + * Mat pointsA = (Mat_(2, 3) << 1,0,0,1,0,1); + * //change the shape + * pointsA = pointsA.t(); + * // rotate 180 degrees around the z axis + * Mat new_point = q_unit.toRotMat3x3() * pointsA; + * // print two points + * cout << new_point << endl; + * ``` + */ + Matx<_Tp, 3, 3> toRotMat3x3(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief transform a quaternion to a 4x4 rotation matrix. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. Otherwise, this function will normalized this + * quaternion at first then to do the transformation. + * + * The operations is similar as toRotMat3x3 + * except that the points matrix should have the form + * \f[\begin{bmatrix} + * x_0& x_1& x_2&...&x_n\\ + * y_0& y_1& y_2&...&y_n\\ + * z_0& z_1& z_2&...&z_n\\ + * 0&0&0&...&0 + * \end{bmatrix}\f] + * + * @sa toRotMat3x3 + */ + Matx<_Tp, 4, 4> toRotMat4x4(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief transform the this quaternion to a Vec. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.toVec(); + * ``` + */ + Vec<_Tp, 4> toVec() const; + + /** + * @brief transform this quaternion to a Rotation vector. 
+ * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. + * Rotation vector rVec is defined as: + * \f[ rVec = [\theta v_x, \theta v_y, \theta v_z]\f] + * where \f$\theta\f$ represents rotation angle, and \f$\boldsymbol{v}\f$ represents the normalized rotation axis. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.toRotVec(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q.normalize().toRotVec(assumeUnit); //answer is same as q.toRotVec(). + * ``` + */ + Vec<_Tp, 3> toRotVec(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief get the angle of quaternion, it returns the rotation angle. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. + * \f[\psi = 2 *arccos(\frac{w}{||q||})\f] + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.getAngle(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q.normalize().getAngle(assumeUnit);//same as q.getAngle(). + * ``` + * @note It always return the value between \f$[0, 2\pi]\f$. + */ + _Tp getAngle(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief get the axis of quaternion, it returns a vector of length 3. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. + * + * the unit axis \f$\boldsymbol{u}\f$ is defined by + * \f[\begin{equation} + * \begin{split} + * \boldsymbol{v} + * &= \boldsymbol{u} ||\boldsymbol{v}||\\ + * &= \boldsymbol{u}||q||sin(\frac{\theta}{2}) + * \end{split} + * \end{equation}\f] + * where \f$v=[x, y ,z]\f$ and \f$\theta\f$ represents rotation angle. 
+ * + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.getAxis(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q.normalize().getAxis(assumeUnit);//same as q.getAxis() + * ``` + */ + Vec<_Tp, 3> getAxis(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the dot between quaternion \f$q\f$ and this quaternion. + * + * dot(p, q) is a good metric of how close the quaternions are. + * Indeed, consider the unit quaternion difference \f$p^{-1} * q\f$, its real part is dot(p, q). + * At the same time its real part is equal to \f$\cos(\beta/2)\f$ where \f$\beta\f$ is + * an angle of rotation between p and q, i.e., + * Therefore, the closer dot(p, q) to 1, + * the smaller rotation between them. + * \f[p \cdot q = p.w \cdot q.w + p.x \cdot q.x + p.y \cdot q.y + p.z \cdot q.z\f] + * @param q the other quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * Quatd p(5,6,7,8); + * p.dot(q); + * ``` + */ + _Tp dot(Quat<_Tp> q) const; + + /** + * @brief To calculate the interpolation from \f$q_0\f$ to \f$q_1\f$ by Linear Interpolation(Nlerp) + * For two quaternions, this interpolation curve can be displayed as: + * \f[Lerp(q_0, q_1, t) = (1 - t)q_0 + tq_1.\f] + * Obviously, the lerp will interpolate along a straight line if we think of \f$q_0\f$ and \f$q_1\f$ as a vector + * in a two-dimensional space. When \f$t = 0\f$, it returns \f$q_0\f$ and when \f$t= 1\f$, it returns \f$q_1\f$. + * \f$t\f$ should to be ranged in \f$[0, 1]\f$ normally. + * @param q0 a quaternion used in linear interpolation. + * @param q1 a quaternion used in linear interpolation. + * @param t percent of vector \f$\overrightarrow{q_0q_1}\f$ over a range [0, 1]. + * @note it returns a non-unit quaternion. + */ + static Quat<_Tp> lerp(const Quat<_Tp> &q0, const Quat &q1, const _Tp t); + + /** + * @brief To calculate the interpolation from \f$q_0\f$ to \f$q_1\f$ by Normalized Linear Interpolation(Nlerp). 
+ * it returns a normalized quaternion of Linear Interpolation(Lerp). + * \f[ Nlerp(q_0, q_1, t) = \frac{(1 - t)q_0 + tq_1}{||(1 - t)q_0 + tq_1||}.\f] + * The interpolation will always choose the shortest path but the constant speed is not guaranteed. + * @param q0 a quaternion used in normalized linear interpolation. + * @param q1 a quaternion used in normalized linear interpolation. + * @param t percent of vector \f$\overrightarrow{q_0q_1}\f$ over a range [0, 1]. + * @param assumeUnit if QUAT_ASSUME_UNIT, all input quaternions assume to be unit quaternion. Otherwise, all inputs + quaternion will be normalized inside the function. + * @sa lerp + */ + static Quat<_Tp> nlerp(const Quat<_Tp> &q0, const Quat &q1, const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + + /** + @brief To calculate the interpolation between \f$q_0\f$ and \f$q_1\f$ by Spherical Linear + Interpolation(Slerp), which can be defined as: + \f[ Slerp(q_0, q_1, t) = \frac{\sin((1-t)\theta)}{\sin(\theta)}q_0 + \frac{\sin(t\theta)}{\sin(\theta)}q_1\f] + where \f$\theta\f$ can be calculated as: + \f[\theta=cos^{-1}(q_0\cdot q_1)\f] + resulting from the both of their norm is unit. + @param q0 a quaternion used in Slerp. + @param q1 a quaternion used in Slerp. + @param t percent of angle between \f$q_0\f$ and \f$q_1\f$ over a range [0, 1]. + @param assumeUnit if QUAT_ASSUME_UNIT, all input quaternions assume to be unit quaternions. Otherwise, all input + quaternions will be normalized inside the function. + @param directChange if QUAT_ASSUME_UNIT, the interpolation will choose the nearest path. + @note If the interpolation angle is small, the error between Nlerp and Slerp is not so large. To improve efficiency and + avoid zero division error, we use Nlerp instead of Slerp. 
+     */
+    static Quat<_Tp> slerp(const Quat<_Tp> &q0, const Quat &q1, const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT, bool directChange=true);
+
+    /**
+     * @brief To calculate the interpolation between \f$q_0\f$,\f$q_1\f$,\f$q_2\f$,\f$q_3\f$ by Spherical and quadrangle(Squad). This could be defined as:
+     * \f[Squad(q_i, s_i, s_{i+1}, q_{i+1}, t) = Slerp(Slerp(q_i, q_{i+1}, t), Slerp(s_i, s_{i+1}, t), 2t(1-t))\f]
+     * where
+     * \f[s_i = q_i\exp(-\frac{\log(q^*_iq_{i+1}) + \log(q^*_iq_{i-1})}{4})\f]
+     *
+     * The Squad expression is analogous to the \f$B\acute{e}zier\f$ curve, but involves spherical linear
+     * interpolation instead of simple linear interpolation. Each \f$s_i\f$ needs to be calculated by three
+     * quaternions.
+     *
+     * @param q0 the first quaternion.
+     * @param s0 the second quaternion.
+     * @param s1 the third quaternion.
+     * @param q1 the fourth quaternion.
+     * @param t interpolation parameter of quadratic and linear interpolation over a range \f$[0, 1]\f$.
+     * @param assumeUnit if QUAT_ASSUME_UNIT, all input quaternions assume to be unit quaternion. Otherwise, all input
+     * quaternions will be normalized inside the function.
+     * @param directChange if QUAT_ASSUME_UNIT, squad will find the nearest path to interpolate.
+     * @sa interPoint, spline
+     */
+    static Quat<_Tp> squad(const Quat<_Tp> &q0, const Quat<_Tp> &s0,
+                           const Quat<_Tp> &s1, const Quat<_Tp> &q1,
+                           const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT,
+                           bool directChange=true);
+
+    /**
+     * @brief This is the part calculation of squad.
+     * To calculate the intermediate quaternion \f$s_i\f$ between each three quaternion
+     * \f[s_i = q_i\exp(-\frac{\log(q^*_iq_{i+1}) + \log(q^*_iq_{i-1})}{4}).\f]
+     * @param q0 the first quaternion.
+     * @param q1 the second quaternion.
+     * @param q2 the third quaternion.
+     * @param assumeUnit if QUAT_ASSUME_UNIT, all input quaternions assume to be unit quaternion. Otherwise, all input
+     * quaternions will be normalized inside the function.
+ * @sa squad + */ + static Quat<_Tp> interPoint(const Quat<_Tp> &q0, const Quat<_Tp> &q1, + const Quat<_Tp> &q2, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + + /** + * @brief to calculate a quaternion which is the result of a \f$C^1\f$ continuous + * spline curve constructed by squad at the ratio t. Here, the interpolation values are + * between \f$q_1\f$ and \f$q_2\f$. \f$q_0\f$ and \f$q_2\f$ are used to ensure the \f$C^1\f$ + * continuity. if t = 0, it returns \f$q_1\f$, if t = 1, it returns \f$q_2\f$. + * @param q0 the first input quaternion to ensure \f$C^1\f$ continuity. + * @param q1 the second input quaternion. + * @param q2 the third input quaternion. + * @param q3 the fourth input quaternion the same use of \f$q1\f$. + * @param t ratio over a range [0, 1]. + * @param assumeUnit if QUAT_ASSUME_UNIT, \f$q_0, q_1, q_2, q_3\f$ assume to be unit quaternion. Otherwise, all input + * quaternions will be normalized inside the function. + * + * For example: + * + * If there are three double quaternions \f$v_0, v_1, v_2\f$ waiting to be interpolated. 
+ * + * Interpolation between \f$v_0\f$ and \f$v_1\f$ with a ratio \f$t_0\f$ could be calculated as + * ``` + * Quatd::spline(v0, v0, v1, v2, t0); + * ``` + * Interpolation between \f$v_1\f$ and \f$v_2\f$ with a ratio \f$t_0\f$ could be calculated as + * ``` + * Quatd::spline(v0, v1, v2, v2, t0); + * ``` + * @sa squad, slerp + */ + static Quat<_Tp> spline(const Quat<_Tp> &q0, const Quat<_Tp> &q1, + const Quat<_Tp> &q2, const Quat<_Tp> &q3, + const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + + + Quat<_Tp> operator-() const; + + bool operator==(const Quat<_Tp>&) const; + + Quat<_Tp> operator+(const Quat<_Tp>&) const; + + Quat<_Tp>& operator+=(const Quat<_Tp>&); + + Quat<_Tp> operator-(const Quat<_Tp>&) const; + + Quat<_Tp>& operator-=(const Quat<_Tp>&); + + Quat<_Tp>& operator*=(const Quat<_Tp>&); + + Quat<_Tp>& operator*=(const _Tp&); + + Quat<_Tp> operator*(const Quat<_Tp>&) const; + + Quat<_Tp> operator/(const _Tp&) const; + + Quat<_Tp> operator/(const Quat<_Tp>&) const; + + Quat<_Tp>& operator/=(const _Tp&); + + Quat<_Tp>& operator/=(const Quat<_Tp>&); + + _Tp& operator[](std::size_t n); + + const _Tp& operator[](std::size_t n) const; + + template + friend Quat cv::operator*(const T, const Quat&); + + template + friend Quat cv::operator*(const Quat&, const T); + + template + friend std::ostream& cv::operator<<(std::ostream&, const Quat&); + + _Tp w, x, y, z; + +}; + +template +Quat inv(const Quat &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + +template +Quat sinh(const Quat &q); + +template +Quat cosh(const Quat &q); + +template +Quat tanh(const Quat &q); + +template +Quat sin(const Quat &q); + +template +Quat cos(const Quat &q); + +template +Quat tan(const Quat &q); + +template +Quat asinh(const Quat &q); + +template +Quat acosh(const Quat &q); + +template +Quat atanh(const Quat &q); + +template +Quat asin(const Quat &q); + +template +Quat acos(const Quat &q); + +template +Quat atan(const Quat &q); + +template +Quat power(const Quat &q, 
const Quat &p, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + +template +Quat exp(const Quat &q); + +template +Quat log(const Quat &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + +template +Quat power(const Quat& q, _T x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + +template +Quat crossProduct(const Quat &p, const Quat &q); + +template +Quat sqrt(const Quat &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + +template +Quat operator*(const T, const Quat&); + +template +Quat operator*(const Quat&, const T); + +template +std::ostream& operator<<(std::ostream&, const Quat&); + +using Quatd = Quat; +using Quatf = Quat; + +//! @} core +} + +#include "opencv2/core/quaternion.inl.hpp" + +#endif /* OPENCV_CORE_QUATERNION_HPP */ diff --git a/modules/core/include/opencv2/core/quaternion.inl.hpp b/modules/core/include/opencv2/core/quaternion.inl.hpp new file mode 100644 index 0000000000..769f53ed4b --- /dev/null +++ b/modules/core/include/opencv2/core/quaternion.inl.hpp @@ -0,0 +1,849 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved. +// Third party copyrights are property of their respective owners. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+// Author: Liangqian Kong
+//         Longbu Wang
+
+#ifndef OPENCV_CORE_QUATERNION_INL_HPP
+#define OPENCV_CORE_QUATERNION_INL_HPP
+
+#ifndef OPENCV_CORE_QUATERNION_HPP
+#error This is not a standalone header. Include quaternion.hpp instead.
+#endif
+
+//@cond IGNORE
+///////////////////////////////////////////////////////////////////////////////////////
+//Implementation
+namespace cv {
+
+template <typename T>
+Quat<T>::Quat() : w(0), x(0), y(0), z(0) {}
+
+template <typename T>
+Quat<T>::Quat(const Vec<T, 4> &coeff):w(coeff[0]), x(coeff[1]), y(coeff[2]), z(coeff[3]){}
+
+template <typename T>
+Quat<T>::Quat(const T qw, const T qx, const T qy, const T qz):w(qw), x(qx), y(qy), z(qz){}
+
+template <typename T>
+Quat<T> Quat<T>::createFromAngleAxis(const T angle, const Vec<T, 3> &axis)
+{
+    T w, x, y, z;
+    T vNorm = std::sqrt(axis.dot(axis));
+    if (vNorm < CV_QUAT_EPS)
+    {
+        CV_Error(Error::StsBadArg, "this quaternion does not represent a rotation");
+    }
+    const T angle_half = angle * 0.5;
+    w = std::cos(angle_half);
+    const T sin_v = std::sin(angle_half);
+    const T sin_norm = sin_v / vNorm;
+    x = sin_norm * axis[0];
+    y = sin_norm * axis[1];
+    z = sin_norm * axis[2];
+    return Quat<T>(w, x, y, z);
+}
+
+template <typename T>
+Quat<T> Quat<T>::createFromRotMat(InputArray _R)
+{
+    CV_CheckTypeEQ(_R.type(), cv::traits::Type<T>::value, "");
+    if (_R.rows() != 3 || _R.cols() != 3)
+    {
+        CV_Error(Error::StsBadArg, "Cannot convert matrix to quaternion: rotation matrix should be a 3x3 matrix");
+    }
+    Matx<T, 3, 3> R;
+    _R.copyTo(R);
+
+    T S, w, x, y, z;
+    T trace = R(0, 0) + R(1, 1) + R(2, 2);
+    if (trace > 0)
+    {
+        S = std::sqrt(trace + 1) * 2;
+        x = (R(1, 2) - R(2, 1)) / S;
+        y = (R(2, 0) - R(0, 2)) / S;
+        z = (R(0, 1) - R(1, 0)) / S;
+        w = -0.25 * S;
+    }
+    else if (R(0, 0) > R(1, 1) && R(0, 0) > R(2, 2))
+    {
+
+        S = std::sqrt(1.0 + R(0, 0) - R(1, 1) - R(2, 2)) * 2;
+        x = -0.25 * S;
+        y = -(R(1, 0) + R(0, 1)) / S;
+        z = -(R(0, 2) + R(2, 0)) / S;
+        w = (R(1, 2) - R(2, 1)) / S;
+    }
+    else if (R(1, 1) > R(2, 2))
+    {
+        S = std::sqrt(1.0 - R(0, 0) + R(1, 1) - R(2, 2)) * 2;
+        x = (R(0, 1) + R(1, 0)) / S;
+        y = 0.25 * S;
+        z = (R(1, 2) + R(2, 1)) / S;
+        w = (R(0, 2) - R(2, 0)) / S;
+    }
+    else
+    {
+        S = std::sqrt(1.0 - R(0, 0) - R(1, 1) + R(2, 2)) * 2;
+        x = (R(0, 2) + R(2, 0)) / S;
+        y = (R(1, 2) + R(2, 1)) / S;
+        z = 0.25 * S;
+        w = -(R(0, 1) - R(1, 0)) / S;
+    }
+    return Quat<T> (w, x, y, z);
+}
+
+template <typename T>
+Quat<T> Quat<T>::createFromRvec(InputArray _rvec)
+{
+    if (!((_rvec.cols() == 1 && _rvec.rows() == 3) || (_rvec.cols() == 3 && _rvec.rows() == 1))) {
+        CV_Error(Error::StsBadArg, "Cannot convert rotation vector to quaternion: The length of rotation vector should be 3");
+    }
+    Vec<T, 3> rvec;
+    _rvec.copyTo(rvec);
+    T psi = std::sqrt(rvec.dot(rvec));
+    if (abs(psi) < CV_QUAT_EPS) {
+        return Quat<T> (1, 0, 0, 0);
+    }
+    Vec<T, 3> axis = rvec / psi;
+    return createFromAngleAxis(psi, axis);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::operator-() const
+{
+    return Quat<T>(-w, -x, -y, -z);
+}
+
+
+template <typename T>
+inline bool Quat<T>::operator==(const Quat<T> &q) const
+{
+    return (abs(w - q.w) < CV_QUAT_EPS && abs(x - q.x) < CV_QUAT_EPS && abs(y - q.y) < CV_QUAT_EPS && abs(z - q.z) < CV_QUAT_EPS);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::operator+(const Quat<T> &q1) const
+{
+    return Quat<T>(w + q1.w, x + q1.x, y + q1.y, z + q1.z);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::operator-(const Quat<T> &q1) const
+{
+    return Quat<T>(w - q1.w, x - q1.x, y - q1.y, z - q1.z);
+}
+
+template <typename T>
+inline Quat<T>& Quat<T>::operator+=(const Quat<T> &q1)
+{
+    w += q1.w;
+    x += q1.x;
+    y += q1.y;
+    z += q1.z;
+    return *this;
+}
+
+template <typename T>
+inline Quat<T>& Quat<T>::operator-=(const Quat<T> &q1)
+{
+    w -= q1.w;
+    x -= q1.x;
+    y -= q1.y;
+    z -= q1.z;
+    return *this;
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::operator*(const Quat<T> &q1) const
+{
+    Vec<T, 4> q{w, x, y, z};
+    Vec<T, 4> q2{q1.w, q1.x, q1.y, q1.z};
+    return Quat<T>(q * q2);
+}
+
+
+template <typename T, typename S>
+Quat<T> operator*(const Quat<T> &q1, const S a)
+{
+    return Quat<T>(a * q1.w, a * q1.x, a * q1.y, a * q1.z);
+}
+
+template <typename T, typename S>
+Quat<T> operator*(const S a, const Quat<T> &q1)
+{
+    return Quat<T>(a * q1.w, a * q1.x, a * q1.y, a * q1.z);
+}
+
+template <typename T>
+inline Quat<T>& Quat<T>::operator*=(const Quat<T> &q1)
+{
+    T qw, qx, qy, qz;
+    qw = w * q1.w - x * q1.x - y * q1.y - z * q1.z;
+    qx = x * q1.w + w * q1.x + y * q1.z - z * q1.y;
+    qy = y * q1.w + w * q1.y + z * q1.x - x * q1.z;
+    qz = z * q1.w + w * q1.z + x * q1.y - y * q1.x;
+    w = qw;
+    x = qx;
+    y = qy;
+    z = qz;
+    return *this;
+}
+
+template <typename T>
+inline Quat<T>& Quat<T>::operator/=(const Quat<T> &q1)
+{
+    Quat<T> q(*this * q1.inv());
+    w = q.w;
+    x = q.x;
+    y = q.y;
+    z = q.z;
+    return *this;
+}
+template <typename T>
+Quat<T>& Quat<T>::operator*=(const T &q1)
+{
+    w *= q1;
+    x *= q1;
+    y *= q1;
+    z *= q1;
+    return *this;
+}
+
+template <typename T>
+inline Quat<T>& Quat<T>::operator/=(const T &a)
+{
+    const T a_inv = 1.0 / a;
+    w *= a_inv;
+    x *= a_inv;
+    y *= a_inv;
+    z *= a_inv;
+    return *this;
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::operator/(const T &a) const
+{
+    const T a_inv = 1.0 / a;
+    return Quat<T>(w * a_inv, x * a_inv, y * a_inv, z * a_inv);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::operator/(const Quat<T> &q) const
+{
+    return *this * q.inv();
+}
+
+template <typename T>
+inline const T& Quat<T>::operator[](std::size_t n) const
+{
+    switch (n) {
+        case 0:
+            return w;
+        case 1:
+            return x;
+        case 2:
+            return y;
+        case 3:
+            return z;
+        default:
+            CV_Error(Error::StsOutOfRange, "subscript exceeds the index range");
+    }
+}
+
+template <typename T>
+inline T& Quat<T>::operator[](std::size_t n)
+{
+    switch (n) {
+        case 0:
+            return w;
+        case 1:
+            return x;
+        case 2:
+            return y;
+        case 3:
+            return z;
+        default:
+            CV_Error(Error::StsOutOfRange, "subscript exceeds the index range");
+    }
+}
+
+template <typename T>
+std::ostream & operator<<(std::ostream &os, const Quat<T> &q)
+{
+    os << "Quat " << Vec<T, 4>{q.w, q.x, q.y, q.z};
+    return os;
+}
+
+template <typename T>
+inline T Quat<T>::at(size_t index) const
+{
+    return (*this)[index];
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::conjugate() const
+{
+    return Quat<T>(w, -x, -y, -z);
+}
+
+template <typename T>
+inline T Quat<T>::norm() const
+{
+    return std::sqrt(dot(*this));
+}
+
+template <typename T>
+Quat<T> exp(const Quat<T> &q)
+{
+    return q.exp();
+}
+
+template <typename T>
+Quat<T> Quat<T>::exp() const
+{
+    Vec<T, 3> v{x, y, z};
+    T normV = std::sqrt(v.dot(v));
+    T k = normV < CV_QUAT_EPS ? 1 : std::sin(normV) / normV;
+    return std::exp(w) * Quat<T>(std::cos(normV), v[0] * k, v[1] * k, v[2] * k);
+}
+
+template <typename T>
+Quat<T> log(const Quat<T> &q, QuatAssumeType assumeUnit)
+{
+    return q.log(assumeUnit);
+}
+
+template <typename T>
+Quat<T> Quat<T>::log(QuatAssumeType assumeUnit) const
+{
+    Vec<T, 3> v{x, y, z};
+    T vNorm = std::sqrt(v.dot(v));
+    if (assumeUnit)
+    {
+        T k = vNorm < CV_QUAT_EPS ? 1 : std::acos(w) / vNorm;
+        return Quat<T>(0, v[0] * k, v[1] * k, v[2] * k);
+    }
+    T qNorm = norm();
+    if (qNorm < CV_QUAT_EPS)
+    {
+        CV_Error(Error::StsBadArg, "Cannot apply this quaternion to log function: undefined");
+    }
+    T k = vNorm < CV_QUAT_EPS ? 1 : std::acos(w / qNorm) / vNorm;
+    return Quat<T>(std::log(qNorm), v[0] * k, v[1] * k, v[2] *k);
+}
+
+template <typename T, typename _T>
+inline Quat<T> power(const Quat<T> &q1, _T alpha, QuatAssumeType assumeUnit)
+{
+    return q1.power(alpha, assumeUnit);
+}
+
+template <typename T>
+template <typename _T>
+inline Quat<T> Quat<T>::power(_T alpha, QuatAssumeType assumeUnit) const
+{
+    if (x * x + y * y + z * z > CV_QUAT_EPS)
+    {
+        T angle = getAngle(assumeUnit);
+        Vec<T, 3> axis = getAxis(assumeUnit);
+        if (assumeUnit)
+        {
+            return createFromAngleAxis(alpha * angle, axis);
+        }
+        return std::pow(norm(), alpha) * createFromAngleAxis(alpha * angle, axis);
+    }
+    else
+    {
+        return std::pow(norm(), alpha) * Quat<T>(w, x, y, z);
+    }
+}
+
+
+template <typename T>
+inline Quat<T> sqrt(const Quat<T> &q, QuatAssumeType assumeUnit)
+{
+    return q.sqrt(assumeUnit);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::sqrt(QuatAssumeType assumeUnit) const
+{
+    return power(0.5, assumeUnit);
+}
+
+
+template <typename T>
+inline Quat<T> power(const Quat<T> &p, const Quat<T> &q, QuatAssumeType assumeUnit)
+{
+    return p.power(q, assumeUnit);
+}
+
+
+template <typename T>
+inline Quat<T> Quat<T>::power(const Quat<T> &q, QuatAssumeType assumeUnit) const
+{
+    return cv::exp(q * log(assumeUnit));
+}
+
+template <typename T>
+inline T Quat<T>::dot(Quat<T> q1) const
+{
+    return w * q1.w + x * q1.x + y * q1.y + z * q1.z;
+}
+
+
+template <typename T>
+inline Quat<T> crossProduct(const Quat<T> &p, const 
Quat &q) +{ + return p.crossProduct(q); +} + + +template +inline Quat Quat::crossProduct(const Quat &q) const +{ + return Quat (0, y * q.z - z * q.y, z * q.x - x * q.z, x * q.y - q.x * y); +} + +template +inline Quat Quat::normalize() const +{ + T normVal = norm(); + if (normVal < CV_QUAT_EPS) + { + CV_Error(Error::StsBadArg, "Cannot normalize this quaternion: the norm is too small."); + } + return Quat(w / normVal, x / normVal, y / normVal, z / normVal) ; +} + +template +inline Quat inv(const Quat &q, QuatAssumeType assumeUnit) +{ + return q.inv(assumeUnit); +} + + +template +inline Quat Quat::inv(QuatAssumeType assumeUnit) const +{ + if (assumeUnit) + { + return conjugate(); + } + T norm2 = dot(*this); + if (norm2 < CV_QUAT_EPS) + { + CV_Error(Error::StsBadArg, "This quaternion do not have inverse quaternion"); + } + return conjugate() / norm2; +} + +template +inline Quat sinh(const Quat &q) +{ + return q.sinh(); +} + + +template +inline Quat Quat::sinh() const +{ + Vec v{x, y ,z}; + T vNorm = std::sqrt(v.dot(v)); + T k = vNorm < CV_QUAT_EPS ? 1 : std::cosh(w) * std::sin(vNorm) / vNorm; + return Quat(std::sinh(w) * std::cos(vNorm), v[0] * k, v[1] * k, v[2] * k); +} + + +template +inline Quat cosh(const Quat &q) +{ + return q.cosh(); +} + + +template +inline Quat Quat::cosh() const +{ + Vec v{x, y ,z}; + T vNorm = std::sqrt(v.dot(v)); + T k = vNorm < CV_QUAT_EPS ? 1 : std::sinh(w) * std::sin(vNorm) / vNorm; + return Quat(std::cosh(w) * std::cos(vNorm), v[0] * k, v[1] * k, v[2] * k); +} + +template +inline Quat tanh(const Quat &q) +{ + return q.tanh(); +} + +template +inline Quat Quat::tanh() const +{ + return sinh() * cosh().inv(); +} + + +template +inline Quat sin(const Quat &q) +{ + return q.sin(); +} + + +template +inline Quat Quat::sin() const +{ + Vec v{x, y ,z}; + T vNorm = std::sqrt(v.dot(v)); + T k = vNorm < CV_QUAT_EPS ? 
1 : std::cos(w) * std::sinh(vNorm) / vNorm; + return Quat(std::sin(w) * std::cosh(vNorm), v[0] * k, v[1] * k, v[2] * k); +} + +template +inline Quat cos(const Quat &q) +{ + return q.cos(); +} + +template +inline Quat Quat::cos() const +{ + Vec v{x, y ,z}; + T vNorm = std::sqrt(v.dot(v)); + T k = vNorm < CV_QUAT_EPS ? 1 : std::sin(w) * std::sinh(vNorm) / vNorm; + return Quat(std::cos(w) * std::cosh(vNorm), -v[0] * k, -v[1] * k, -v[2] * k); +} + +template +inline Quat tan(const Quat &q) +{ + return q.tan(); +} + +template +inline Quat Quat::tan() const +{ + return sin() * cos().inv(); +} + +template +inline Quat asinh(const Quat &q) +{ + return q.asinh(); +} + +template +inline Quat Quat::asinh() const +{ + return cv::log(*this + cv::power(*this * *this + Quat(1, 0, 0, 0), 0.5)); +} + +template +inline Quat acosh(const Quat &q) +{ + return q.acosh(); +} + +template +inline Quat Quat::acosh() const +{ + return cv::log(*this + cv::power(*this * *this - Quat(1,0,0,0), 0.5)); +} + +template +inline Quat atanh(const Quat &q) +{ + return q.atanh(); +} + +template +inline Quat Quat::atanh() const +{ + Quat ident(1, 0, 0, 0); + Quat c1 = (ident + *this).log(); + Quat c2 = (ident - *this).log(); + return 0.5 * (c1 - c2); +} + +template +inline Quat asin(const Quat &q) +{ + return q.asin(); +} + +template +inline Quat Quat::asin() const +{ + Quat v(0, x, y, z); + T vNorm = v.norm(); + T k = vNorm < CV_QUAT_EPS ? 1 : vNorm; + return -v / k * (*this * v / k).asinh(); +} + +template +inline Quat acos(const Quat &q) +{ + return q.acos(); +} + +template +inline Quat Quat::acos() const +{ + Quat v(0, x, y, z); + T vNorm = v.norm(); + T k = vNorm < CV_QUAT_EPS ? 1 : vNorm; + return -v / k * acosh(); +} + +template +inline Quat atan(const Quat &q) +{ + return q.atan(); +} + +template +inline Quat Quat::atan() const +{ + Quat v(0, x, y, z); + T vNorm = v.norm(); + T k = vNorm < CV_QUAT_EPS ? 
1 : vNorm; + return -v / k * (*this * v / k).atanh(); +} + +template +inline T Quat::getAngle(QuatAssumeType assumeUnit) const +{ + if (assumeUnit) + { + return 2 * std::acos(w); + } + if (norm() < CV_QUAT_EPS) + { + CV_Error(Error::StsBadArg, "This quaternion does not represent a rotation"); + } + return 2 * std::acos(w / norm()); +} + +template +inline Vec Quat::getAxis(QuatAssumeType assumeUnit) const +{ + T angle = getAngle(assumeUnit); + const T sin_v = std::sin(angle * 0.5); + if (assumeUnit) + { + return Vec{x, y, z} / sin_v; + } + return Vec {x, y, z} / (norm() * sin_v); +} + +template +Matx Quat::toRotMat4x4(QuatAssumeType assumeUnit) const +{ + T a = w, b = x, c = y, d = z; + if (!assumeUnit) + { + Quat qTemp = normalize(); + a = qTemp.w; + b = qTemp.x; + c = qTemp.y; + d = qTemp.z; + } + Matx R{ + 1 - 2 * (c * c + d * d), 2 * (b * c - a * d) , 2 * (b * d + a * c) , 0, + 2 * (b * c + a * d) , 1 - 2 * (b * b + d * d), 2 * (c * d - a * b) , 0, + 2 * (b * d - a * c) , 2 * (c * d + a * b) , 1 - 2 * (b * b + c * c), 0, + 0 , 0 , 0 , 1, + }; + return R; +} + +template +Matx Quat::toRotMat3x3(QuatAssumeType assumeUnit) const +{ + T a = w, b = x, c = y, d = z; + if (!assumeUnit) + { + Quat qTemp = normalize(); + a = qTemp.w; + b = qTemp.x; + c = qTemp.y; + d = qTemp.z; + } + Matx R{ + 1 - 2 * (c * c + d * d), 2 * (b * c - a * d) , 2 * (b * d + a * c), + 2 * (b * c + a * d) , 1 - 2 * (b * b + d * d), 2 * (c * d - a * b), + 2 * (b * d - a * c) , 2 * (c * d + a * b) , 1 - 2 * (b * b + c * c) + }; + return R; +} + +template +Vec Quat::toRotVec(QuatAssumeType assumeUnit) const +{ + T angle = getAngle(assumeUnit); + Vec axis = getAxis(assumeUnit); + return angle * axis; +} + +template +Vec Quat::toVec() const +{ + return Vec{w, x, y, z}; +} + +template +Quat Quat::lerp(const Quat &q0, const Quat &q1, const T t) +{ + return (1 - t) * q0 + t * q1; +} + +template +Quat Quat::slerp(const Quat &q0, const Quat &q1, const T t, QuatAssumeType assumeUnit, bool directChange) +{ 
+ Quatd v0(q0); + Quatd v1(q1); + if (!assumeUnit) + { + v0 = v0.normalize(); + v1 = v1.normalize(); + } + T cosTheta = v0.dot(v1); + constexpr T DOT_THRESHOLD = 0.995; + if (cosTheta > DOT_THRESHOLD) + { + return nlerp(v0, v1, t, QUAT_ASSUME_UNIT); + } + + if (directChange && cosTheta < 0) + { + v0 = -v0; + cosTheta = -cosTheta; + } + T sinTheta = std::sqrt(1 - cosTheta * cosTheta); + T angle = atan2(sinTheta, cosTheta); + return (std::sin((1 - t) * angle) / (sinTheta) * v0 + std::sin(t * angle) / (sinTheta) * v1).normalize(); +} + + +template +inline Quat Quat::nlerp(const Quat &q0, const Quat &q1, const T t, QuatAssumeType assumeUnit) +{ + Quat v0(q0), v1(q1); + if (v1.dot(v0) < 0) + { + v0 = -v0; + } + if (assumeUnit) + { + return ((1 - t) * v0 + t * v1).normalize(); + } + v0 = v0.normalize(); + v1 = v1.normalize(); + return ((1 - t) * v0 + t * v1).normalize(); +} + + +template +inline bool Quat::isNormal(T eps) const +{ + + double normVar = norm(); + if ((normVar > 1 - eps) && (normVar < 1 + eps)) + return true; + return false; +} + +template +inline void Quat::assertNormal(T eps) const +{ + if (!isNormal(eps)) + CV_Error(Error::StsBadArg, "Quaternion should be normalized"); +} + + +template +inline Quat Quat::squad(const Quat &q0, const Quat &q1, + const Quat &q2, const Quat &q3, + const T t, QuatAssumeType assumeUnit, + bool directChange) +{ + Quat v0(q0), v1(q1), v2(q2), v3(q3); + if (!assumeUnit) + { + v0 = v0.normalize(); + v1 = v1.normalize(); + v2 = v2.normalize(); + v3 = v3.normalize(); + } + + Quat c0 = slerp(v0, v3, t, assumeUnit, directChange); + Quat c1 = slerp(v1, v2, t, assumeUnit, directChange); + return slerp(c0, c1, 2 * t * (1 - t), assumeUnit, directChange); +} + +template +Quat Quat::interPoint(const Quat &q0, const Quat &q1, + const Quat &q2, QuatAssumeType assumeUnit) +{ + Quat v0(q0), v1(q1), v2(q2); + if (!assumeUnit) + { + v0 = v0.normalize(); + v1 = v1.normalize(); + v2 = v2.normalize(); + } + return v1 * 
cv::exp(-(cv::log(v1.conjugate() * v0, assumeUnit) + (cv::log(v1.conjugate() * v2, assumeUnit))) / 4); +} + +template +Quat Quat::spline(const Quat &q0, const Quat &q1, const Quat &q2, const Quat &q3, const T t, QuatAssumeType assumeUnit) +{ + Quatd v0(q0), v1(q1), v2(q2), v3(q3); + if (!assumeUnit) + { + v0 = v0.normalize(); + v1 = v1.normalize(); + v2 = v2.normalize(); + v3 = v3.normalize(); + } + T cosTheta; + std::vector> vec{v0, v1, v2, v3}; + for (size_t i = 0; i < 3; ++i) + { + cosTheta = vec[i].dot(vec[i + 1]); + if (cosTheta < 0) + { + vec[i + 1] = -vec[i + 1]; + } + } + Quat s1 = interPoint(vec[0], vec[1], vec[2], QUAT_ASSUME_UNIT); + Quat s2 = interPoint(vec[1], vec[2], vec[3], QUAT_ASSUME_UNIT); + return squad(vec[1], s1, s2, vec[2], t, assumeUnit, QUAT_ASSUME_NOT_UNIT); +} + +} // namepsace +//! @endcond + +#endif /*OPENCV_CORE_QUATERNION_INL_HPP*/ diff --git a/modules/core/misc/objc/common/Converters.h b/modules/core/misc/objc/common/Converters.h index 9a238deb82..29d1b91eb5 100755 --- a/modules/core/misc/objc/common/Converters.h +++ b/modules/core/misc/objc/common/Converters.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import +#import #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/CvType.h b/modules/core/misc/objc/common/CvType.h index fb6f86aa48..b1fd71d487 100644 --- a/modules/core/misc/objc/common/CvType.h +++ b/modules/core/misc/objc/common/CvType.h @@ -5,7 +5,7 @@ // #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/DMatch.h b/modules/core/misc/objc/common/DMatch.h index 51bed493b8..91c2c59bfa 100644 --- a/modules/core/misc/objc/common/DMatch.h +++ b/modules/core/misc/objc/common/DMatch.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Double2.h b/modules/core/misc/objc/common/Double2.h index 
2162acb6d0..8e46c883d0 100644 --- a/modules/core/misc/objc/common/Double2.h +++ b/modules/core/misc/objc/common/Double2.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Double3.h b/modules/core/misc/objc/common/Double3.h index 2aaba9af80..5c741648f7 100644 --- a/modules/core/misc/objc/common/Double3.h +++ b/modules/core/misc/objc/common/Double3.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Float4.h b/modules/core/misc/objc/common/Float4.h index 2a89278040..c78e88b72e 100644 --- a/modules/core/misc/objc/common/Float4.h +++ b/modules/core/misc/objc/common/Float4.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Float6.h b/modules/core/misc/objc/common/Float6.h index d2ec19a60e..7e09772c5c 100644 --- a/modules/core/misc/objc/common/Float6.h +++ b/modules/core/misc/objc/common/Float6.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Int4.h b/modules/core/misc/objc/common/Int4.h index 1a17266572..11cc12db14 100644 --- a/modules/core/misc/objc/common/Int4.h +++ b/modules/core/misc/objc/common/Int4.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/KeyPoint.h b/modules/core/misc/objc/common/KeyPoint.h index 547960dc9d..096a1089c9 100644 --- a/modules/core/misc/objc/common/KeyPoint.h +++ b/modules/core/misc/objc/common/KeyPoint.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS 
#endif diff --git a/modules/core/misc/objc/common/Mat.h b/modules/core/misc/objc/common/Mat.h index 72f81dd9b7..fd1dce27ba 100644 --- a/modules/core/misc/objc/common/Mat.h +++ b/modules/core/misc/objc/common/Mat.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif @@ -97,6 +97,7 @@ CV_EXPORTS @interface Mat : NSObject - (void)createEx:(NSArray*)sizes type:(int)type NS_SWIFT_NAME(create(sizes:type:)); - (void)copySize:(Mat*)mat; - (Mat*)cross:(Mat*)mat; +- (unsigned char*)dataPtr NS_SWIFT_NAME(dataPointer()); - (int)depth; - (Mat*)diag:(int)diagonal; - (Mat*)diag; diff --git a/modules/core/misc/objc/common/Mat.mm b/modules/core/misc/objc/common/Mat.mm index c075e26046..5d41a3622e 100644 --- a/modules/core/misc/objc/common/Mat.mm +++ b/modules/core/misc/objc/common/Mat.mm @@ -286,6 +286,10 @@ static bool updateIdx(cv::Mat* mat, std::vector& indices, int inc) { return [[Mat alloc] initWithNativeMat:new cv::Mat(_nativePtr->cross(*(cv::Mat*)mat.nativePtr))]; } +- (unsigned char*)dataPtr { + return _nativePtr->data; +} + - (int)depth { return _nativePtr->depth(); } diff --git a/modules/core/misc/objc/common/MinMaxLocResult.h b/modules/core/misc/objc/common/MinMaxLocResult.h index e8daed4cc3..5ec6029e31 100644 --- a/modules/core/misc/objc/common/MinMaxLocResult.h +++ b/modules/core/misc/objc/common/MinMaxLocResult.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Point2d.h b/modules/core/misc/objc/common/Point2d.h index dbb8d55efa..0426b11d9a 100644 --- a/modules/core/misc/objc/common/Point2d.h +++ b/modules/core/misc/objc/common/Point2d.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Point2f.h b/modules/core/misc/objc/common/Point2f.h index 
0da4fba5d8..6d13c774d8 100644 --- a/modules/core/misc/objc/common/Point2f.h +++ b/modules/core/misc/objc/common/Point2f.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Point2i.h b/modules/core/misc/objc/common/Point2i.h index 9e5d74624a..e43ee3a8ec 100644 --- a/modules/core/misc/objc/common/Point2i.h +++ b/modules/core/misc/objc/common/Point2i.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Point3d.h b/modules/core/misc/objc/common/Point3d.h index 72b0d39ea8..618ded35fa 100644 --- a/modules/core/misc/objc/common/Point3d.h +++ b/modules/core/misc/objc/common/Point3d.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Point3f.h b/modules/core/misc/objc/common/Point3f.h index 2370fffeaa..c98add1cec 100644 --- a/modules/core/misc/objc/common/Point3f.h +++ b/modules/core/misc/objc/common/Point3f.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Point3i.h b/modules/core/misc/objc/common/Point3i.h index b0edeaa470..9eab2ee0ea 100644 --- a/modules/core/misc/objc/common/Point3i.h +++ b/modules/core/misc/objc/common/Point3i.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Range.h b/modules/core/misc/objc/common/Range.h index 91e76393d2..fae138c6f9 100644 --- a/modules/core/misc/objc/common/Range.h +++ b/modules/core/misc/objc/common/Range.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define 
CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Rect2d.h b/modules/core/misc/objc/common/Rect2d.h index ba91509b77..0ffcae9ab6 100644 --- a/modules/core/misc/objc/common/Rect2d.h +++ b/modules/core/misc/objc/common/Rect2d.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Rect2f.h b/modules/core/misc/objc/common/Rect2f.h index 6a8863800f..1f44f56263 100644 --- a/modules/core/misc/objc/common/Rect2f.h +++ b/modules/core/misc/objc/common/Rect2f.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Rect2i.h b/modules/core/misc/objc/common/Rect2i.h index 2e4e55cf30..6ed86d50bd 100644 --- a/modules/core/misc/objc/common/Rect2i.h +++ b/modules/core/misc/objc/common/Rect2i.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/RotatedRect.h b/modules/core/misc/objc/common/RotatedRect.h index c94053b6c1..a2049e6bf0 100644 --- a/modules/core/misc/objc/common/RotatedRect.h +++ b/modules/core/misc/objc/common/RotatedRect.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Scalar.h b/modules/core/misc/objc/common/Scalar.h index 63c3d1de58..d565155010 100644 --- a/modules/core/misc/objc/common/Scalar.h +++ b/modules/core/misc/objc/common/Scalar.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Size2d.h b/modules/core/misc/objc/common/Size2d.h index 11c6c50a02..cd2e4e4bc0 100644 --- a/modules/core/misc/objc/common/Size2d.h +++ 
b/modules/core/misc/objc/common/Size2d.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Size2f.h b/modules/core/misc/objc/common/Size2f.h index 2d1f2865c3..73ae9a2da0 100644 --- a/modules/core/misc/objc/common/Size2f.h +++ b/modules/core/misc/objc/common/Size2f.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/Size2i.h b/modules/core/misc/objc/common/Size2i.h index 61aa8da885..cd74e2c84a 100644 --- a/modules/core/misc/objc/common/Size2i.h +++ b/modules/core/misc/objc/common/Size2i.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/common/TermCriteria.h b/modules/core/misc/objc/common/TermCriteria.h index c7396582b2..ff6bfd565c 100644 --- a/modules/core/misc/objc/common/TermCriteria.h +++ b/modules/core/misc/objc/common/TermCriteria.h @@ -7,7 +7,7 @@ #pragma once #ifdef __cplusplus -#import "opencv.hpp" +#import "opencv2/core.hpp" #else #define CV_EXPORTS #endif diff --git a/modules/core/misc/objc/gen_dict.json b/modules/core/misc/objc/gen_dict.json index 25521c9326..c2ee554eba 100644 --- a/modules/core/misc/objc/gen_dict.json +++ b/modules/core/misc/objc/gen_dict.json @@ -217,6 +217,13 @@ "from_cpp": "[NSString stringWithUTF8String:%(n)s.c_str()]", "swift_type": "String" }, + "string": { + "cast_to": "std::string", + "objc_type": "NSString*", + "to_cpp": "std::string(%(n)s.UTF8String)", + "from_cpp": "[NSString stringWithUTF8String:%(n)s.c_str()]", + "swift_type": "String" + }, "TermCriteria": { "objc_type": "TermCriteria*", "to_cpp": "%(n)s.nativeRef", diff --git a/modules/core/src/convert.simd.hpp b/modules/core/src/convert.simd.hpp index 001fc78264..5154041b6d 100644 --- 
a/modules/core/src/convert.simd.hpp +++ b/modules/core/src/convert.simd.hpp @@ -5,6 +5,11 @@ #include "precomp.hpp" #include "convert.hpp" +#if !defined(OPENCV_SUPRESS_WARNING_AVX2_WITHOUT_FP16C) && \ + (defined(__GNUC__) && defined(__AVX2__) && !defined(__F16C__)) +#warning "Non-optimal compiler flags: AVX2 without FP16. Generated code is very slow. Consider adding '-mf16c' compiler option." +#endif + namespace cv { namespace hal { CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 7f4329df78..dcd585d834 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -1032,8 +1032,7 @@ void flip( InputArray _src, OutputArray _dst, int flip_mode ) } if ((size.width == 1 && flip_mode > 0) || - (size.height == 1 && flip_mode == 0) || - (size.height == 1 && size.width == 1 && flip_mode < 0)) + (size.height == 1 && flip_mode == 0)) { return _src.copyTo(_dst); } diff --git a/modules/core/src/directx.cpp b/modules/core/src/directx.cpp index c9bd1a4fa1..f028702d7f 100644 --- a/modules/core/src/directx.cpp +++ b/modules/core/src/directx.cpp @@ -49,6 +49,7 @@ #ifdef HAVE_DIRECTX #include #include "directx.inc.hpp" +#include "directx.hpp" #else // HAVE_DIRECTX #define NO_DIRECTX_SUPPORT_ERROR CV_Error(cv::Error::StsBadFunc, "OpenCV was build without DirectX support") #endif @@ -234,11 +235,191 @@ int getTypeFromD3DFORMAT(const int iD3DFORMAT) #endif } -namespace ocl { - #if defined(HAVE_DIRECTX) && defined(HAVE_OPENCL) -static bool g_isDirect3DDevice9Ex = false; // Direct3DDevice9Ex or Direct3DDevice9 was used +namespace internal { +struct OpenCLDirectXImpl +{ + cl_platform_id platform_; + + cl_platform_id initializedPlatform9 = NULL; + cl_platform_id initializedPlatform10 = NULL; + cl_platform_id initializedPlatform11 = NULL; +public: + OpenCLDirectXImpl() + : platform_(0) + { + } + + bool isDirect3DDevice9Ex = false; // Direct3DDevice9Ex or Direct3DDevice9 was used + +#ifdef HAVE_OPENCL_D3D11_NV + 
clCreateFromD3D11Texture2DNV_fn clCreateFromD3D11Texture2DNV = NULL; + clEnqueueAcquireD3D11ObjectsNV_fn clEnqueueAcquireD3D11ObjectsNV = NULL; + clEnqueueReleaseD3D11ObjectsNV_fn clEnqueueReleaseD3D11ObjectsNV = NULL; #endif + clCreateFromD3D11Texture2DKHR_fn clCreateFromD3D11Texture2DKHR = NULL; + clEnqueueAcquireD3D11ObjectsKHR_fn clEnqueueAcquireD3D11ObjectsKHR = NULL; + clEnqueueReleaseD3D11ObjectsKHR_fn clEnqueueReleaseD3D11ObjectsKHR = NULL; + + clCreateFromD3D10Texture2DKHR_fn clCreateFromD3D10Texture2DKHR = NULL; + clEnqueueAcquireD3D10ObjectsKHR_fn clEnqueueAcquireD3D10ObjectsKHR = NULL; + clEnqueueReleaseD3D10ObjectsKHR_fn clEnqueueReleaseD3D10ObjectsKHR = NULL; + + clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR = NULL; + clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR = NULL; + clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR = NULL; + + cl_platform_id getPlatform() + { + if (!platform_) + { + CV_Assert(cv::ocl::haveOpenCL()); + + cl_device_id device = (cl_device_id)ocl::Device::getDefault().ptr(); + CV_Assert(device); + cl_int status = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform_), &platform_, NULL); + if (status != CL_SUCCESS) + CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platform corresponding to device"); + } + + return platform_; + } + + + bool initializeD3D11() + { + using namespace cv::ocl; + cl_platform_id platform = getPlatform(); + + bool useCLNVEXT = false; + size_t exts_len; + cl_int status = clGetPlatformInfo(platform, CL_PLATFORM_EXTENSIONS, 0, NULL, &exts_len); + if (status != CL_SUCCESS) + CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get length of CL_PLATFORM_EXTENSIONS"); + cv::AutoBuffer extensions(exts_len); + status = clGetPlatformInfo(platform, CL_PLATFORM_EXTENSIONS, exts_len, static_cast(extensions.data()), NULL); + if (status != CL_SUCCESS) + CV_Error(cv::Error::OpenCLInitError, "OpenCL: No available CL_PLATFORM_EXTENSIONS"); 
+ bool is_support_cl_khr_d3d11_sharing = false; + if (strstr(extensions.data(), "cl_khr_d3d11_sharing")) + is_support_cl_khr_d3d11_sharing = true; +#ifdef HAVE_OPENCL_D3D11_NV + bool is_support_cl_nv_d3d11_sharing = false; + if (strstr(extensions.data(), "cl_nv_d3d11_sharing")) + is_support_cl_nv_d3d11_sharing = true; + if (!is_support_cl_nv_d3d11_sharing && !is_support_cl_khr_d3d11_sharing) + CV_Error(cv::Error::OpenCLInitError, "OpenCL: No supported extensions"); +#else + if (!is_support_cl_khr_d3d11_sharing) + CV_Error(cv::Error::OpenCLInitError, "OpenCL: No supported extensions"); +#endif + +#ifdef HAVE_OPENCL_D3D11_NV + if (is_support_cl_nv_d3d11_sharing) + { + if (initializedPlatform11 != platform) + { + clCreateFromD3D11Texture2DNV = (clCreateFromD3D11Texture2DNV_fn) + clGetExtensionFunctionAddressForPlatform(platform, "clCreateFromD3D11Texture2DNV"); + clEnqueueAcquireD3D11ObjectsNV = (clEnqueueAcquireD3D11ObjectsNV_fn) + clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueAcquireD3D11ObjectsNV"); + clEnqueueReleaseD3D11ObjectsNV = (clEnqueueReleaseD3D11ObjectsNV_fn) + clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueReleaseD3D11ObjectsNV"); + initializedPlatform11 = platform; + } + if (clCreateFromD3D11Texture2DNV && clEnqueueAcquireD3D11ObjectsNV && clEnqueueReleaseD3D11ObjectsNV) + { + useCLNVEXT = true; + } + } + else +#endif + { + if (is_support_cl_khr_d3d11_sharing) + { + if (initializedPlatform11 != platform) + { + clCreateFromD3D11Texture2DKHR = (clCreateFromD3D11Texture2DKHR_fn) + clGetExtensionFunctionAddressForPlatform(platform, "clCreateFromD3D11Texture2DKHR"); + clEnqueueAcquireD3D11ObjectsKHR = (clEnqueueAcquireD3D11ObjectsKHR_fn) + clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueAcquireD3D11ObjectsKHR"); + clEnqueueReleaseD3D11ObjectsKHR = (clEnqueueReleaseD3D11ObjectsKHR_fn) + clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueReleaseD3D11ObjectsKHR"); + initializedPlatform11 = platform; + } + if 
(!clCreateFromD3D11Texture2DKHR || !clEnqueueAcquireD3D11ObjectsKHR || !clEnqueueReleaseD3D11ObjectsKHR) + { + CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't find functions for D3D11"); + } + } + } + return useCLNVEXT; + } + + void initializeD3D9() + { + using namespace cv::ocl; + cl_platform_id platform = getPlatform(); + if (initializedPlatform9 != platform) + { + clCreateFromDX9MediaSurfaceKHR = (clCreateFromDX9MediaSurfaceKHR_fn) + clGetExtensionFunctionAddressForPlatform(platform, "clCreateFromDX9MediaSurfaceKHR"); + clEnqueueAcquireDX9MediaSurfacesKHR = (clEnqueueAcquireDX9MediaSurfacesKHR_fn) + clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueAcquireDX9MediaSurfacesKHR"); + clEnqueueReleaseDX9MediaSurfacesKHR = (clEnqueueReleaseDX9MediaSurfacesKHR_fn) + clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueReleaseDX9MediaSurfacesKHR"); + initializedPlatform9 = platform; + } + if (!clCreateFromDX9MediaSurfaceKHR || !clEnqueueAcquireDX9MediaSurfacesKHR || !clEnqueueReleaseDX9MediaSurfacesKHR) + { + CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't find functions for D3D9"); + } + } + + void initializeD3D10() + { + using namespace cv::ocl; + cl_platform_id platform = getPlatform(); + if (initializedPlatform10 != platform) + { + clCreateFromD3D10Texture2DKHR = (clCreateFromD3D10Texture2DKHR_fn) + clGetExtensionFunctionAddressForPlatform(platform, "clCreateFromD3D10Texture2DKHR"); + clEnqueueAcquireD3D10ObjectsKHR = (clEnqueueAcquireD3D10ObjectsKHR_fn) + clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueAcquireD3D10ObjectsKHR"); + clEnqueueReleaseD3D10ObjectsKHR = (clEnqueueReleaseD3D10ObjectsKHR_fn) + clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueReleaseD3D10ObjectsKHR"); + initializedPlatform10 = platform; + } + if (!clCreateFromD3D10Texture2DKHR || !clEnqueueAcquireD3D10ObjectsKHR || !clEnqueueReleaseD3D10ObjectsKHR) + { + CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't find functions for D3D10"); + } + } 
+}; + +OpenCLDirectXImpl* createDirectXImpl() +{ + return new OpenCLDirectXImpl(); +} +void deleteDirectXImpl(OpenCLDirectXImpl** p) +{ + if (*p) + { + delete (*p); + *p = NULL; + } +} +OpenCLDirectXImpl& getImpl() +{ + OpenCLDirectXImpl* i = getDirectXImpl(ocl::Context::getDefault()); + CV_Assert(i); + return *i; +} +} +using namespace internal; +#endif + +namespace ocl { Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device) { @@ -715,7 +896,7 @@ Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDev throw; } clExecCtx.bind(); - g_isDirect3DDevice9Ex = true; + getImpl().isDirect3DDevice9Ex = true; return const_cast(clExecCtx.getContext()); #endif } @@ -838,96 +1019,13 @@ Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9 throw; } clExecCtx.bind(); - g_isDirect3DDevice9Ex = false; + getImpl().isDirect3DDevice9Ex = false; return const_cast(clExecCtx.getContext()); #endif } } // namespace cv::ocl -#if defined(HAVE_DIRECTX) && defined(HAVE_OPENCL) - -#ifdef HAVE_OPENCL_D3D11_NV -clCreateFromD3D11Texture2DNV_fn clCreateFromD3D11Texture2DNV = NULL; -clEnqueueAcquireD3D11ObjectsNV_fn clEnqueueAcquireD3D11ObjectsNV = NULL; -clEnqueueReleaseD3D11ObjectsNV_fn clEnqueueReleaseD3D11ObjectsNV = NULL; -#endif -clCreateFromD3D11Texture2DKHR_fn clCreateFromD3D11Texture2DKHR = NULL; -clEnqueueAcquireD3D11ObjectsKHR_fn clEnqueueAcquireD3D11ObjectsKHR = NULL; -clEnqueueReleaseD3D11ObjectsKHR_fn clEnqueueReleaseD3D11ObjectsKHR = NULL; - -static bool __OpenCLinitializeD3D11() -{ - using namespace cv::ocl; - static cl_platform_id initializedPlatform = NULL; - cl_platform_id platform = (cl_platform_id)Platform::getDefault().ptr(); - - bool useCLNVEXT = false; - size_t exts_len; - cl_int status = clGetPlatformInfo(platform, CL_PLATFORM_EXTENSIONS, 0, NULL, &exts_len); - if (status != CL_SUCCESS) - CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get length of CL_PLATFORM_EXTENSIONS"); - cv::AutoBuffer 
extensions(exts_len); - status = clGetPlatformInfo(platform, CL_PLATFORM_EXTENSIONS, exts_len, static_cast(extensions.data()), NULL); - if (status != CL_SUCCESS) - CV_Error(cv::Error::OpenCLInitError, "OpenCL: No available CL_PLATFORM_EXTENSIONS"); - bool is_support_cl_khr_d3d11_sharing = false; - if (strstr(extensions.data(), "cl_khr_d3d11_sharing")) - is_support_cl_khr_d3d11_sharing = true; -#ifdef HAVE_OPENCL_D3D11_NV - bool is_support_cl_nv_d3d11_sharing = false; - if (strstr(extensions.data(), "cl_nv_d3d11_sharing")) - is_support_cl_nv_d3d11_sharing = true; - if (!is_support_cl_nv_d3d11_sharing && !is_support_cl_khr_d3d11_sharing) - CV_Error(cv::Error::OpenCLInitError, "OpenCL: No supported extensions"); -#else - if (!is_support_cl_khr_d3d11_sharing) - CV_Error(cv::Error::OpenCLInitError, "OpenCL: No supported extensions"); -#endif - -#ifdef HAVE_OPENCL_D3D11_NV - if (is_support_cl_nv_d3d11_sharing) - { - if (initializedPlatform != platform) - { - clCreateFromD3D11Texture2DNV = (clCreateFromD3D11Texture2DNV_fn) - clGetExtensionFunctionAddressForPlatform(platform, "clCreateFromD3D11Texture2DNV"); - clEnqueueAcquireD3D11ObjectsNV = (clEnqueueAcquireD3D11ObjectsNV_fn) - clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueAcquireD3D11ObjectsNV"); - clEnqueueReleaseD3D11ObjectsNV = (clEnqueueReleaseD3D11ObjectsNV_fn) - clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueReleaseD3D11ObjectsNV"); - initializedPlatform = platform; - } - if (clCreateFromD3D11Texture2DNV && clEnqueueAcquireD3D11ObjectsNV && clEnqueueReleaseD3D11ObjectsNV) - { - useCLNVEXT = true; - } - } - else -#endif - { - if (is_support_cl_khr_d3d11_sharing) - { - if (initializedPlatform != platform) - { - clCreateFromD3D11Texture2DKHR = (clCreateFromD3D11Texture2DKHR_fn) - clGetExtensionFunctionAddressForPlatform(platform, "clCreateFromD3D11Texture2DKHR"); - clEnqueueAcquireD3D11ObjectsKHR = (clEnqueueAcquireD3D11ObjectsKHR_fn) - clGetExtensionFunctionAddressForPlatform(platform, 
"clEnqueueAcquireD3D11ObjectsKHR"); - clEnqueueReleaseD3D11ObjectsKHR = (clEnqueueReleaseD3D11ObjectsKHR_fn) - clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueReleaseD3D11ObjectsKHR"); - initializedPlatform = platform; - } - if (!clCreateFromD3D11Texture2DKHR || !clEnqueueAcquireD3D11ObjectsKHR || !clEnqueueReleaseD3D11ObjectsKHR) - { - CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't find functions for D3D11"); - } - } - } - return useCLNVEXT; -} -#endif // defined(HAVE_DIRECTX) && defined(HAVE_OPENCL) - } // namespace directx @@ -1009,20 +1107,21 @@ static void __convertToD3D11Texture2DKHR(InputArray src, ID3D11Texture2D* pD3D11 using namespace cv::ocl; Context& ctx = Context::getDefault(); cl_context context = (cl_context)ctx.ptr(); + OpenCLDirectXImpl& impl = getImpl(); cl_int status = 0; cl_mem clImage = 0; #ifdef HAVE_DIRECTX_NV12 cl_mem clImageUV = 0; #endif - clImage = clCreateFromD3D11Texture2DKHR(context, CL_MEM_WRITE_ONLY, pD3D11Texture2D, 0, &status); + clImage = impl.clCreateFromD3D11Texture2DKHR(context, CL_MEM_WRITE_ONLY, pD3D11Texture2D, 0, &status); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromD3D11Texture2DKHR failed"); #ifdef HAVE_DIRECTX_NV12 if(DXGI_FORMAT_NV12 == desc.Format) { - clImageUV = clCreateFromD3D11Texture2DKHR(context, CL_MEM_WRITE_ONLY, pD3D11Texture2D, 1, &status); + clImageUV = impl.clCreateFromD3D11Texture2DKHR(context, CL_MEM_WRITE_ONLY, pD3D11Texture2D, 1, &status); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromD3D11Texture2DKHR failed"); } @@ -1030,21 +1129,21 @@ static void __convertToD3D11Texture2DKHR(InputArray src, ID3D11Texture2D* pD3D11 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); - status = clEnqueueAcquireD3D11ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueAcquireD3D11ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, 
"OpenCL: clEnqueueAcquireD3D11ObjectsKHR failed"); #ifdef HAVE_DIRECTX_NV12 if(DXGI_FORMAT_NV12 == desc.Format) { - status = clEnqueueAcquireD3D11ObjectsKHR(q, 1, &clImageUV, 0, NULL, NULL); + status = impl.clEnqueueAcquireD3D11ObjectsKHR(q, 1, &clImageUV, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireD3D11ObjectsKHR failed"); if(!ocl::ocl_convert_bgr_to_nv12(clBuffer, (int)u.step[0], u.cols, u.rows, clImage, clImageUV)) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_bgr_to_nv12 failed"); - status = clEnqueueReleaseD3D11ObjectsKHR(q, 1, &clImageUV, 0, NULL, NULL); + status = impl.clEnqueueReleaseD3D11ObjectsKHR(q, 1, &clImageUV, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseD3D11ObjectsKHR failed"); } @@ -1060,7 +1159,7 @@ static void __convertToD3D11Texture2DKHR(InputArray src, ID3D11Texture2D* pD3D11 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueCopyBufferToImage failed"); } - status = clEnqueueReleaseD3D11ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueReleaseD3D11ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseD3D11ObjectsKHR failed"); @@ -1107,40 +1206,41 @@ static void __convertToD3D11Texture2DNV(InputArray src, ID3D11Texture2D* pD3D11T using namespace cv::ocl; Context& ctx = Context::getDefault(); cl_context context = (cl_context)ctx.ptr(); + OpenCLDirectXImpl& impl = getImpl(); cl_int status = 0; cl_mem clImage = 0; #ifdef HAVE_DIRECTX_NV12 cl_mem clImageUV = 0; #endif - clImage = clCreateFromD3D11Texture2DNV(context, CL_MEM_WRITE_ONLY, pD3D11Texture2D, 0, &status); + clImage = impl.clCreateFromD3D11Texture2DNV(context, CL_MEM_WRITE_ONLY, pD3D11Texture2D, 0, &status); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromD3D11Texture2DNV failed"); #ifdef HAVE_DIRECTX_NV12 if 
(DXGI_FORMAT_NV12 == desc.Format) { - clImageUV = clCreateFromD3D11Texture2DNV(context, CL_MEM_WRITE_ONLY, pD3D11Texture2D, 1, &status); + clImageUV = impl.clCreateFromD3D11Texture2DNV(context, CL_MEM_WRITE_ONLY, pD3D11Texture2D, 1, &status); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromD3D11Texture2DNV failed"); } #endif cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); - status = clEnqueueAcquireD3D11ObjectsNV(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueAcquireD3D11ObjectsNV(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireD3D11ObjectsNV failed"); #ifdef HAVE_DIRECTX_NV12 if(DXGI_FORMAT_NV12 == desc.Format) { - status = clEnqueueAcquireD3D11ObjectsNV(q, 1, &clImageUV, 0, NULL, NULL); + status = impl.clEnqueueAcquireD3D11ObjectsNV(q, 1, &clImageUV, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireD3D11ObjectsNV failed"); if(!ocl::ocl_convert_bgr_to_nv12(clBuffer, (int)u.step[0], u.cols, u.rows, clImage, clImageUV)) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_bgr_to_nv12 failed"); - status = clEnqueueReleaseD3D11ObjectsNV(q, 1, &clImageUV, 0, NULL, NULL); + status = impl.clEnqueueReleaseD3D11ObjectsNV(q, 1, &clImageUV, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseD3D11ObjectsNV failed"); } @@ -1156,7 +1256,7 @@ static void __convertToD3D11Texture2DNV(InputArray src, ID3D11Texture2D* pD3D11T CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueCopyBufferToImage failed"); } - status = clEnqueueReleaseD3D11ObjectsNV(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueReleaseD3D11ObjectsNV(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseD3D11ObjectsNV failed"); @@ -1201,11 +1301,12 @@ static void 
__convertFromD3D11Texture2DKHR(ID3D11Texture2D* pD3D11Texture2D, Out using namespace cv::ocl; Context& ctx = Context::getDefault(); cl_context context = (cl_context)ctx.ptr(); + OpenCLDirectXImpl& impl = getImpl(); cl_int status = 0; cl_mem clImage = 0; - clImage = clCreateFromD3D11Texture2DKHR(context, CL_MEM_READ_ONLY, pD3D11Texture2D, 0, &status); + clImage = impl.clCreateFromD3D11Texture2DKHR(context, CL_MEM_READ_ONLY, pD3D11Texture2D, 0, &status); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromD3D11Texture2DKHR failed"); @@ -1213,7 +1314,7 @@ static void __convertFromD3D11Texture2DKHR(ID3D11Texture2D* pD3D11Texture2D, Out cl_mem clImageUV = 0; if(DXGI_FORMAT_NV12 == desc.Format) { - clImageUV = clCreateFromD3D11Texture2DKHR(context, CL_MEM_READ_ONLY, pD3D11Texture2D, 1, &status); + clImageUV = impl.clCreateFromD3D11Texture2DKHR(context, CL_MEM_READ_ONLY, pD3D11Texture2D, 1, &status); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromD3D11Texture2DKHR failed"); } @@ -1221,21 +1322,21 @@ static void __convertFromD3D11Texture2DKHR(ID3D11Texture2D* pD3D11Texture2D, Out cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); - status = clEnqueueAcquireD3D11ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueAcquireD3D11ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireD3D11ObjectsKHR failed"); #ifdef HAVE_DIRECTX_NV12 if(DXGI_FORMAT_NV12 == desc.Format) { - status = clEnqueueAcquireD3D11ObjectsKHR(q, 1, &clImageUV, 0, NULL, NULL); + status = impl.clEnqueueAcquireD3D11ObjectsKHR(q, 1, &clImageUV, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireD3D11ObjectsKHR failed"); if(!ocl::ocl_convert_nv12_to_bgr(clImage, clImageUV, clBuffer, (int)u.step[0], u.cols, u.rows)) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: 
ocl_convert_nv12_to_bgr failed"); - status = clEnqueueReleaseD3D11ObjectsKHR(q, 1, &clImageUV, 0, NULL, NULL); + status = impl.clEnqueueReleaseD3D11ObjectsKHR(q, 1, &clImageUV, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseD3D11ObjectsKHR failed"); } @@ -1251,7 +1352,7 @@ static void __convertFromD3D11Texture2DKHR(ID3D11Texture2D* pD3D11Texture2D, Out CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueCopyImageToBuffer failed"); } - status = clEnqueueReleaseD3D11ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueReleaseD3D11ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseD3D11ObjectsKHR failed"); @@ -1296,11 +1397,12 @@ static void __convertFromD3D11Texture2DNV(ID3D11Texture2D* pD3D11Texture2D, Outp using namespace cv::ocl; Context& ctx = Context::getDefault(); cl_context context = (cl_context)ctx.ptr(); + OpenCLDirectXImpl& impl = getImpl(); cl_int status = 0; cl_mem clImage = 0; - clImage = clCreateFromD3D11Texture2DNV(context, CL_MEM_READ_ONLY, pD3D11Texture2D, 0, &status); + clImage = impl.clCreateFromD3D11Texture2DNV(context, CL_MEM_READ_ONLY, pD3D11Texture2D, 0, &status); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromD3D11Texture2DNV failed"); @@ -1308,28 +1410,28 @@ static void __convertFromD3D11Texture2DNV(ID3D11Texture2D* pD3D11Texture2D, Outp cl_mem clImageUV = 0; if(DXGI_FORMAT_NV12 == desc.Format) { - clImageUV = clCreateFromD3D11Texture2DNV(context, CL_MEM_READ_ONLY, pD3D11Texture2D, 1, &status); + clImageUV = impl.clCreateFromD3D11Texture2DNV(context, CL_MEM_READ_ONLY, pD3D11Texture2D, 1, &status); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromD3D11Texture2DNV failed"); } #endif cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); - status = clEnqueueAcquireD3D11ObjectsNV(q, 1, &clImage, 0, 
NULL, NULL); + status = impl.clEnqueueAcquireD3D11ObjectsNV(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireD3D11ObjectsNV failed"); #ifdef HAVE_DIRECTX_NV12 if (DXGI_FORMAT::DXGI_FORMAT_NV12 == desc.Format) { - status = clEnqueueAcquireD3D11ObjectsNV(q, 1, &clImageUV, 0, NULL, NULL); + status = impl.clEnqueueAcquireD3D11ObjectsNV(q, 1, &clImageUV, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireD3D11ObjectsNV failed"); if (!ocl::ocl_convert_nv12_to_bgr(clImage, clImageUV, clBuffer, (int)u.step[0], u.cols, u.rows)) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_nv12_to_bgr failed"); - status = clEnqueueReleaseD3D11ObjectsNV(q, 1, &clImageUV, 0, NULL, NULL); + status = impl.clEnqueueReleaseD3D11ObjectsNV(q, 1, &clImageUV, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseD3D11ObjectsNV failed"); } @@ -1345,7 +1447,7 @@ static void __convertFromD3D11Texture2DNV(ID3D11Texture2D* pD3D11Texture2D, Outp CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueCopyImageToBuffer failed"); } - status = clEnqueueReleaseD3D11ObjectsNV(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueReleaseD3D11ObjectsNV(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseD3D11ObjectsNV failed"); @@ -1377,7 +1479,7 @@ void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D) NO_OPENCL_SUPPORT_ERROR; #else - bool useCLNVEXT = __OpenCLinitializeD3D11(); + bool useCLNVEXT = getImpl().initializeD3D11(); if(!useCLNVEXT){ __convertToD3D11Texture2DKHR(src,pD3D11Texture2D); } @@ -1399,7 +1501,7 @@ void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst NO_OPENCL_SUPPORT_ERROR; #else - bool useCLNVEXT = __OpenCLinitializeD3D11(); + bool useCLNVEXT = 
getImpl().initializeD3D11(); if(!useCLNVEXT){ __convertFromD3D11Texture2DKHR(pD3D11Texture2D,dst); } @@ -1412,40 +1514,14 @@ void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst #endif } -#if defined(HAVE_DIRECTX) && defined(HAVE_OPENCL) -clCreateFromD3D10Texture2DKHR_fn clCreateFromD3D10Texture2DKHR = NULL; -clEnqueueAcquireD3D10ObjectsKHR_fn clEnqueueAcquireD3D10ObjectsKHR = NULL; -clEnqueueReleaseD3D10ObjectsKHR_fn clEnqueueReleaseD3D10ObjectsKHR = NULL; - -static void __OpenCLinitializeD3D10() -{ - using namespace cv::ocl; - static cl_platform_id initializedPlatform = NULL; - cl_platform_id platform = (cl_platform_id)Platform::getDefault().ptr(); - if (initializedPlatform != platform) - { - clCreateFromD3D10Texture2DKHR = (clCreateFromD3D10Texture2DKHR_fn) - clGetExtensionFunctionAddressForPlatform(platform, "clCreateFromD3D10Texture2DKHR"); - clEnqueueAcquireD3D10ObjectsKHR = (clEnqueueAcquireD3D10ObjectsKHR_fn) - clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueAcquireD3D10ObjectsKHR"); - clEnqueueReleaseD3D10ObjectsKHR = (clEnqueueReleaseD3D10ObjectsKHR_fn) - clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueReleaseD3D10ObjectsKHR"); - initializedPlatform = platform; - } - if (!clCreateFromD3D10Texture2DKHR || !clEnqueueAcquireD3D10ObjectsKHR || !clEnqueueReleaseD3D10ObjectsKHR) - { - CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't find functions for D3D10"); - } -} -#endif // defined(HAVE_DIRECTX) && defined(HAVE_OPENCL) - void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D) { CV_UNUSED(src); CV_UNUSED(pD3D10Texture2D); #if !defined(HAVE_DIRECTX) NO_DIRECTX_SUPPORT_ERROR; #elif defined(HAVE_OPENCL) - __OpenCLinitializeD3D10(); + OpenCLDirectXImpl& impl = getImpl(); + impl.initializeD3D10(); D3D10_TEXTURE2D_DESC desc = { 0 }; pD3D10Texture2D->GetDesc(&desc); @@ -1468,14 +1544,14 @@ void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D) 
CV_Assert(u.isContinuous()); cl_int status = 0; - cl_mem clImage = clCreateFromD3D10Texture2DKHR(context, CL_MEM_WRITE_ONLY, pD3D10Texture2D, 0, &status); + cl_mem clImage = impl.clCreateFromD3D10Texture2DKHR(context, CL_MEM_WRITE_ONLY, pD3D10Texture2D, 0, &status); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromD3D10Texture2DKHR failed"); cl_mem clBuffer = (cl_mem)u.handle(ACCESS_READ); cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); - status = clEnqueueAcquireD3D10ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueAcquireD3D10ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireD3D10ObjectsKHR failed"); size_t offset = 0; // TODO @@ -1484,7 +1560,7 @@ void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D) status = clEnqueueCopyBufferToImage(q, clBuffer, clImage, offset, dst_origin, region, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueCopyBufferToImage failed"); - status = clEnqueueReleaseD3D10ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueReleaseD3D10ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseD3D10ObjectsKHR failed"); @@ -1506,7 +1582,8 @@ void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst #if !defined(HAVE_DIRECTX) NO_DIRECTX_SUPPORT_ERROR; #elif defined(HAVE_OPENCL) - __OpenCLinitializeD3D10(); + OpenCLDirectXImpl& impl = getImpl(); + impl.initializeD3D10(); D3D10_TEXTURE2D_DESC desc = { 0 }; pD3D10Texture2D->GetDesc(&desc); @@ -1527,14 +1604,14 @@ void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst CV_Assert(u.isContinuous()); cl_int status = 0; - cl_mem clImage = clCreateFromD3D10Texture2DKHR(context, CL_MEM_READ_ONLY, pD3D10Texture2D, 0, &status); + cl_mem 
clImage = impl.clCreateFromD3D10Texture2DKHR(context, CL_MEM_READ_ONLY, pD3D10Texture2D, 0, &status); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromD3D10Texture2DKHR failed"); cl_mem clBuffer = (cl_mem)u.handle(ACCESS_READ); cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); - status = clEnqueueAcquireD3D10ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueAcquireD3D10ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireD3D10ObjectsKHR failed"); size_t offset = 0; // TODO @@ -1543,7 +1620,7 @@ void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst status = clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueCopyImageToBuffer failed"); - status = clEnqueueReleaseD3D10ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueReleaseD3D10ObjectsKHR(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseD3D10ObjectsKHR failed"); @@ -1560,32 +1637,6 @@ void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst #endif } -#if defined(HAVE_DIRECTX) && defined(HAVE_OPENCL) -clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR = NULL; -clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR = NULL; -clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR = NULL; - -static void __OpenCLinitializeD3D9() -{ - using namespace cv::ocl; - static cl_platform_id initializedPlatform = NULL; - cl_platform_id platform = (cl_platform_id)Platform::getDefault().ptr(); - if (initializedPlatform != platform) - { - clCreateFromDX9MediaSurfaceKHR = (clCreateFromDX9MediaSurfaceKHR_fn) - clGetExtensionFunctionAddressForPlatform(platform, 
"clCreateFromDX9MediaSurfaceKHR"); - clEnqueueAcquireDX9MediaSurfacesKHR = (clEnqueueAcquireDX9MediaSurfacesKHR_fn) - clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueAcquireDX9MediaSurfacesKHR"); - clEnqueueReleaseDX9MediaSurfacesKHR = (clEnqueueReleaseDX9MediaSurfacesKHR_fn) - clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueReleaseDX9MediaSurfacesKHR"); - initializedPlatform = platform; - } - if (!clCreateFromDX9MediaSurfaceKHR || !clEnqueueAcquireDX9MediaSurfacesKHR || !clEnqueueReleaseDX9MediaSurfacesKHR) - { - CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't find functions for D3D9"); - } -} -#endif // defined(HAVE_DIRECTX) && defined(HAVE_OPENCL) void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle) { @@ -1593,7 +1644,8 @@ void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurfa #if !defined(HAVE_DIRECTX) NO_DIRECTX_SUPPORT_ERROR; #elif defined(HAVE_OPENCL) - __OpenCLinitializeD3D9(); + OpenCLDirectXImpl& impl = getImpl(); + impl.initializeD3D9(); D3DSURFACE_DESC desc; if (FAILED(pDirect3DSurface9->GetDesc(&desc))) @@ -1620,8 +1672,8 @@ void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurfa cl_int status = 0; cl_dx9_surface_info_khr surfaceInfo = {pDirect3DSurface9, (HANDLE)surfaceSharedHandle}; - cl_mem clImage = clCreateFromDX9MediaSurfaceKHR(context, CL_MEM_WRITE_ONLY, - ocl::g_isDirect3DDevice9Ex ? CL_ADAPTER_D3D9EX_KHR : CL_ADAPTER_D3D9_KHR, + cl_mem clImage = impl.clCreateFromDX9MediaSurfaceKHR(context, CL_MEM_WRITE_ONLY, + impl.isDirect3DDevice9Ex ? 
CL_ADAPTER_D3D9EX_KHR : CL_ADAPTER_D3D9_KHR, &surfaceInfo, 0, &status); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromDX9MediaSurfaceKHR failed"); @@ -1629,7 +1681,7 @@ void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurfa cl_mem clBuffer = (cl_mem)u.handle(ACCESS_READ); cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); - status = clEnqueueAcquireDX9MediaSurfacesKHR(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueAcquireDX9MediaSurfacesKHR(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireDX9MediaSurfacesKHR failed"); size_t offset = 0; // TODO @@ -1638,7 +1690,7 @@ void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurfa status = clEnqueueCopyBufferToImage(q, clBuffer, clImage, offset, dst_origin, region, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueCopyBufferToImage failed"); - status = clEnqueueReleaseDX9MediaSurfacesKHR(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueReleaseDX9MediaSurfacesKHR(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseDX9MediaSurfacesKHR failed"); @@ -1661,7 +1713,8 @@ void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArr #if !defined(HAVE_DIRECTX) NO_DIRECTX_SUPPORT_ERROR; #elif defined(HAVE_OPENCL) - __OpenCLinitializeD3D9(); + OpenCLDirectXImpl& impl = getImpl(); + impl.initializeD3D9(); D3DSURFACE_DESC desc; if (FAILED(pDirect3DSurface9->GetDesc(&desc))) @@ -1686,8 +1739,8 @@ void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArr cl_int status = 0; cl_dx9_surface_info_khr surfaceInfo = {pDirect3DSurface9, (HANDLE)surfaceSharedHandle}; - cl_mem clImage = clCreateFromDX9MediaSurfaceKHR(context, CL_MEM_READ_ONLY, - ocl::g_isDirect3DDevice9Ex ? 
CL_ADAPTER_D3D9EX_KHR : CL_ADAPTER_D3D9_KHR, + cl_mem clImage = impl.clCreateFromDX9MediaSurfaceKHR(context, CL_MEM_READ_ONLY, + impl.isDirect3DDevice9Ex ? CL_ADAPTER_D3D9EX_KHR : CL_ADAPTER_D3D9_KHR, &surfaceInfo, 0, &status); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromDX9MediaSurfaceKHR failed"); @@ -1695,7 +1748,7 @@ void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArr cl_mem clBuffer = (cl_mem)u.handle(ACCESS_WRITE); cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); - status = clEnqueueAcquireDX9MediaSurfacesKHR(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueAcquireDX9MediaSurfacesKHR(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireDX9MediaSurfacesKHR failed"); size_t offset = 0; // TODO @@ -1704,7 +1757,7 @@ void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArr status = clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueCopyImageToBuffer failed"); - status = clEnqueueReleaseDX9MediaSurfacesKHR(q, 1, &clImage, 0, NULL, NULL); + status = impl.clEnqueueReleaseDX9MediaSurfacesKHR(q, 1, &clImage, 0, NULL, NULL); if (status != CL_SUCCESS) CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseDX9MediaSurfacesKHR failed"); diff --git a/modules/core/src/directx.hpp b/modules/core/src/directx.hpp new file mode 100644 index 0000000000..9f23352d4d --- /dev/null +++ b/modules/core/src/directx.hpp @@ -0,0 +1,23 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifndef OPENCV_CORE_SRC_DIRECTX_HPP +#define OPENCV_CORE_SRC_DIRECTX_HPP + +#ifndef HAVE_DIRECTX +#error Invalid build configuration +#endif + +namespace cv { +namespace directx { +namespace internal { + +struct OpenCLDirectXImpl; +OpenCLDirectXImpl* createDirectXImpl(); +void deleteDirectXImpl(OpenCLDirectXImpl**); +OpenCLDirectXImpl* getDirectXImpl(ocl::Context& ctx); + +}}} // namespace internal + +#endif // OPENCV_CORE_SRC_DIRECTX_HPP diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index bfa61d0502..fcdb2a202f 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -122,6 +122,33 @@ static const double DFTTab[][2] = { 1.00000000000000000, 0.00000000292583616 } }; +namespace { +template +struct Constants { + static const T sin_120; + static const T fft5_2; + static const T fft5_3; + static const T fft5_4; + static const T fft5_5; +}; + +template +const T Constants::sin_120 = (T)0.86602540378443864676372317075294; + +template +const T Constants::fft5_2 = (T)0.559016994374947424102293417182819; + +template +const T Constants::fft5_3 = (T)-0.951056516295153572116439333379382; + +template +const T Constants::fft5_4 = (T)-1.538841768587626701285145288018455; + +template +const T Constants::fft5_5 = (T)0.363271264002680442947733378740309; + +} //namespace + #define BitRev(i,shift) \ ((int)((((unsigned)bitrevTab[(i)&255] << 24)+ \ ((unsigned)bitrevTab[((i)>> 8)&255] << 16)+ \ @@ -372,6 +399,149 @@ DFTInit( int n0, int nf, const int* factors, int* itab, int elem_size, void* _wa } } +// Reference radix-2 implementation. 
+template struct DFT_R2 +{ + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + const int nx = n/2; + for(int i = 0 ; i < c_n; i += n) + { + Complex* v = dst + i; + T r0 = v[0].re + v[nx].re; + T i0 = v[0].im + v[nx].im; + T r1 = v[0].re - v[nx].re; + T i1 = v[0].im - v[nx].im; + v[0].re = r0; v[0].im = i0; + v[nx].re = r1; v[nx].im = i1; + + for( int j = 1, dw = dw0; j < nx; j++, dw += dw0 ) + { + v = dst + i + j; + r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im; + i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im; + r0 = v[0].re; i0 = v[0].im; + + v[0].re = r0 + r1; v[0].im = i0 + i1; + v[nx].re = r0 - r1; v[nx].im = i0 - i1; + } + } + } +}; + +// Reference radix-3 implementation. +template struct DFT_R3 +{ + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + const int nx = n / 3; + for(int i = 0; i < c_n; i += n ) + { + { + Complex* v = dst + i; + T r1 = v[nx].re + v[nx*2].re; + T i1 = v[nx].im + v[nx*2].im; + T r0 = v[0].re; + T i0 = v[0].im; + T r2 = Constants::sin_120*(v[nx].im - v[nx*2].im); + T i2 = Constants::sin_120*(v[nx*2].re - v[nx].re); + v[0].re = r0 + r1; v[0].im = i0 + i1; + r0 -= (T)0.5*r1; i0 -= (T)0.5*i1; + v[nx].re = r0 + r2; v[nx].im = i0 + i2; + v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; + } + + for(int j = 1, dw = dw0; j < nx; j++, dw += dw0 ) + { + Complex* v = dst + i + j; + T r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im; + T i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re; + T i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im; + T r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re; + T r1 = r0 + i2; T i1 = i0 + r2; + + r2 = Constants::sin_120*(i0 - r2); i2 = Constants::sin_120*(i2 - r0); + r0 = v[0].re; i0 = v[0].im; + v[0].re = r0 + r1; v[0].im = i0 + i1; + r0 -= (T)0.5*r1; i0 -= (T)0.5*i1; + v[nx].re = r0 + r2; v[nx].im = i0 + i2; + v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; + } + } + } +}; + +// Reference 
radix-5 implementation. +template struct DFT_R5 +{ + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + const int nx = n / 5; + for(int i = 0; i < c_n; i += n ) + { + for(int j = 0, dw = 0; j < nx; j++, dw += dw0 ) + { + Complex* v0 = dst + i + j; + Complex* v1 = v0 + nx*2; + Complex* v2 = v1 + nx*2; + + T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5; + + r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im; + i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re; + r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im; + i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re; + + r1 = r3 + r2; i1 = i3 + i2; + r3 -= r2; i3 -= i2; + + r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im; + i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re; + r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im; + i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re; + + r2 = r4 + r0; i2 = i4 + i0; + r4 -= r0; i4 -= i0; + + r0 = v0[0].re; i0 = v0[0].im; + r5 = r1 + r2; i5 = i1 + i2; + + v0[0].re = r0 + r5; v0[0].im = i0 + i5; + + r0 -= (T)0.25*r5; i0 -= (T)0.25*i5; + r1 = Constants::fft5_2*(r1 - r2); i1 = Constants::fft5_2*(i1 - i2); + r2 = -Constants::fft5_3*(i3 + i4); i2 = Constants::fft5_3*(r3 + r4); + + i3 *= -Constants::fft5_5; r3 *= Constants::fft5_5; + i4 *= -Constants::fft5_4; r4 *= Constants::fft5_4; + + r5 = r2 + i3; i5 = i2 + r3; + r2 -= i4; i2 -= r4; + + r3 = r0 + r1; i3 = i0 + i1; + r0 -= r1; i0 -= i1; + + v0[nx].re = r3 + r2; v0[nx].im = i3 + i2; + v2[0].re = r3 - r2; v2[0].im = i3 - i2; + + v1[0].re = r0 + r5; v1[0].im = i0 + i5; + v1[nx].re = r0 - r5; v1[nx].im = i0 - i5; + } + } + } +}; + +template struct DFT_VecR2 +{ + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + DFT_R2()(dst, c_n, n, dw0, wave); + } +}; + +template struct DFT_VecR3 +{ + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + DFT_R3()(dst, 
c_n, n, dw0, wave); + } +}; + template struct DFT_VecR4 { int operator()(Complex*, int, int, int&, const Complex*) const { return 1; } @@ -379,6 +549,98 @@ template struct DFT_VecR4 #if CV_SSE3 +// multiplies *a and *b: +// r_re + i*r_im = (a_re + i*a_im)*(b_re + i*b_im) +// r_re and r_im are placed respectively in bits 31:0 and 63:32 of the resulting +// vector register. +inline __m128 complexMul(const Complex* const a, const Complex* const b) { + const __m128 z = _mm_setzero_ps(); + const __m128 neg_elem0 = _mm_set_ps(0.0f,0.0f,0.0f,-0.0f); + // v_a[31:0] is a->re and v_a[63:32] is a->im. + const __m128 v_a = _mm_loadl_pi(z, (const __m64*)a); + const __m128 v_b = _mm_loadl_pi(z, (const __m64*)b); + // x_1 = v[nx] * wave[dw]. + const __m128 v_a_riri = _mm_shuffle_ps(v_a, v_a, _MM_SHUFFLE(0, 1, 0, 1)); + const __m128 v_b_irri = _mm_shuffle_ps(v_b, v_b, _MM_SHUFFLE(1, 0, 0, 1)); + const __m128 mul = _mm_mul_ps(v_a_riri, v_b_irri); + const __m128 xored = _mm_xor_ps(mul, neg_elem0); + return _mm_hadd_ps(xored, z); +} + +// optimized radix-2 transform +template<> struct DFT_VecR2 { + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + const __m128 z = _mm_setzero_ps(); + const int nx = n/2; + for(int i = 0 ; i < c_n; i += n) + { + { + Complex* v = dst + i; + float r0 = v[0].re + v[nx].re; + float i0 = v[0].im + v[nx].im; + float r1 = v[0].re - v[nx].re; + float i1 = v[0].im - v[nx].im; + v[0].re = r0; v[0].im = i0; + v[nx].re = r1; v[nx].im = i1; + } + + for( int j = 1, dw = dw0; j < nx; j++, dw += dw0 ) + { + Complex* v = dst + i + j; + const __m128 x_1 = complexMul(&v[nx], &wave[dw]); + const __m128 v_0 = _mm_loadl_pi(z, (const __m64*)&v[0]); + _mm_storel_pi((__m64*)&v[0], _mm_add_ps(v_0, x_1)); + _mm_storel_pi((__m64*)&v[nx], _mm_sub_ps(v_0, x_1)); + } + } + } +}; + +// Optimized radix-3 implementation. 
+template<> struct DFT_VecR3 { + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + const int nx = n / 3; + const __m128 z = _mm_setzero_ps(); + const __m128 neg_elem1 = _mm_set_ps(0.0f,0.0f,-0.0f,0.0f); + const __m128 sin_120 = _mm_set1_ps(Constants::sin_120); + const __m128 one_half = _mm_set1_ps(0.5f); + for(int i = 0; i < c_n; i += n ) + { + { + Complex* v = dst + i; + + float r1 = v[nx].re + v[nx*2].re; + float i1 = v[nx].im + v[nx*2].im; + float r0 = v[0].re; + float i0 = v[0].im; + float r2 = Constants::sin_120*(v[nx].im - v[nx*2].im); + float i2 = Constants::sin_120*(v[nx*2].re - v[nx].re); + v[0].re = r0 + r1; v[0].im = i0 + i1; + r0 -= (float)0.5*r1; i0 -= (float)0.5*i1; + v[nx].re = r0 + r2; v[nx].im = i0 + i2; + v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; + } + + for(int j = 1, dw = dw0; j < nx; j++, dw += dw0 ) + { + Complex* v = dst + i + j; + const __m128 x_0 = complexMul(&v[nx], &wave[dw]); + const __m128 x_2 = complexMul(&v[nx*2], &wave[dw*2]); + const __m128 x_1 = _mm_add_ps(x_0, x_2); + + const __m128 v_0 = _mm_loadl_pi(z, (const __m64*)&v[0]); + _mm_storel_pi((__m64*)&v[0], _mm_add_ps(v_0, x_1)); + + const __m128 x_3 = _mm_mul_ps(sin_120, _mm_xor_ps(_mm_sub_ps(x_2, x_0), neg_elem1)); + const __m128 x_3s = _mm_shuffle_ps(x_3, x_3, _MM_SHUFFLE(0, 1, 0, 1)); + const __m128 x_4 = _mm_sub_ps(v_0, _mm_mul_ps(one_half, x_1)); + _mm_storel_pi((__m64*)&v[nx], _mm_add_ps(x_4, x_3s)); + _mm_storel_pi((__m64*)&v[nx*2], _mm_sub_ps(x_4, x_3s)); + } + } + } +}; + // optimized radix-4 transform template<> struct DFT_VecR4 { @@ -573,12 +835,6 @@ struct OcvDftOptions { template static void DFT(const OcvDftOptions & c, const Complex* src, Complex* dst) { - static const T sin_120 = (T)0.86602540378443864676372317075294; - static const T fft5_2 = (T)0.559016994374947424102293417182819; - static const T fft5_3 = (T)-0.951056516295153572116439333379382; - static const T fft5_4 = 
(T)-1.538841768587626701285145288018455; - static const T fft5_5 = (T)0.363271264002680442947733378740309; - const Complex* wave = (Complex*)c.wave; const int * itab = c.itab; @@ -775,30 +1031,18 @@ DFT(const OcvDftOptions & c, const Complex* src, Complex* dst) for( ; n < c.factors[0]; ) { // do the remaining radix-2 transform - nx = n; n *= 2; dw0 /= 2; - for( i = 0; i < c.n; i += n ) + if(c.haveSSE3) { - Complex* v = dst + i; - T r0 = v[0].re + v[nx].re; - T i0 = v[0].im + v[nx].im; - T r1 = v[0].re - v[nx].re; - T i1 = v[0].im - v[nx].im; - v[0].re = r0; v[0].im = i0; - v[nx].re = r1; v[nx].im = i1; - - for( j = 1, dw = dw0; j < nx; j++, dw += dw0 ) - { - v = dst + i + j; - r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im; - i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im; - r0 = v[0].re; i0 = v[0].im; - - v[0].re = r0 + r1; v[0].im = i0 + i1; - v[nx].re = r0 - r1; v[nx].im = i0 - i1; - } + DFT_VecR2 vr2; + vr2(dst, c.n, n, dw0, wave); + } + else + { + DFT_R2 vr2; + vr2(dst, c.n, n, dw0, wave); } } } @@ -813,94 +1057,21 @@ DFT(const OcvDftOptions & c, const Complex* src, Complex* dst) if( factor == 3 ) { - // radix-3 - for( i = 0; i < c.n; i += n ) + if(c.haveSSE3) { - Complex* v = dst + i; - - T r1 = v[nx].re + v[nx*2].re; - T i1 = v[nx].im + v[nx*2].im; - T r0 = v[0].re; - T i0 = v[0].im; - T r2 = sin_120*(v[nx].im - v[nx*2].im); - T i2 = sin_120*(v[nx*2].re - v[nx].re); - v[0].re = r0 + r1; v[0].im = i0 + i1; - r0 -= (T)0.5*r1; i0 -= (T)0.5*i1; - v[nx].re = r0 + r2; v[nx].im = i0 + i2; - v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; - - for( j = 1, dw = dw0; j < nx; j++, dw += dw0 ) - { - v = dst + i + j; - r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im; - i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re; - i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im; - r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re; - r1 = r0 + i2; i1 = i0 + r2; - - r2 = sin_120*(i0 - r2); i2 = sin_120*(i2 - r0); - r0 = v[0].re; i0 = v[0].im; - v[0].re = r0 + r1; 
v[0].im = i0 + i1; - r0 -= (T)0.5*r1; i0 -= (T)0.5*i1; - v[nx].re = r0 + r2; v[nx].im = i0 + i2; - v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; - } + DFT_VecR3 vr3; + vr3(dst, c.n, n, dw0, wave); + } + else + { + DFT_R3 vr3; + vr3(dst, c.n, n, dw0, wave); } } else if( factor == 5 ) { - // radix-5 - for( i = 0; i < c.n; i += n ) - { - for( j = 0, dw = 0; j < nx; j++, dw += dw0 ) - { - Complex* v0 = dst + i + j; - Complex* v1 = v0 + nx*2; - Complex* v2 = v1 + nx*2; - - T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5; - - r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im; - i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re; - r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im; - i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re; - - r1 = r3 + r2; i1 = i3 + i2; - r3 -= r2; i3 -= i2; - - r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im; - i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re; - r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im; - i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re; - - r2 = r4 + r0; i2 = i4 + i0; - r4 -= r0; i4 -= i0; - - r0 = v0[0].re; i0 = v0[0].im; - r5 = r1 + r2; i5 = i1 + i2; - - v0[0].re = r0 + r5; v0[0].im = i0 + i5; - - r0 -= (T)0.25*r5; i0 -= (T)0.25*i5; - r1 = fft5_2*(r1 - r2); i1 = fft5_2*(i1 - i2); - r2 = -fft5_3*(i3 + i4); i2 = fft5_3*(r3 + r4); - - i3 *= -fft5_5; r3 *= fft5_5; - i4 *= -fft5_4; r4 *= fft5_4; - - r5 = r2 + i3; i5 = i2 + r3; - r2 -= i4; i2 -= r4; - - r3 = r0 + r1; i3 = i0 + i1; - r0 -= r1; i0 -= i1; - - v0[nx].re = r3 + r2; v0[nx].im = i3 + i2; - v2[0].re = r3 - r2; v2[0].im = i3 - i2; - - v1[0].re = r0 + r5; v1[0].im = i0 + i5; - v1[nx].re = r0 - r5; v1[nx].im = i0 - i5; - } - } + DFT_R5 vr5; + vr5(dst, c.n, n, dw0, wave); } else { diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 59544f8458..122b383379 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -204,6 +204,21 @@ MatAllocator* Mat::getStdAllocator() 
//================================================================================================== +bool MatSize::operator==(const MatSize& sz) const +{ + int d = dims(); + int dsz = sz.dims(); + if( d != dsz ) + return false; + if( d == 2 ) + return p[0] == sz.p[0] && p[1] == sz.p[1]; + + for( int i = 0; i < d; i++ ) + if( p[i] != sz.p[i] ) + return false; + return true; +} + void setSize( Mat& m, int _dims, const int* _sz, const size_t* _steps, bool autoSteps) { CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM ); @@ -320,7 +335,330 @@ void finalizeHdr(Mat& m) m.dataend = m.datalimit = 0; } -//================================================================================================== +//======================================= Mat ====================================================== + +Mat::Mat() + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows), step(0) +{} + +Mat::Mat(int _rows, int _cols, int _type) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + create(_rows, _cols, _type); +} + +Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + create(_rows, _cols, _type); + *this = _s; +} + +Mat::Mat(Size _sz, int _type) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + create( _sz.height, _sz.width, _type ); +} + +Mat::Mat(Size _sz, int _type, const Scalar& _s) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + create(_sz.height, _sz.width, _type); + *this = _s; +} + +Mat::Mat(int _dims, const int* _sz, int _type) + : flags(MAGIC_VAL), dims(0), 
rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + create(_dims, _sz, _type); +} + +Mat::Mat(int _dims, const int* _sz, int _type, const Scalar& _s) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + create(_dims, _sz, _type); + *this = _s; +} + +Mat::Mat(const std::vector& _sz, int _type) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + create(_sz, _type); +} + +Mat::Mat(const std::vector& _sz, int _type, const Scalar& _s) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + create(_sz, _type); + *this = _s; +} + +Mat::Mat(const Mat& m) + : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data), + datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator), + u(m.u), size(&rows), step(0) +{ + if( u ) + CV_XADD(&u->refcount, 1); + if( m.dims <= 2 ) + { + step[0] = m.step[0]; step[1] = m.step[1]; + } + else + { + dims = 0; + copySize(m); + } +} + +Mat::Mat(int _rows, int _cols, int _type, void* _data, size_t _step) + : flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_rows), cols(_cols), + data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0), + allocator(0), u(0), size(&rows) +{ + CV_Assert(total() == 0 || data != NULL); + + size_t esz = CV_ELEM_SIZE(_type), esz1 = CV_ELEM_SIZE1(_type); + size_t minstep = cols * esz; + if( _step == AUTO_STEP ) + { + _step = minstep; + } + else + { + CV_Assert( _step >= minstep ); + if (_step % esz1 != 0) + { + CV_Error(Error::BadStep, "Step must be a multiple of esz1"); + } + } + step[0] = _step; + step[1] = esz; + datalimit = datastart + _step * rows; + dataend = datalimit - _step + minstep; + updateContinuityFlag(); +} 
+ +Mat::Mat(Size _sz, int _type, void* _data, size_t _step) + : flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_sz.height), cols(_sz.width), + data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0), + allocator(0), u(0), size(&rows) +{ + CV_Assert(total() == 0 || data != NULL); + + size_t esz = CV_ELEM_SIZE(_type), esz1 = CV_ELEM_SIZE1(_type); + size_t minstep = cols*esz; + if( _step == AUTO_STEP ) + { + _step = minstep; + } + else + { + CV_Assert(_step >= minstep); + + if (_step % esz1 != 0) + { + CV_Error(Error::BadStep, "Step must be a multiple of esz1"); + } + } + step[0] = _step; + step[1] = esz; + datalimit = datastart + _step*rows; + dataend = datalimit - _step + minstep; + updateContinuityFlag(); +} + + +Mat::~Mat() +{ + release(); + if( step.p != step.buf ) + fastFree(step.p); +} + +Mat& Mat::operator=(const Mat& m) +{ + if( this != &m ) + { + if( m.u ) + CV_XADD(&m.u->refcount, 1); + release(); + flags = m.flags; + if( dims <= 2 && m.dims <= 2 ) + { + dims = m.dims; + rows = m.rows; + cols = m.cols; + step[0] = m.step[0]; + step[1] = m.step[1]; + } + else + copySize(m); + data = m.data; + datastart = m.datastart; + dataend = m.dataend; + datalimit = m.datalimit; + allocator = m.allocator; + u = m.u; + } + return *this; +} + +Mat Mat::clone() const +{ + Mat m; + copyTo(m); + return m; +} + +void Mat::assignTo( Mat& m, int _type ) const +{ + if( _type < 0 ) + m = *this; + else + convertTo(m, _type); +} + +void Mat::create(int _rows, int _cols, int _type) +{ + _type &= TYPE_MASK; + if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && data ) + return; + int sz[] = {_rows, _cols}; + create(2, sz, _type); +} + +void Mat::create(Size _sz, int _type) +{ + create(_sz.height, _sz.width, _type); +} + +void Mat::addref() +{ + if( u ) + CV_XADD(&u->refcount, 1); +} + +void Mat::release() +{ + if( u && CV_XADD(&u->refcount, -1) == 1 ) + deallocate(); + u = NULL; + datastart = dataend = datalimit = data = 0; + for(int i = 0; i < 
dims; i++) + size.p[i] = 0; +#ifdef _DEBUG + flags = MAGIC_VAL; + dims = rows = cols = 0; + if(step.p != step.buf) + { + fastFree(step.p); + step.p = step.buf; + size.p = &rows; + } +#endif +} + +size_t Mat::step1(int i) const +{ + return step.p[i] / elemSize1(); +} + +bool Mat::empty() const +{ + return data == 0 || total() == 0 || dims == 0; +} + +size_t Mat::total() const +{ + if( dims <= 2 ) + return (size_t)rows * cols; + size_t p = 1; + for( int i = 0; i < dims; i++ ) + p *= size[i]; + return p; +} + +size_t Mat::total(int startDim, int endDim) const +{ + CV_Assert( 0 <= startDim && startDim <= endDim); + size_t p = 1; + int endDim_ = endDim <= dims ? endDim : dims; + for( int i = startDim; i < endDim_; i++ ) + p *= size[i]; + return p; +} + + +Mat::Mat(Mat&& m) + : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data), + datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator), + u(m.u), size(&rows) +{ + if (m.dims <= 2) // move new step/size info + { + step[0] = m.step[0]; + step[1] = m.step[1]; + } + else + { + CV_Assert(m.step.p != m.step.buf); + step.p = m.step.p; + size.p = m.size.p; + m.step.p = m.step.buf; + m.size.p = &m.rows; + } + m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0; + m.data = NULL; m.datastart = NULL; m.dataend = NULL; m.datalimit = NULL; + m.allocator = NULL; + m.u = NULL; +} + + +Mat& Mat::operator=(Mat&& m) +{ + if (this == &m) + return *this; + + release(); + flags = m.flags; dims = m.dims; rows = m.rows; cols = m.cols; data = m.data; + datastart = m.datastart; dataend = m.dataend; datalimit = m.datalimit; allocator = m.allocator; + u = m.u; + if (step.p != step.buf) // release self step/size + { + fastFree(step.p); + step.p = step.buf; + size.p = &rows; + } + if (m.dims <= 2) // move new step/size info + { + step[0] = m.step[0]; + step[1] = m.step[1]; + } + else + { + CV_Assert(m.step.p != m.step.buf); + step.p = m.step.p; + size.p = m.size.p; + m.step.p = m.step.buf; + 
m.size.p = &m.rows; + } + m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0; + m.data = NULL; m.datastart = NULL; m.dataend = NULL; m.datalimit = NULL; + m.allocator = NULL; + m.u = NULL; + return *this; +} + void Mat::create(int d, const int* _sizes, int _type) { diff --git a/modules/core/src/matrix_sparse.cpp b/modules/core/src/matrix_sparse.cpp index 61e7e90a56..05d16d706e 100644 --- a/modules/core/src/matrix_sparse.cpp +++ b/modules/core/src/matrix_sparse.cpp @@ -176,6 +176,94 @@ void SparseMat::Hdr::clear() nodeCount = freeList = 0; } +///////////////////////////// SparseMat ///////////////////////////// + +SparseMat::SparseMat() + : flags(MAGIC_VAL), hdr(0) +{} + +SparseMat::SparseMat(int _dims, const int* _sizes, int _type) + : flags(MAGIC_VAL), hdr(0) +{ + create(_dims, _sizes, _type); +} + +SparseMat::SparseMat(const SparseMat& m) + : flags(m.flags), hdr(m.hdr) +{ + addref(); +} + +SparseMat::~SparseMat() +{ + release(); +} + +SparseMat& SparseMat::operator = (const SparseMat& m) +{ + if( this != &m ) + { + if( m.hdr ) + CV_XADD(&m.hdr->refcount, 1); + release(); + flags = m.flags; + hdr = m.hdr; + } + return *this; +} + +SparseMat& SparseMat::operator=(const Mat& m) +{ + return (*this = SparseMat(m)); +} + +void SparseMat::assignTo(SparseMat& m, int _type) const +{ + if( _type < 0 ) + m = *this; + else + convertTo(m, _type); +} + +void SparseMat::addref() +{ + if( hdr ) + CV_XADD(&hdr->refcount, 1); +} + +void SparseMat::release() +{ + if( hdr && CV_XADD(&hdr->refcount, -1) == 1 ) + delete hdr; + hdr = 0; +} + +size_t SparseMat::hash(int i0) const +{ + return (size_t)i0; +} + +size_t SparseMat::hash(int i0, int i1) const +{ + return (size_t)(unsigned)i0 * HASH_SCALE + (unsigned)i1; +} + +size_t SparseMat::hash(int i0, int i1, int i2) const +{ + return ((size_t)(unsigned)i0 * HASH_SCALE + (unsigned)i1) * HASH_SCALE + (unsigned)i2; +} + +size_t SparseMat::hash(const int* idx) const +{ + size_t h = (unsigned)idx[0]; + if( !hdr ) + return 0; + int d = 
hdr->dims; + for(int i = 1; i < d; i++ ) + h = h * HASH_SCALE + (unsigned)idx[i]; + return h; +} + SparseMat::SparseMat(const Mat& m) : flags(MAGIC_VAL), hdr(0) diff --git a/modules/core/src/matrix_wrap.cpp b/modules/core/src/matrix_wrap.cpp index 421c51febc..68a674f6f1 100644 --- a/modules/core/src/matrix_wrap.cpp +++ b/modules/core/src/matrix_wrap.cpp @@ -915,7 +915,7 @@ bool _InputArray::isContinuous(int i) const if( k == STD_ARRAY_MAT ) { const Mat* vv = (const Mat*)obj; - CV_Assert(i > 0 && i < sz.height); + CV_Assert(i >= 0 && i < sz.height); return vv[i].isContinuous(); } @@ -949,21 +949,21 @@ bool _InputArray::isSubmatrix(int i) const if( k == STD_VECTOR_MAT ) { const std::vector& vv = *(const std::vector*)obj; - CV_Assert((size_t)i < vv.size()); + CV_Assert(i >= 0 && (size_t)i < vv.size()); return vv[i].isSubmatrix(); } if( k == STD_ARRAY_MAT ) { const Mat* vv = (const Mat*)obj; - CV_Assert(i < sz.height); + CV_Assert(i >= 0 && i < sz.height); return vv[i].isSubmatrix(); } if( k == STD_VECTOR_UMAT ) { const std::vector& vv = *(const std::vector*)obj; - CV_Assert((size_t)i < vv.size()); + CV_Assert(i >= 0 && (size_t)i < vv.size()); return vv[i].isSubmatrix(); } @@ -994,9 +994,7 @@ size_t _InputArray::offset(int i) const if( k == STD_VECTOR_MAT ) { const std::vector& vv = *(const std::vector*)obj; - if( i < 0 ) - return 1; - CV_Assert( i < (int)vv.size() ); + CV_Assert( i >= 0 && i < (int)vv.size() ); return (size_t)(vv[i].ptr() - vv[i].datastart); } @@ -1004,16 +1002,14 @@ size_t _InputArray::offset(int i) const if( k == STD_ARRAY_MAT ) { const Mat* vv = (const Mat*)obj; - if( i < 0 ) - return 1; - CV_Assert( i < sz.height ); + CV_Assert( i >= 0 && i < sz.height ); return (size_t)(vv[i].ptr() - vv[i].datastart); } if( k == STD_VECTOR_UMAT ) { const std::vector& vv = *(const std::vector*)obj; - CV_Assert((size_t)i < vv.size()); + CV_Assert(i >= 0 && (size_t)i < vv.size()); return vv[i].offset; } @@ -1027,7 +1023,7 @@ size_t _InputArray::offset(int i) const 
if (k == STD_VECTOR_CUDA_GPU_MAT) { const std::vector& vv = *(const std::vector*)obj; - CV_Assert((size_t)i < vv.size()); + CV_Assert(i >= 0 && (size_t)i < vv.size()); return (size_t)(vv[i].data - vv[i].datastart); } @@ -1057,25 +1053,21 @@ size_t _InputArray::step(int i) const if( k == STD_VECTOR_MAT ) { const std::vector& vv = *(const std::vector*)obj; - if( i < 0 ) - return 1; - CV_Assert( i < (int)vv.size() ); + CV_Assert( i >= 0 && i < (int)vv.size() ); return vv[i].step; } if( k == STD_ARRAY_MAT ) { const Mat* vv = (const Mat*)obj; - if( i < 0 ) - return 1; - CV_Assert( i < sz.height ); + CV_Assert( i >= 0 && i < sz.height ); return vv[i].step; } if( k == STD_VECTOR_UMAT ) { const std::vector& vv = *(const std::vector*)obj; - CV_Assert((size_t)i < vv.size()); + CV_Assert(i >= 0 && (size_t)i < vv.size()); return vv[i].step; } @@ -1087,7 +1079,7 @@ size_t _InputArray::step(int i) const if (k == STD_VECTOR_CUDA_GPU_MAT) { const std::vector& vv = *(const std::vector*)obj; - CV_Assert((size_t)i < vv.size()); + CV_Assert(i >= 0 && (size_t)i < vv.size()); return vv[i].step; } diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 0a82424ba1..44ee8f9c59 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -113,6 +113,10 @@ #include "opencv2/core/opencl/runtime/opencl_core.hpp" +#ifdef HAVE_DIRECTX +#include "directx.hpp" +#endif + #ifdef HAVE_OPENCL_SVM #include "opencv2/core/opencl/runtime/opencl_svm_20.hpp" #include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp" @@ -2327,6 +2331,9 @@ protected: , contextId(CV_XADD(&g_contextId, 1)) , configuration(configuration_) , handle(0) +#ifdef HAVE_DIRECTX + , p_directx_impl(0) +#endif #ifdef HAVE_OPENCL_SVM , svmInitialized(false) #endif @@ -2352,6 +2359,9 @@ protected: handle = NULL; } devices.clear(); +#ifdef HAVE_DIRECTX + directx::internal::deleteDirectXImpl(&p_directx_impl); +#endif } { @@ -2427,6 +2437,7 @@ public: if (impl) { CV_LOG_INFO(NULL, "OpenCL: reuse context@" << 
impl->contextId << " for configuration: " << configuration) + impl->addref(); return impl; } @@ -2658,6 +2669,19 @@ public: return *bufferPoolHostPtr_.get(); } +#ifdef HAVE_DIRECTX + directx::internal::OpenCLDirectXImpl* p_directx_impl; + + directx::internal::OpenCLDirectXImpl* getDirectXImpl() + { + if (!p_directx_impl) + { + p_directx_impl = directx::internal::createDirectXImpl(); + } + return p_directx_impl; + } +#endif + #ifdef HAVE_OPENCL_SVM bool svmInitialized; bool svmAvailable; @@ -7286,4 +7310,15 @@ uint64 Timer::durationNS() const }} // namespace +#ifdef HAVE_DIRECTX +namespace cv { namespace directx { namespace internal { +OpenCLDirectXImpl* getDirectXImpl(ocl::Context& ctx) +{ + ocl::Context::Impl* i = ctx.getImpl(); + CV_Assert(i); + return i->getDirectXImpl(); +} +}}} // namespace cv::directx::internal +#endif + #endif // HAVE_OPENCL diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index d5942d1edc..0ec6270a70 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -228,6 +228,211 @@ UMatDataAutoLock::~UMatDataAutoLock() getUMatDataAutoLocker().release(u1, u2); } +//////////////////////////////// UMat //////////////////////////////// + +UMat::UMat(UMatUsageFlags _usageFlags) +: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) +{} + +UMat::UMat(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags) +: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) +{ + create(_rows, _cols, _type); +} + +UMat::UMat(int _rows, int _cols, int _type, const Scalar& _s, UMatUsageFlags _usageFlags) +: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) +{ + create(_rows, _cols, _type); + *this = _s; +} + +UMat::UMat(Size _sz, int _type, UMatUsageFlags _usageFlags) +: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), 
usageFlags(_usageFlags), u(0), offset(0), size(&rows) +{ + create( _sz.height, _sz.width, _type ); +} + +UMat::UMat(Size _sz, int _type, const Scalar& _s, UMatUsageFlags _usageFlags) +: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) +{ + create(_sz.height, _sz.width, _type); + *this = _s; +} + +UMat::UMat(int _dims, const int* _sz, int _type, UMatUsageFlags _usageFlags) +: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) +{ + create(_dims, _sz, _type); +} + +UMat::UMat(int _dims, const int* _sz, int _type, const Scalar& _s, UMatUsageFlags _usageFlags) +: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) +{ + create(_dims, _sz, _type); + *this = _s; +} + +UMat::UMat(const UMat& m) +: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator), + usageFlags(m.usageFlags), u(m.u), offset(m.offset), size(&rows) +{ + addref(); + if( m.dims <= 2 ) + { + step[0] = m.step[0]; step[1] = m.step[1]; + } + else + { + dims = 0; + copySize(m); + } +} + +UMat& UMat::operator=(const UMat& m) +{ + if( this != &m ) + { + const_cast(m).addref(); + release(); + flags = m.flags; + if( dims <= 2 && m.dims <= 2 ) + { + dims = m.dims; + rows = m.rows; + cols = m.cols; + step[0] = m.step[0]; + step[1] = m.step[1]; + } + else + copySize(m); + allocator = m.allocator; + if (usageFlags == USAGE_DEFAULT) + usageFlags = m.usageFlags; + u = m.u; + offset = m.offset; + } + return *this; +} + +UMat UMat::clone() const +{ + UMat m; + copyTo(m); + return m; +} + +void UMat::assignTo(UMat& m, int _type) const +{ + if( _type < 0 ) + m = *this; + else + convertTo(m, _type); +} + +void UMat::create(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags) +{ + _type &= TYPE_MASK; + if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && u ) + return; + int sz[] = {_rows, 
_cols}; + create(2, sz, _type, _usageFlags); +} + +void UMat::create(Size _sz, int _type, UMatUsageFlags _usageFlags) +{ + create(_sz.height, _sz.width, _type, _usageFlags); +} + +void UMat::addref() +{ + if( u ) + CV_XADD(&(u->urefcount), 1); +} + +void UMat::release() +{ + if( u && CV_XADD(&(u->urefcount), -1) == 1 ) + deallocate(); + for(int i = 0; i < dims; i++) + size.p[i] = 0; + u = 0; +} + +bool UMat::empty() const +{ + return u == 0 || total() == 0 || dims == 0; +} + +size_t UMat::total() const +{ + if( dims <= 2 ) + return (size_t)rows * cols; + size_t p = 1; + for( int i = 0; i < dims; i++ ) + p *= size[i]; + return p; +} + + +UMat::UMat(UMat&& m) +: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator), + usageFlags(m.usageFlags), u(m.u), offset(m.offset), size(&rows) +{ + if (m.dims <= 2) // move new step/size info + { + step[0] = m.step[0]; + step[1] = m.step[1]; + } + else + { + CV_DbgAssert(m.step.p != m.step.buf); + step.p = m.step.p; + size.p = m.size.p; + m.step.p = m.step.buf; + m.size.p = &m.rows; + } + m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0; + m.allocator = NULL; + m.u = NULL; + m.offset = 0; +} + +UMat& UMat::operator=(UMat&& m) +{ + if (this == &m) + return *this; + release(); + flags = m.flags; dims = m.dims; rows = m.rows; cols = m.cols; + allocator = m.allocator; usageFlags = m.usageFlags; + u = m.u; + offset = m.offset; + if (step.p != step.buf) // release self step/size + { + fastFree(step.p); + step.p = step.buf; + size.p = &rows; + } + if (m.dims <= 2) // move new step/size info + { + step[0] = m.step[0]; + step[1] = m.step[1]; + } + else + { + CV_DbgAssert(m.step.p != m.step.buf); + step.p = m.step.p; + size.p = m.size.p; + m.step.p = m.step.buf; + m.size.p = &m.rows; + } + m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0; + m.allocator = NULL; + m.u = NULL; + m.offset = 0; + return *this; +} + MatAllocator* UMat::getStdAllocator() { diff --git a/modules/core/test/test_intrin.cpp 
b/modules/core/test/test_intrin.cpp index 321fa64264..71d61e14e0 100644 --- a/modules/core/test/test_intrin.cpp +++ b/modules/core/test/test_intrin.cpp @@ -126,9 +126,11 @@ DEFINE_SIMD_TESTS(256, AVX512_SKX) TEST(hal_intrin256, float16x16_FP16) { +#if CV_TRY_FP16 //CV_CPU_CALL_FP16_(test_hal_intrin_float16, ()); CV_CPU_CALL_AVX2_(test_hal_intrin_float16, ()); - throw SkipTestException("Unsupported hardware: FP16 is not available"); +#endif + throw SkipTestException("Unsupported: FP16 is not available"); } @@ -142,8 +144,10 @@ namespace intrin512 { TEST(hal_intrin512, float16x32_FP16) { +#if CV_TRY_FP16 CV_CPU_CALL_AVX512_SKX_(test_hal_intrin_float16, ()); - throw SkipTestException("Unsupported hardware: FP16 is not available"); +#endif + throw SkipTestException("Unsupported: FP16 is not available"); } diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 6731091463..84da496b42 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -1902,21 +1902,21 @@ void test_hal_intrin_float64() #endif } -#if CV_FP16 void test_hal_intrin_float16() { DUMP_ENTRY(v_float16); #if CV_FP16 TheTest() .test_loadstore_fp16_f32() -#endif #if CV_SIMD_FP16 .test_loadstore_fp16() .test_float_cvt_fp16() #endif ; -} +#else + std::cout << "SKIP: CV_FP16 is not available" << std::endl; #endif +} /*#if defined(CV_CPU_DISPATCH_MODE_FP16) && CV_CPU_DISPATCH_MODE == FP16 void test_hal_intrin_float16() diff --git a/modules/core/test/test_io.cpp b/modules/core/test/test_io.cpp index e695300d4b..d30c485368 100644 --- a/modules/core/test/test_io.cpp +++ b/modules/core/test/test_io.cpp @@ -1640,6 +1640,32 @@ TEST(Core_InputOutput, FileStorage_free_file_after_exception) ASSERT_EQ(0, std::remove(fileName.c_str())); } +TEST(Core_InputOutput, FileStorage_write_to_sequence) +{ + const std::vector formatExts = { ".yml", ".json", ".xml" }; + const std::string fileName = "FileStorage_write_to_sequence"; + + for (const 
auto& ext : formatExts) + { + FileStorage fs(fileName + ext, FileStorage::WRITE); + std::vector in = { 23, 42 }; + fs.startWriteStruct("some_sequence", cv::FileNode::SEQ); + for (int i : in) + fs.write("", i); + fs.endWriteStruct(); + fs.release(); + + FileStorage fsIn(fileName + ext, FileStorage::READ); + FileNode seq = fsIn["some_sequence"]; + FileNodeIterator it = seq.begin(), it_end = seq.end(); + std::vector out; + for (; it != it_end; ++it) + out.push_back((int)*it); + + EXPECT_EQ(in, out); + } +} + TEST(Core_InputOutput, FileStorage_YAML_parse_multiple_documents) { const std::string filename = "FileStorage_YAML_parse_multiple_documents.yml"; diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp index 578f693dbf..74ee167c54 100644 --- a/modules/core/test/test_mat.cpp +++ b/modules/core/test/test_mat.cpp @@ -9,6 +9,8 @@ #include "opencv2/core/eigen.hpp" #endif +#include "opencv2/core/cuda.hpp" + namespace opencv_test { namespace { class Core_ReduceTest : public cvtest::BaseTest @@ -1974,6 +1976,157 @@ TEST(Core_InputArray, fetch_MatExpr) } +#ifdef CV_CXX11 +class TestInputArrayRangeChecking { + static const char *kind2str(cv::_InputArray ia) + { + switch (ia.kind()) + { + #define C(x) case cv::_InputArray::x: return #x + C(MAT); + C(UMAT); + C(EXPR); + C(MATX); + C(STD_VECTOR); + C(STD_ARRAY); + C(NONE); + C(STD_VECTOR_VECTOR); + C(STD_BOOL_VECTOR); + C(STD_VECTOR_MAT); + C(STD_ARRAY_MAT); + C(STD_VECTOR_UMAT); + C(CUDA_GPU_MAT); + C(STD_VECTOR_CUDA_GPU_MAT); + #undef C + default: + return ""; + } + } + + static void banner(cv::_InputArray ia, const char *label, const char *name) + { + std::cout << std::endl + << label << " = " << name << ", Kind: " << kind2str(ia) + << std::endl; + } + + template + static void testA(I ia, F f, const char *mfname) + { + banner(ia, "f", mfname); + EXPECT_THROW(f(ia, -1), cv::Exception) + << "f(ia, " << -1 << ") should throw cv::Exception"; + for (int i = 0; i < int(ia.size()); i++) + { + 
EXPECT_NO_THROW(f(ia, i)) + << "f(ia, " << i << ") should not throw an exception"; + } + EXPECT_THROW(f(ia, int(ia.size())), cv::Exception) + << "f(ia, " << ia.size() << ") should throw cv::Exception"; + } + + template + static void testB(I ia, F f, const char *mfname) + { + banner(ia, "f", mfname); + EXPECT_THROW(f(ia, -1), cv::Exception) + << "f(ia, " << -1 << ") should throw cv::Exception"; + for (int i = 0; i < int(ia.size()); i++) + { + EXPECT_NO_THROW(f(ia, i)) + << "f(ia, " << i << ") should not throw an exception"; + } + EXPECT_THROW(f(ia, int(ia.size())), cv::Exception) + << "f(ia, " << ia.size() << ") should throw cv::Exception"; + } + + static void test_isContinuous() + { + auto f = [](cv::_InputArray ia, int i) { (void)ia.isContinuous(i); }; + + cv::Mat M; + cv::UMat uM; + + std::vector vec = {M, M}; + std::array arr = {M, M}; + std::vector uvec = {uM, uM}; + + testA(vec, f, "isContinuous"); + testA(arr, f, "isContinuous"); + testA(uvec, f, "isContinuous"); + } + + static void test_isSubmatrix() + { + auto f = [](cv::_InputArray ia, int i) { (void)ia.isSubmatrix(i); }; + + cv::Mat M; + cv::UMat uM; + + std::vector vec = {M, M}; + std::array arr = {M, M}; + std::vector uvec = {uM, uM}; + + testA(vec, f, "isSubmatrix"); + testA(arr, f, "isSubmatrix"); + testA(uvec, f, "isSubmatrix"); + } + + static void test_offset() + { + auto f = [](cv::_InputArray ia, int i) { return ia.offset(i); }; + + cv::Mat M; + cv::UMat uM; + cv::cuda::GpuMat gM; + + std::vector vec = {M, M}; + std::array arr = {M, M}; + std::vector uvec = {uM, uM}; + std::vector gvec = {gM, gM}; + + testB(vec, f, "offset"); + testB(arr, f, "offset"); + testB(uvec, f, "offset"); + testB(gvec, f, "offset"); + } + + static void test_step() + { + auto f = [](cv::_InputArray ia, int i) { return ia.step(i); }; + + cv::Mat M; + cv::UMat uM; + cv::cuda::GpuMat gM; + + std::vector vec = {M, M}; + std::array arr = {M, M}; + std::vector uvec = {uM, uM}; + std::vector gvec = {gM, gM}; + + testB(vec, f, 
"step"); + testB(arr, f, "step"); + testB(uvec, f, "step"); + testB(gvec, f, "step"); + } + +public: + static void run() + { + test_isContinuous(); + test_isSubmatrix(); + test_offset(); + test_step(); + } +}; + +TEST(Core_InputArray, range_checking) +{ + TestInputArrayRangeChecking::run(); +} +#endif + + TEST(Core_Vectors, issue_13078) { float floats_[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; diff --git a/modules/core/test/test_math.cpp b/modules/core/test/test_math.cpp index 0b9469ee83..ab0d52778d 100644 --- a/modules/core/test/test_math.cpp +++ b/modules/core/test/test_math.cpp @@ -2584,7 +2584,7 @@ TEST(Core_CheckRange_INT_MAX, accuracy) TEST(Core_CheckRange_INT_MAX1, accuracy) { cv::Mat m(3, 3, CV_32SC1, cv::Scalar(INT_MAX)); - ASSERT_TRUE( cv::checkRange(m, true, 0, 0, INT_MAX+1.0f) ); + ASSERT_TRUE( cv::checkRange(m, true, 0, 0, (float)((double)INT_MAX+1.0f)) ); ASSERT_TRUE( cv::checkRange(m) ); } diff --git a/modules/core/test/test_quaternion.cpp b/modules/core/test/test_quaternion.cpp new file mode 100644 index 0000000000..0025674ec7 --- /dev/null +++ b/modules/core/test/test_quaternion.cpp @@ -0,0 +1,255 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "test_precomp.hpp" +#include +#include +using namespace cv; +namespace opencv_test{ namespace { +class QuatTest: public ::testing::Test { +protected: + void SetUp() override + { + q1 = {1,2,3,4}; + q2 = {2.5,-2,3.5,4}; + q1Unit = {1 / sqrt(30), sqrt(2) /sqrt(15), sqrt(3) / sqrt(10), 2 * sqrt(2) / sqrt(15)}; + q1Inv = {1.0 / 30, -1.0 / 15, -1.0 / 10, -2.0 / 15}; + } + double scalar = 2.5; + double angle = CV_PI; + int qNorm2 = 2; + Vec axis{1, 1, 1}; + Vec unAxis{0, 0, 0}; + Vec unitAxis{1.0 / sqrt(3), 1.0 / sqrt(3), 1.0 / sqrt(3)}; + Quatd q3 = Quatd::createFromAngleAxis(angle, axis); + Quatd q3UnitAxis = Quatd::createFromAngleAxis(angle, unitAxis); + Quat q3Norm2 = q3 * qNorm2; + + Quat q1Inv; + Quat q1; + Quat q2; + Quat q1Unit; + + Quatd qNull{0, 0, 0, 0}; + Quatd qIdentity{1, 0, 0, 0}; + QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + +}; + +TEST_F(QuatTest, constructor){ + Vec coeff{1, 2, 3, 4}; + EXPECT_EQ(Quat (coeff), q1); + EXPECT_EQ(q3, q3UnitAxis); + EXPECT_ANY_THROW(Quatd::createFromAngleAxis(angle, unAxis)); + Matx33d R1{ + -1.0 / 3, 2.0 / 3 , 2.0 / 3, + 2.0 / 3 , -1.0 / 3, 2.0 / 3, + 2.0 / 3 , 2.0 / 3 , -1.0 / 3 + }; + Matx33d R2{ + -2.0 / 3, -2.0 / 3, -1.0 / 3, + -2.0 / 3, 1.0 / 3, 2.0 / 3, + -1.0 / 3, 2.0 / 3, -2.0 / 3 + }; + Matx33d R3{ + 0.818181818181, 0.181818181818, 0.54545455454, + 0.545454545545, -0.54545454545, -0.6363636364, + 0.181818181818, 0.818181818182, -0.5454545455 + }; + Matx33d R4{ + 0.818181818181, -0.181818181818, 0.54545455454, + 0.545454545545, 0.54545454545, -0.6363636364, + -0.181818181818, 0.818181818182, 0.5454545455 + }; + Quatd qMat = Quatd::createFromRotMat(R1); + Quatd qMat2 = Quatd::createFromRotMat(R2); + Quatd qMat3 = Quatd::createFromRotMat(R3); + Quatd qMat4 = Quatd::createFromRotMat(R4); + EXPECT_EQ(qMat2, Quatd(0, -0.408248290463, 0.816496580927, 0.408248904638)); + EXPECT_EQ(qMat3, Quatd(-0.426401432711,-0.852802865422, -0.213200716355, -0.2132007163)); + EXPECT_EQ(qMat, q3); + EXPECT_EQ(qMat4, 
-Quatd(0.852802865422, 0.426401432711221, 0.2132007163556, 0.2132007163)); + + Vec3d rot{angle / sqrt(3),angle / sqrt(3), angle / sqrt(3)}; + Quatd rotQuad{0, 1.0 / sqrt(3), 1. / sqrt(3), 1. / sqrt(3)}; + Quatd qRot = Quatd::createFromRvec(rot); + EXPECT_EQ(qRot, rotQuad); + EXPECT_EQ(Quatd::createFromRvec(Vec3d(0, 0, 0)), qIdentity); +} + +TEST_F(QuatTest, basicfuns){ + Quat q1Conj{1, -2, -3, -4}; + EXPECT_EQ(q3Norm2.normalize(), q3); + EXPECT_EQ(q1.norm(), sqrt(30)); + EXPECT_EQ(q1.normalize(), q1Unit); + EXPECT_ANY_THROW(qNull.normalize()); + EXPECT_EQ(q1.conjugate(), q1Conj); + EXPECT_EQ(q1.inv(), q1Inv); + EXPECT_EQ(inv(q1), q1Inv); + EXPECT_EQ(q3.inv(assumeUnit) * q3, qIdentity); + EXPECT_EQ(q1.inv() * q1, qIdentity); + EXPECT_ANY_THROW(inv(qNull)); + EXPECT_NO_THROW(q1.at(0)); + EXPECT_ANY_THROW(q1.at(4)); + + Matx33d R{ + -2.0 / 3, 2.0 / 15 , 11.0 / 15, + 2.0 / 3 , -1.0 / 3 , 2.0 / 3 , + 1.0 / 3 , 14.0 / 15, 2.0 / 15 + }; + Matx33d q1RotMat = q1.toRotMat3x3(); + EXPECT_MAT_NEAR(q1RotMat, R, 1e-6); + Vec3d z_axis{0,0,1}; + Quatd q_unit1 = Quatd::createFromAngleAxis(angle, z_axis); + Mat pointsA = (Mat_(2, 3) << 1,0,0,1,0,1); + pointsA = pointsA.t(); + Mat new_point = q_unit1.toRotMat3x3() * pointsA; + Mat afterRo = (Mat_(3, 2) << -1,-1,0,0,0,1); + EXPECT_MAT_NEAR(afterRo, new_point, 1e-6); + EXPECT_ANY_THROW(qNull.toRotVec()); + Vec3d rodVec{CV_PI/sqrt(3), CV_PI/sqrt(3), CV_PI/sqrt(3)}; + Vec3d q3Rod = q3.toRotVec(); + EXPECT_NEAR(q3Rod[0], rodVec[0], 1e-6); + EXPECT_NEAR(q3Rod[1], rodVec[1], 1e-6); + EXPECT_NEAR(q3Rod[2], rodVec[2], 1e-6); + + EXPECT_EQ(log(q1Unit, assumeUnit), log(q1Unit)); + EXPECT_EQ(log(qIdentity, assumeUnit), qNull); + EXPECT_EQ(log(q3), Quatd(0, angle * unitAxis[0] / 2, angle * unitAxis[1] / 2, angle * unitAxis[2] / 2)); + EXPECT_ANY_THROW(log(qNull)); + EXPECT_EQ(log(Quatd(exp(1), 0, 0, 0)), qIdentity); + + EXPECT_EQ(exp(qIdentity), Quatd(exp(1), 0, 0, 0)); + EXPECT_EQ(exp(qNull), qIdentity); + EXPECT_EQ(exp(Quatd(0, angle * 
unitAxis[0] / 2, angle * unitAxis[1] / 2, angle * unitAxis[2] / 2)), q3); + + EXPECT_EQ(power(q3, 2), Quatd::createFromAngleAxis(2*angle, axis)); + EXPECT_EQ(power(Quatd(0.5, 0.5, 0.5, 0.5), 2.0, assumeUnit), Quatd(-0.5,0.5,0.5,0.5)); + EXPECT_EQ(power(Quatd(0.5, 0.5, 0.5, 0.5), -2.0), Quatd(-0.5,-0.5,-0.5,-0.5)); + EXPECT_EQ(sqrt(q1), power(q1, 0.5)); + EXPECT_EQ(exp(q3 * log(q1)), power(q1, q3)); + EXPECT_EQ(exp(q1 * log(q3)), power(q3, q1, assumeUnit)); + EXPECT_EQ(crossProduct(q1, q3), (q1 * q3 - q3 * q1) / 2); + EXPECT_EQ(sinh(qNull), qNull); + EXPECT_EQ(sinh(q1), (exp(q1) - exp(-q1)) / 2); + EXPECT_EQ(sinh(qIdentity), Quatd(sinh(1), 0, 0, 0)); + EXPECT_EQ(sinh(q1), Quatd(0.73233760604, -0.44820744998, -0.67231117497, -0.8964148999610843)); + EXPECT_EQ(cosh(qNull), qIdentity); + EXPECT_EQ(cosh(q1), Quatd(0.961585117636, -0.34135217456, -0.51202826184, -0.682704349122)); + EXPECT_EQ(tanh(q1), sinh(q1) * inv(cosh(q1))); + EXPECT_EQ(sin(qNull), qNull); + EXPECT_EQ(sin(q1), Quatd(91.78371578403, 21.88648685303, 32.829730279543, 43.772973706058)); + EXPECT_EQ(cos(qNull), qIdentity); + EXPECT_EQ(cos(q1), Quatd(58.9336461679, -34.0861836904, -51.12927553569, -68.17236738093)); + EXPECT_EQ(tan(q1), sin(q1)/cos(q1)); + EXPECT_EQ(sinh(asinh(q1)), q1); + Quatd c1 = asinh(sinh(q1)); + EXPECT_EQ(sinh(c1), sinh(q1)); + EXPECT_EQ(cosh(acosh(q1)), q1); + c1 = acosh(cosh(q1)); + EXPECT_EQ(cosh(c1), cosh(q1)); + EXPECT_EQ(tanh(atanh(q1)), q1); + c1 = atanh(tanh(q1)); + EXPECT_EQ(tanh(q1), tanh(c1)); + EXPECT_EQ(asin(sin(q1)), q1); + EXPECT_EQ(sin(asin(q1)), q1); + EXPECT_EQ(acos(cos(q1)), q1); + EXPECT_EQ(cos(acos(q1)), q1); + EXPECT_EQ(atan(tan(q3)), q3); + EXPECT_EQ(tan(atan(q1)), q1); +} + +TEST_F(QuatTest, opeartor){ + Quatd minusQ{-1, -2, -3, -4}; + Quatd qAdd{3.5, 0, 6.5, 8}; + Quatd qMinus{-1.5, 4, -0.5, 0}; + Quatd qMultq{-20, 1, -5, 27}; + Quatd qMults{2.5, 5.0, 7.5, 10.0}; + Quatd qDvss{1.0 / 2.5, 2.0 / 2.5, 3.0 / 2.5, 4.0 / 2.5}; + Quatd qOrigin(q1); + + 
EXPECT_EQ(-q1, minusQ); + EXPECT_EQ(q1 + q2, qAdd); + EXPECT_EQ(q1 - q2, qMinus); + EXPECT_EQ(q1 * q2, qMultq); + EXPECT_EQ(q1 * scalar, qMults); + EXPECT_EQ(scalar * q1, qMults); + EXPECT_EQ(q1 / q1, qIdentity); + EXPECT_EQ(q1 / scalar, qDvss); + q1 += q2; + EXPECT_EQ(q1, qAdd); + q1 -= q2; + EXPECT_EQ(q1, qOrigin); + q1 *= q2; + EXPECT_EQ(q1, qMultq); + q1 /= q2; + EXPECT_EQ(q1, qOrigin); + q1 *= scalar; + EXPECT_EQ(q1, qMults); + q1 /= scalar; + EXPECT_EQ(q1, qOrigin); + EXPECT_NO_THROW(q1[0]); + EXPECT_NO_THROW(q1.at(0)); + EXPECT_ANY_THROW(q1[4]); + EXPECT_ANY_THROW(q1.at(4)); +} + +TEST_F(QuatTest, quatAttrs){ + double angleQ1 = 2 * acos(1.0 / sqrt(30)); + Vec3d axis1{0.3713906763541037, 0.557086014, 0.742781352}; + Vec q1axis1 = q1.getAxis(); + + EXPECT_EQ(angleQ1, q1.getAngle()); + EXPECT_EQ(angleQ1, q1Unit.getAngle()); + EXPECT_EQ(angleQ1, q1Unit.getAngle(assumeUnit)); + EXPECT_EQ(0, qIdentity.getAngle()); + EXPECT_ANY_THROW(qNull.getAxis()); + EXPECT_NEAR(axis1[0], q1axis1[0], 1e-6); + EXPECT_NEAR(axis1[1], q1axis1[1], 1e-6); + EXPECT_NEAR(axis1[2], q1axis1[2], 1e-6); + EXPECT_NEAR(q3Norm2.norm(), qNorm2, 1e-6); + EXPECT_EQ(q3Norm2.getAngle(), angle); + EXPECT_NEAR(axis1[0], axis1[0], 1e-6); + EXPECT_NEAR(axis1[1], axis1[1], 1e-6); + EXPECT_NEAR(axis1[2], axis1[2], 1e-6); +} + +TEST_F(QuatTest, interpolation){ + Quatd qNoRot = Quatd::createFromAngleAxis(0, axis); + Quatd qLerpInter(1.0 / 2, sqrt(3) / 6, sqrt(3) / 6, sqrt(3) / 6); + EXPECT_EQ(Quatd::lerp(qNoRot, q3, 0), qNoRot); + EXPECT_EQ(Quatd::lerp(qNoRot, q3, 1), q3); + EXPECT_EQ(Quatd::lerp(qNoRot, q3, 0.5), qLerpInter); + Quatd q3NrNn2 = qNoRot * qNorm2; + EXPECT_EQ(Quatd::nlerp(q3NrNn2, q3Norm2, 0), qNoRot); + EXPECT_EQ(Quatd::nlerp(q3NrNn2, q3Norm2, 1), q3); + EXPECT_EQ(Quatd::nlerp(q3NrNn2, q3Norm2, 0.5), qLerpInter.normalize()); + EXPECT_EQ(Quatd::nlerp(qNoRot, q3, 0, assumeUnit), qNoRot); + EXPECT_EQ(Quatd::nlerp(qNoRot, q3, 1, assumeUnit), q3); + EXPECT_EQ(Quatd::nlerp(qNoRot, q3, 0.5, 
assumeUnit), qLerpInter.normalize()); + Quatd q3Minus(-q3); + EXPECT_EQ(Quatd::nlerp(qNoRot, q3, 0.4), -Quatd::nlerp(qNoRot, q3Minus, 0.4)); + EXPECT_EQ(Quatd::slerp(qNoRot, q3, 0, assumeUnit), qNoRot); + EXPECT_EQ(Quatd::slerp(qNoRot, q3, 1, assumeUnit), q3); + EXPECT_EQ(Quatd::slerp(qNoRot, q3, 0.5, assumeUnit), -Quatd::nlerp(qNoRot, -q3, 0.5, assumeUnit)); + EXPECT_EQ(Quatd::slerp(qNoRot, q1, 0.5), Quatd(0.76895194, 0.2374325, 0.35614876, 0.47486501)); + EXPECT_EQ(Quatd::slerp(-qNoRot, q1, 0.5), Quatd(0.76895194, 0.2374325, 0.35614876, 0.47486501)); + EXPECT_EQ(Quatd::slerp(qNoRot, -q1, 0.5), -Quatd::slerp(-qNoRot, q1, 0.5)); + + Quat tr1 = Quatd::createFromAngleAxis(0, axis); + Quat tr2 = Quatd::createFromAngleAxis(angle / 2, axis); + Quat tr3 = Quatd::createFromAngleAxis(angle, axis); + Quat tr4 = Quatd::createFromAngleAxis(angle, Vec3d{-1/sqrt(2),0,1/(sqrt(2))}); + EXPECT_ANY_THROW(Quatd::spline(qNull, tr1, tr2, tr3, 0)); + EXPECT_EQ(Quatd::spline(tr1, tr2, tr3, tr4, 0), tr2); + EXPECT_EQ(Quatd::spline(tr1, tr2, tr3, tr4, 1), tr3); + EXPECT_EQ(Quatd::spline(tr1, tr2, tr3, tr4, 0.6, assumeUnit), Quatd::spline(tr1, tr2, tr3, tr4, 0.6)); + EXPECT_EQ(Quatd::spline(tr1, tr2, tr3, tr3, 0.5), Quatd::spline(tr1, -tr2, tr3, tr3, 0.5)); + EXPECT_EQ(Quatd::spline(tr1, tr2, tr3, tr3, 0.5), -Quatd::spline(-tr1, -tr2, -tr3, tr3, 0.5)); + EXPECT_EQ(Quatd::spline(tr1, tr2, tr3, tr3, 0.5), Quatd(0.336889853392, 0.543600719487, 0.543600719487, 0.543600719487)); +} + +} // namespace + +}// opencv_test \ No newline at end of file diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 5d8fbc8b84..39aaa1edb4 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -248,8 +248,6 @@ CV__DNN_INLINE_NS_BEGIN int type; std::vector kernel_size, strides; std::vector pads_begin, pads_end; - CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; - CV_DEPRECATED_EXTERNAL int 
pad_l, pad_t, pad_r, pad_b; bool globalPooling; //!< Flag is true if at least one of the axes is global pooled. std::vector isGlobalPooling; bool computeMaxIdx; diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 3b12508c74..69b71f90ce 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -93,7 +93,8 @@ CV__DNN_INLINE_NS_BEGIN DNN_TARGET_VULKAN, DNN_TARGET_FPGA, //!< FPGA device with CPU fallbacks using Inference Engine's Heterogeneous plugin. DNN_TARGET_CUDA, - DNN_TARGET_CUDA_FP16 + DNN_TARGET_CUDA_FP16, + DNN_TARGET_HDDL }; CV_EXPORTS std::vector< std::pair > getAvailableBackends(); @@ -364,9 +365,12 @@ CV__DNN_INLINE_NS_BEGIN const int requiredOutputs, std::vector &outputs, std::vector &internals) const; + virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const {CV_UNUSED(inputs); CV_UNUSED(outputs); return 0;} + virtual bool updateMemoryShapes(const std::vector &inputs); + CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes. CV_PROP String type; //!< Type name which was used for creating layer by layer factory. CV_PROP int preferableTarget; //!< prefer target for layer forwarding @@ -571,6 +575,7 @@ CV__DNN_INLINE_NS_BEGIN * | DNN_TARGET_FPGA | | + | | | * | DNN_TARGET_CUDA | | | | + | * | DNN_TARGET_CUDA_FP16 | | | | + | + * | DNN_TARGET_HDDL | | + | | | */ CV_WRAP void setPreferableTarget(int targetId); @@ -1072,14 +1077,17 @@ CV__DNN_INLINE_NS_BEGIN * Model creates net from file with trained weights and config, * sets preprocessing input and runs forward pass. */ - class CV_EXPORTS_W_SIMPLE Model : public Net + class CV_EXPORTS_W_SIMPLE Model { public: - /** - * @brief Default constructor. 
- */ + CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first) Model(); + Model(const Model&) = default; + Model(Model&&) = default; + Model& operator=(const Model&) = default; + Model& operator=(Model&&) = default; + /** * @brief Create model from deep learning network represented in one of the supported formats. * An order of @p model and @p config arguments does not matter. @@ -1100,13 +1108,12 @@ CV__DNN_INLINE_NS_BEGIN */ CV_WRAP Model& setInputSize(const Size& size); - /** @brief Set input size for frame. + /** @overload * @param[in] width New input width. * @param[in] height New input height. - * @note If shape of the new blob less than 0, - * then frame size not change. */ - CV_WRAP Model& setInputSize(int width, int height); + CV_WRAP inline + Model& setInputSize(int width, int height) { return setInputSize(Size(width, height)); } /** @brief Set mean value for frame. * @param[in] mean Scalar with mean values which are subtracted from channels. @@ -1143,10 +1150,31 @@ CV__DNN_INLINE_NS_BEGIN * @param[in] frame The input image. * @param[out] outs Allocated output blobs, which will store results of the computation. 
*/ - CV_WRAP void predict(InputArray frame, OutputArrayOfArrays outs); + CV_WRAP void predict(InputArray frame, OutputArrayOfArrays outs) const; + + + // ============================== Net proxy methods ============================== + // Never expose methods with network implementation details, like: + // - addLayer, addLayerToPrev, connect, setInputsNames, setInputShape, setParam, getParam + // - getLayer*, getUnconnectedOutLayers, getUnconnectedOutLayersNames, getLayersShapes + // - forward* methods, setInput + + /// @sa Net::setPreferableBackend + CV_WRAP Model& setPreferableBackend(dnn::Backend backendId); + /// @sa Net::setPreferableTarget + CV_WRAP Model& setPreferableTarget(dnn::Target targetId); + + CV_DEPRECATED_EXTERNAL + operator Net&() const { return getNetwork_(); } + + //protected: - internal/tests usage only + Net& getNetwork_() const; + inline Net& getNetwork_() { return const_cast(this)->getNetwork_(); } - protected: struct Impl; + inline Impl* getImpl() const { return impl.get(); } + inline Impl& getImplRef() const { CV_DbgAssert(impl); return *impl.get(); } + protected: Ptr impl; }; diff --git a/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp b/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp index 7db93a916d..29882b92b0 100644 --- a/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp +++ b/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp @@ -58,6 +58,11 @@ CV_EXPORTS_W void resetMyriadDevice(); CV_EXPORTS_W cv::String getInferenceEngineVPUType(); +/** @brief Release a HDDL plugin. + */ +CV_EXPORTS_W void releaseHDDLPlugin(); + + CV__DNN_INLINE_NS_END }} // namespace diff --git a/modules/dnn/include/opencv2/dnn/version.hpp b/modules/dnn/include/opencv2/dnn/version.hpp index 87c2a8e3bc..7dc2786906 100644 --- a/modules/dnn/include/opencv2/dnn/version.hpp +++ b/modules/dnn/include/opencv2/dnn/version.hpp @@ -6,7 +6,7 @@ #define OPENCV_DNN_VERSION_HPP /// Use with major OpenCV version only. 
-#define OPENCV_DNN_API_VERSION 20200908 +#define OPENCV_DNN_API_VERSION 20201117 #if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS #define CV__DNN_INLINE_NS __CV_CAT(dnn5_v, OPENCV_DNN_API_VERSION) diff --git a/modules/dnn/perf/perf_convolution.cpp b/modules/dnn/perf/perf_convolution.cpp index 7d51cd300f..c2a3a66ab9 100644 --- a/modules/dnn/perf/perf_convolution.cpp +++ b/modules/dnn/perf/perf_convolution.cpp @@ -533,7 +533,7 @@ struct ConvParamID CONV_100 = 100, CONV_LAST = sizeof(testConvolutionConfigs) / sizeof(testConvolutionConfigs[0]) }; - int val_; \ + int val_; ConvParamID(int val = 0) : val_(val) {} operator int() const { return val_; } static ::testing::internal::ParamGenerator all() @@ -546,7 +546,7 @@ struct ConvParamID ConvParamID v_[NUM]; for (int i = 0; i < NUM; ++i) { v_[i] = ConvParamID(i); } // reduce generated code size return ::testing::ValuesIn(v_, v_ + NUM); } -}; \ +}; static inline void PrintTo(const ConvParamID& v, std::ostream* os) { CV_Assert((int)v >= 0); CV_Assert((int)v < ConvParamID::CONV_LAST); diff --git a/modules/dnn/perf/perf_convolution1d.cpp b/modules/dnn/perf/perf_convolution1d.cpp new file mode 100644 index 0000000000..c35cbd503f --- /dev/null +++ b/modules/dnn/perf/perf_convolution1d.cpp @@ -0,0 +1,163 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "perf_precomp.hpp" +#include + +namespace opencv_test { + +struct Conv1DParam_t { + int kernel; + struct BlobShape { int dims[3]; } shapeIn; + int outCN; + int groups; + int stride; + int dilation; + int pad[2]; + const char* padMode; + bool hasBias; + double declared_flops; +}; +// Details: #12142 +static const Conv1DParam_t testConvolution1DConfigs[] = { + {3, {{1, 6, 10}}, 6, 1, 1, 1, {0, 0}, "VALID", true, 1776.}, + {3, {{1, 2, 19}}, 2, 2, 2, 1, {1, 1}, "", true, 260.}, + {3, {{1, 2, 25}}, 2, 2, 1, 1, {2, 2}, "SAME", false, 650.}, +}; + +struct Conv1DParamID +{ + enum { + CONV_0 = 0, + CONV_LAST = sizeof(testConvolution1DConfigs) / sizeof(testConvolution1DConfigs[0]) + }; + int val_; + Conv1DParamID(int val = 0) : val_(val) {} + operator int() const { return val_; } + static ::testing::internal::ParamGenerator all() + { + enum { NUM = (int)CONV_LAST }; + Conv1DParamID v_[NUM]; for (int i = 0; i < NUM; ++i) { v_[i] = Conv1DParamID(i); } // reduce generated code size + return ::testing::ValuesIn(v_, v_ + NUM); + } +}; +static inline void PrintTo(const Conv1DParamID& v, std::ostream* os) +{ + CV_Assert((int)v >= 0); CV_Assert((int)v < Conv1DParamID::CONV_LAST); + const Conv1DParam_t& p = testConvolution1DConfigs[(int)v]; + + *os << "GFLOPS=" << cv::format("%.3f", p.declared_flops * 1e-9) + << ", K=[" << p.kernel << "]" + << ", IN={" << p.shapeIn.dims[0] << ", " << p.shapeIn.dims[1] << ", " << p.shapeIn.dims[2] << "}" + << ", OCN=" << p.outCN; + if (p.groups > 1) + *os << ", G=" << p.groups; + if (p.stride != 1) + *os << ", S=" << p.stride; + if (p.dilation != 1) + *os << ", D=" << p.dilation; + if (p.pad[0] != 0 && p.pad[1] != 0 ) + *os << ", P=(" << p.pad[0] << ", " << p.pad[1] << ")"; + if (!((std::string)p.padMode).empty()) + *os << ", PM=" << ((std::string)p.padMode); + if (p.hasBias) + *os << ", BIAS"; +} + + +typedef tuple > Conv1DTestParam_t; +typedef TestBaseWithParam Conv1D; + +PERF_TEST_P_(Conv1D, conv1d) +{ + int test_id = 
(int)get<0>(GetParam()); + ASSERT_GE(test_id, 0); ASSERT_LT(test_id, Conv1DParamID::CONV_LAST); + const Conv1DParam_t& params = testConvolution1DConfigs[test_id]; + double declared_flops = params.declared_flops; + + DictValue kernel = DictValue::arrayInt(¶ms.kernel, 1); + DictValue stride = DictValue::arrayInt(¶ms.stride, 1); + DictValue pad = DictValue::arrayInt(¶ms.pad[0], 2); + DictValue dilation = DictValue::arrayInt(¶ms.dilation, 1); + + MatShape inputShape = MatShape(params.shapeIn.dims, params.shapeIn.dims + 3); + int outChannels = params.outCN; + int groups = params.groups; + std::string padMode(params.padMode); + + bool hasBias = params.hasBias; + Backend backendId = get<0>(get<1>(GetParam())); + Target targetId = get<1>(get<1>(GetParam())); + + if (targetId != DNN_TARGET_CPU) + throw SkipTestException("Only CPU is supported"); + + int inChannels = inputShape[1]; + + int sz[] = {outChannels, inChannels / groups, params.kernel}; + Mat weights(3, &sz[0], CV_32F); + randu(weights, -1.0f, 1.0f); + + LayerParams lp; + lp.set("kernel_size", kernel); + lp.set("pad", pad); + if (!padMode.empty()) + lp.set("pad_mode", padMode); + + lp.set("stride", stride); + lp.set("dilation", dilation); + lp.set("num_output", outChannels); + lp.set("group", groups); + lp.set("bias_term", hasBias); + lp.type = "Convolution"; + lp.name = "testLayer"; + lp.blobs.push_back(weights); + + if (hasBias) + { + Mat bias(1, outChannels, CV_32F); + randu(bias, -1.0f, 1.0f); + lp.blobs.push_back(bias); + } + + int inpSz[] = {1, inChannels, inputShape[2]}; + Mat input(3, &inpSz[0], CV_32F); + randu(input, -1.0f, 1.0f); + + Net net; + net.addLayerToPrev(lp.name, lp.type, lp); + + net.setInput(input); + net.setPreferableBackend(backendId); + net.setPreferableTarget(targetId); + + // warmup + Mat output = net.forward(); + + MatShape netInputShape = shape(input); + size_t weightsMemory = 0, blobsMemory = 0; + net.getMemoryConsumption(netInputShape, weightsMemory, blobsMemory); + int64 flops = 
net.getFLOPS(netInputShape); + CV_Assert(flops > 0); + + std::cout + << "IN=" << divUp(input.total() * input.elemSize(), 1u<<10) << " Kb " << netInputShape + << " OUT=" << divUp(output.total() * output.elemSize(), 1u<<10) << " Kb " << shape(output) + << " Weights(parameters): " << divUp(weightsMemory, 1u<<10) << " Kb" + << " MFLOPS=" << flops * 1e-6 << std::endl; + + TEST_CYCLE() + { + Mat res = net.forward(); + } + EXPECT_NEAR(flops, declared_flops, declared_flops * 1e-6); + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(/**/, Conv1D, Combine( + Conv1DParamID::all(), + dnnBackendsAndTargets(false, false) // defined in ../test/test_common.hpp +)); + +} // namespace diff --git a/modules/dnn/perf/perf_convolution3d.cpp b/modules/dnn/perf/perf_convolution3d.cpp index e81a4bfc5b..22f61b3b9c 100644 --- a/modules/dnn/perf/perf_convolution3d.cpp +++ b/modules/dnn/perf/perf_convolution3d.cpp @@ -46,7 +46,7 @@ struct Conv3DParamID CONV_100 = 16, CONV_LAST = sizeof(testConvolution3DConfigs) / sizeof(testConvolution3DConfigs[0]) }; - int val_; \ + int val_; Conv3DParamID(int val = 0) : val_(val) {} operator int() const { return val_; } static ::testing::internal::ParamGenerator all() @@ -59,7 +59,7 @@ struct Conv3DParamID Conv3DParamID v_[NUM]; for (int i = 0; i < NUM; ++i) { v_[i] = Conv3DParamID(i); } // reduce generated code size return ::testing::ValuesIn(v_, v_ + NUM); } -}; \ +}; static inline void PrintTo(const Conv3DParamID& v, std::ostream* os) { CV_Assert((int)v >= 0); CV_Assert((int)v < Conv3DParamID::CONV_LAST); diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index 23ece025e7..aef3bc2c31 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -111,6 +111,10 @@ PERF_TEST_P_(DNNTestNetwork, ENet) if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) || (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) throw SkipTestException(""); +#if 
defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + throw SkipTestException(""); +#endif processNet("dnn/Enet-model-best.net", "", "enet.yml", Mat(cv::Size(512, 256), CV_32FC3)); } @@ -126,7 +130,7 @@ PERF_TEST_P_(DNNTestNetwork, OpenFace) if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_HDDL)) throw SkipTestException(""); #endif processNet("dnn/openface_nn4.small2.v1.t7", "", "", @@ -168,7 +172,7 @@ PERF_TEST_P_(DNNTestNetwork, DenseNet_121) PERF_TEST_P_(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages) { if (backend == DNN_BACKEND_HALIDE || - (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)) + (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_HDDL))) throw SkipTestException(""); // The same .caffemodel but modified .prototxt // See https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/pose/poseParameters.cpp @@ -202,6 +206,10 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) throw SkipTestException("Test is disabled in OpenVINO 2020.4"); #endif +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000) // nGraph compilation failure + if (target == DNN_TARGET_MYRIAD) + throw SkipTestException(""); +#endif Mat sample = imread(findDataFile("dnn/dog416.png")); cvtColor(sample, sample, COLOR_BGR2RGB); @@ -214,7 +222,7 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv4) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); - if (target == DNN_TARGET_MYRIAD) + if (target == 
DNN_TARGET_MYRIAD) // not enough resources throw SkipTestException(""); #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) @@ -233,6 +241,10 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv4_tiny) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000) // nGraph compilation failure + if (target == DNN_TARGET_MYRIAD) + throw SkipTestException(""); +#endif Mat sample = imread(findDataFile("dnn/dog416.png")); cvtColor(sample, sample, COLOR_BGR2RGB); Mat inp; @@ -263,6 +275,10 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN) #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2019020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) throw SkipTestException("Test is disabled in OpenVINO 2019R2"); +#endif +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + throw SkipTestException("Test is disabled in OpenVINO 2021.1 / MYRIAD"); #endif if (backend == DNN_BACKEND_HALIDE || (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) || diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index f39305eef4..0f60a393a5 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -122,6 +122,8 @@ public: { if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD) return true; + if (std::string::npos != i->find("HDDL") && target == DNN_TARGET_HDDL) + return true; else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA) return true; else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU) @@ -184,6 +186,14 @@ private: #endif #ifdef HAVE_DNN_NGRAPH backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD)); +#endif + 
} + if (checkIETarget(DNN_TARGET_HDDL)) { +#ifdef HAVE_DNN_IE_NN_BUILDER_2019 + backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_HDDL)); +#endif +#ifdef HAVE_DNN_NGRAPH + backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_HDDL)); #endif } #ifdef HAVE_DNN_IE_NN_BUILDER_2019 @@ -1172,6 +1182,7 @@ struct Net::Impl : public detail::NetImplBase preferableBackend = DNN_BACKEND_DEFAULT; preferableTarget = DNN_TARGET_CPU; skipInfEngineInit = false; + hasDynamicShapes = false; } Ptr netInputLayer; @@ -1183,6 +1194,7 @@ struct Net::Impl : public detail::NetImplBase int preferableTarget; String halideConfigFile; bool skipInfEngineInit; + bool hasDynamicShapes; // Map host data to backend specific wrapper. std::map > backendWrappers; @@ -1379,6 +1391,7 @@ struct Net::Impl : public detail::NetImplBase preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD || + preferableTarget == DNN_TARGET_HDDL || preferableTarget == DNN_TARGET_FPGA ); } @@ -1813,7 +1826,7 @@ struct Net::Impl : public detail::NetImplBase INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2) && supportsCPUFallback; // TODO: there is a bug in Myriad plugin with custom layers shape infer. - if (preferableTarget == DNN_TARGET_MYRIAD) + if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) { for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i) { @@ -1823,6 +1836,7 @@ struct Net::Impl : public detail::NetImplBase // TODO: fix these workarounds if (preferableTarget == DNN_TARGET_MYRIAD || + preferableTarget == DNN_TARGET_HDDL || preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16) customizable &= ld.type != "Concat"; @@ -1910,6 +1924,7 @@ struct Net::Impl : public detail::NetImplBase // Convert weights in FP16 for specific targets. 
if ((preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD || + preferableTarget == DNN_TARGET_HDDL || preferableTarget == DNN_TARGET_FPGA) && !fused) { #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) @@ -2104,7 +2119,7 @@ struct Net::Impl : public detail::NetImplBase bool customizable = ld.id != 0 && supportsCPUFallback; // TODO: there is a bug in Myriad plugin with custom layers shape infer. - if (preferableTarget == DNN_TARGET_MYRIAD) + if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) { for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i) { @@ -2114,6 +2129,7 @@ struct Net::Impl : public detail::NetImplBase // TODO: fix these workarounds if (preferableTarget == DNN_TARGET_MYRIAD || + preferableTarget == DNN_TARGET_HDDL || preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16) customizable &= ld.type != "Concat"; @@ -2654,15 +2670,17 @@ struct Net::Impl : public detail::NetImplBase // OpenCL: fuse convolution layer followed by eltwise + relu // CUDA: fuse convolution layer followed by eltwise (and optional activation) - if ((IS_DNN_OPENCL_TARGET(preferableTarget) || IS_DNN_CUDA_TARGET(preferableTarget)) && - ld.layerInstance->type == "Convolution" ) + while (nextData && + (IS_DNN_OPENCL_TARGET(preferableTarget) || IS_DNN_CUDA_TARGET(preferableTarget)) && + ld.layerInstance->type == "Convolution" + ) // semantic of 'if' { - Ptr nextEltwiseLayer; - if( nextData ) - nextEltwiseLayer = nextData->layerInstance.dynamicCast(); + Ptr nextEltwiseLayer = nextData->layerInstance.dynamicCast(); + if (nextEltwiseLayer.empty()) + break; + #ifdef HAVE_CUDA // CUDA backend supports fusion with eltwise sum (without variable channels) - // `nextEltwiseLayer` is reset if eltwise layer doesn't have a compatible configuration for fusion if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty()) { // we create a temporary backend node for eltwise layer to obtain 
the eltwise configuration @@ -2672,10 +2690,43 @@ struct Net::Impl : public detail::NetImplBase // CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used. // Hence, a successful cast to EltwiseOp implies that the number of channels is same in all operand tensors. if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty()) - nextEltwiseLayer = Ptr(); + break; } #endif - if (!nextEltwiseLayer.empty() && nextData && nextData->inputBlobsId.size() == 2) + + if (IS_DNN_OPENCL_TARGET(preferableTarget) && pinsToKeep.count(lpNext) != 0) + break; + if (nextData->inputBlobsId.size() != 2) + break; + + if (IS_DNN_OPENCL_TARGET(preferableTarget)) + { + if (!nextData->params.has("operation") || toLowerCase(nextData->params.get("operation")) == "sum") + { + if (nextData->params.has("coeff")) + { + DictValue paramCoeff = nextData->params.get("coeff"); + int n = paramCoeff.size(); + bool isCoeffOneOne = (n == 2); + for (int i = 0; isCoeffOneOne && i < n; i++) + { + float c = paramCoeff.get(i); + isCoeffOneOne &= (c == 1.0f); + } + if (!isCoeffOneOne) + { + CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only"); + break; + } + } + } + else + { + CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get("operation")); + break; + } + } + { LayerData *eltwiseData = nextData; @@ -2732,11 +2783,13 @@ struct Net::Impl : public detail::NetImplBase // we need to check them separately; hence, the fuse variables bool fuse_eltwise = false, fuse_activation = false; + Ptr activ_power; if (IS_DNN_OPENCL_TARGET(preferableTarget) && !nextFusabeleActivLayer.empty() && nextData && (!nextData->type.compare("ReLU") || !nextData->type.compare("ChannelsPReLU") || - !nextData->type.compare("Power")) && + (!nextData->type.compare("Power") && (activ_power = nextFusabeleActivLayer.dynamicCast()) && activ_power->scale 
== 1.0f) + ) && currLayer->setActivation(nextFusabeleActivLayer)) { fuse_eltwise = true; @@ -2868,6 +2921,8 @@ struct Net::Impl : public detail::NetImplBase } } } + + break; } } @@ -3107,11 +3162,11 @@ struct Net::Impl : public detail::NetImplBase Ptr layer = ld.layerInstance; - TickMeter tm; - tm.start(); - if( !ld.skip ) { + TickMeter tm; + tm.start(); + std::map >::iterator it = ld.backendNodes.find(preferableBackend); if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty()) { @@ -3320,12 +3375,15 @@ struct Net::Impl : public detail::NetImplBase CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); } } + + tm.stop(); + int64 t = tm.getTimeTicks(); + layersTimings[ld.id] = (t > 0) ? t : t + 1; // zero for skipped layers only } else - tm.reset(); - - tm.stop(); - layersTimings[ld.id] = tm.getTimeTicks(); + { + layersTimings[ld.id] = 0; + } ld.flag = 1; } @@ -3485,6 +3543,46 @@ struct Net::Impl : public detail::NetImplBase shapes = inOutShapes[layerId]; } + void updateLayersShapes() + { + CV_Assert(!layers[0].outputBlobs.empty()); + ShapesVec inputShapes; + for(int i = 0; i < layers[0].outputBlobs.size(); i++) + { + Mat& inp = layers[0].outputBlobs[i]; + CV_Assert(inp.total()); + if (preferableBackend == DNN_BACKEND_OPENCV && + preferableTarget == DNN_TARGET_OPENCL_FP16) + { + layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S); + } + inputShapes.push_back(shape(inp)); + } + LayersShapesMap layersShapes; + layersShapes[0].in = inputShapes; + for (MapIdToLayerData::iterator it = layers.begin(); + it != layers.end(); it++) + { + int layerId = it->first; + std::vector& inputLayerIds = it->second.inputBlobsId; + if (layersShapes[layerId].in.empty()) + { + for(int i = 0; i < inputLayerIds.size(); i++) + { + int inputLayerId = inputLayerIds[i].lid; + LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId); + if(inputIt == layersShapes.end() || inputIt->second.out.empty()) + { + 
getLayerShapesRecursively(inputLayerId, layersShapes); + } + const MatShape& shape = layersShapes[inputLayerId].out[inputLayerIds[i].oid]; + layersShapes[layerId].in.push_back(shape); + } + it->second.layerInstance->updateMemoryShapes(layersShapes[layerId].in); + } + } + } + LayerPin getLatestLayerPin(const std::vector& pins) { return *std::max_element(pins.begin(), pins.end()); @@ -3898,6 +3996,8 @@ int Net::addLayer(const String &name, const String &type, LayerParams ¶ms) int id = ++impl->lastLayerId; impl->layerNameToId.insert(std::make_pair(name, id)); impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params))); + if (params.get("has_dynamic_shapes", false)) + impl->hasDynamicShapes = true; return id; } @@ -4229,8 +4329,13 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons bool oldShape = prevShape == blobShape; blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]); - if (!oldShape) + if (!oldShape) { ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid]; + if (impl->hasDynamicShapes) + { + impl->updateLayersShapes(); + } + } if (!ld.outputBlobsWrappers[pin.oid].empty()) { @@ -4501,6 +4606,7 @@ string Net::Impl::dump() case DNN_TARGET_OPENCL: out << "OCL"; colorId = 1; break; case DNN_TARGET_OPENCL_FP16: out << "OCL_FP16"; colorId = 2; break; case DNN_TARGET_MYRIAD: out << "MYRIAD"; colorId = 3; break; + case DNN_TARGET_HDDL: out << "HDDL"; colorId = 8; break; case DNN_TARGET_VULKAN: out << "VULKAN"; colorId = 7; break; case DNN_TARGET_FPGA: out << "FPGA"; colorId = 4; break; case DNN_TARGET_CUDA: out << "CUDA"; colorId = 5; break; @@ -5179,6 +5285,10 @@ bool Layer::getMemoryShapes(const std::vector &inputs, return false; } +bool Layer::updateMemoryShapes(const std::vector &inputs) +{ + return true; +} ////////////////////////////////////////////////////////////////////////// static Mutex& getLayerFactoryMutex() diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index 
84b984ac97..c646c1fe3a 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -556,6 +556,9 @@ void InfEngineNgraphNet::init(Target targetId) case DNN_TARGET_MYRIAD: device_name = "MYRIAD"; break; + case DNN_TARGET_HDDL: + device_name = "HDDL"; + break; case DNN_TARGET_FPGA: device_name = "FPGA"; break; @@ -683,7 +686,7 @@ void InfEngineNgraphNet::initPlugin(InferenceEngine::CNNNetwork& net) #endif } std::map config; - if (device_name == "MYRIAD") { + if (device_name == "MYRIAD" || device_name == "HDDL") { #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) config.emplace("MYRIAD_DETECT_NETWORK_BATCH", CONFIG_VALUE(NO)); #else diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp index 5acdc3fa1e..5f93b45886 100644 --- a/modules/dnn/src/layers/blank_layer.cpp +++ b/modules/dnn/src/layers/blank_layer.cpp @@ -125,7 +125,7 @@ public: InferenceEngine::Builder::Layer ieLayer(name); ieLayer.setName(name); - if (preferableTarget == DNN_TARGET_MYRIAD) + if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) { ieLayer.setType("Copy"); } diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 023c8b40d8..02495f45ea 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -48,6 +48,8 @@ #include "../ie_ngraph.hpp" #include "../op_vkcom.hpp" +#include + #include "opencv2/core/hal/hal.hpp" #include "opencv2/core/hal/intrin.hpp" #include @@ -119,17 +121,22 @@ public: MatSize weightShape = blobs.empty() ? 
inputs[1].size : blobs[0].size; CV_Assert(inputs[0].dims == outputs[0].dims); + if (weightShape.dims() == 3) + { + kernel_size.assign(1, kernel_size[0]); + strides.assign(1, strides[0]); + } CV_Assert(weightShape.dims() == kernel_size.size() + 2); for (int i = 0; i < kernel_size.size(); i++) { CV_Assert(weightShape[i + 2] == kernel_size[i]); } const Mat &input = inputs[0]; - CV_Assert((input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S)); + CV_Assert(((input.dims == 3 && kernel_size.size() == 1) || input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S)); for (size_t i = 0; i < outputs.size(); i++) { CV_Assert(inputs[i].type() == input.type()); - CV_Assert((inputs[i].dims == 4 || inputs[i].dims == 5) && inputs[i].size[1] == input.size[1]); + CV_Assert(((input.dims == 3 && kernel_size.size() == 1) || inputs[i].dims == 4 || inputs[i].dims == 5) && inputs[i].size[1] == input.size[1]); for (int j = 0; j < inputs[i].dims; j++) { CV_Assert(inputs[i].size[j] == input.size[j]); } @@ -300,36 +307,41 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { + size_t ksize = kernel_size.size(); +#ifdef HAVE_CUDA if (backendId == DNN_BACKEND_CUDA) { /* only convolution 2d and 3d supported */ - if(kernel_size.size() == 2 || kernel_size.size() == 3) + if (ksize == 2 || ksize == 3) return true; return false; } - +#endif #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - if (kernel_size.size() == 3) + if (ksize == 1) + return false; + if (ksize == 3) return preferableTarget == DNN_TARGET_CPU; - if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableTarget != DNN_TARGET_MYRIAD) && blobs.empty()) + bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL; + if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || !isMyriad) && blobs.empty()) return 
false; - return (preferableTarget != DNN_TARGET_MYRIAD || dilation.width == dilation.height); + return (!isMyriad || dilation.width == dilation.height); } - else #endif - { - if (kernel_size.size() == 3) - return (preferableTarget == DNN_TARGET_CPU && backendId == DNN_BACKEND_OPENCV); - else if (kernel_size.size() == 2) - return backendId == DNN_BACKEND_OPENCV || - (backendId == DNN_BACKEND_HALIDE && !blobs.empty()) || - (backendId == DNN_BACKEND_VKCOM && haveVulkan()); - else - return false; - } + if (backendId == DNN_BACKEND_OPENCV) + return ksize >= 1 && ksize <= 3; +#ifdef HAVE_HALIDE + if (backendId == DNN_BACKEND_HALIDE) + return ksize == 2 && !blobs.empty(); +#endif +#ifdef HAVE_VULKAN + if (backendId == DNN_BACKEND_VKCOM) + return ksize == 2; +#endif + return false; } bool getMemoryShapes(const std::vector &inputs, @@ -381,18 +393,27 @@ public: inputs_arr.getMatVector(inputs); // prepare weightsMat where each row is aligned and has enough zero padding on the right to // use vectorized (i.e. with intrinsics) loops without tail processing - Mat wm = blobs.empty() ? 
inputs[1].reshape(1, numOutput) : blobs[0].reshape(1, numOutput); - if( wm.step1() % VEC_ALIGN != 0 ) + if (!blobs.empty()) { - int newcols = (int)alignSize(wm.step1(), VEC_ALIGN); - Mat wm_buffer = Mat(numOutput, newcols, wm.type()); - Mat wm_padding = wm_buffer.colRange(wm.cols, newcols); - wm_padding.setTo(Scalar::all(0.)); - Mat wm_aligned = wm_buffer.colRange(0, wm.cols); - wm.copyTo(wm_aligned); - wm = wm_aligned; + Mat wm = blobs[0].reshape(1, numOutput); + if( wm.step1() % VEC_ALIGN != 0 ) + { + int newcols = (int)alignSize(wm.step1(), VEC_ALIGN); + Mat wm_buffer = Mat(numOutput, newcols, wm.type()); + Mat wm_padding = wm_buffer.colRange(wm.cols, newcols); + wm_padding.setTo(Scalar::all(0.)); + Mat wm_aligned = wm_buffer.colRange(0, wm.cols); + wm.copyTo(wm_aligned); + wm = wm_aligned; + } + weightsMat = wm; } - weightsMat = wm; + else + { + // initialized in .forward() + weightsMat.release(); + } + weightsMultipliers.assign(numOutput, 1.0); Mat biasMat = hasBias() ? blobs[1].reshape(1, numOutput) : Mat(); @@ -436,6 +457,14 @@ public: Ptr activ_power = activ.dynamicCast(); if (!activ_power.empty()) { + if (activ_power->scale != 1.0f) // not supported well by implementation, #17964 + { + // FIXIT no way to check number of blobs (like, eltwise input) + CV_LOG_DEBUG(NULL, "DNN/OpenCL: can't configure Power activation (scale != 1.0f)"); + activ.release(); + newActiv = false; + return false; + } if (activ_power->scale != 1.f || activ_power->shift != 0.f) { const int outCh = blobs[0].size[0]; @@ -897,8 +926,11 @@ public: { size_t karea = std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies()); - CV_Assert_N( - (input.dims == 4 || input.dims == 5) && (input.dims == output.dims), + bool isConv1D = input.dims == 3; + bool isConv2D = input.dims == 4; + bool isConv3D = input.dims == 5; + CV_CheckEQ(static_cast(kernel_size.size()), input.dims - 2, ""); + CV_Assert_N(input.dims == output.dims, input.size[0] == output.size[0], weights.rows == 
output.size[1], weights.cols == (input.size[1]/ngroups)*karea, @@ -908,12 +940,15 @@ public: input.isContinuous(), output.isContinuous(), biasvec.size() == (size_t)output.size[1]+2); + CV_Check(weights.step1(), weights.step1() % VEC_ALIGN == 0, ""); + CV_CheckType(weights.type(), CV_32FC1, ""); ParallelConv p; p.input_ = &input; p.weights_ = &weights; p.output_ = &output; - for( int i = 0; i < 4; i++ ) p.outShape[i] = output.size[i]; + int max_ind = isConv1D? 3: 4; + for( int i = 0; i < max_ind; i++ ) p.outShape[i] = output.size[i]; p.outShape[1] /= ngroups; p.kernel_size = kernel_size; p.strides = strides; p.dilations = dilations; @@ -925,20 +960,19 @@ public: int inpCnAll = input.size[1]; int depth = (input.dims == 5) ? input.size[2] : 1; int width = input.size[input.dims - 1]; - int height = input.size[input.dims - 2]; + int height = isConv1D? 1 : input.size[input.dims - 2]; int inpCn = inpCnAll / ngroups; - bool isConv2D = kernel_size.size() == 2; - - p.is1x1_ = isConv2D && kernel_size[0] == 1 && kernel_size[1] == 1 && - pads_begin[0] == 0 && pads_begin[1] == 0; + p.is1x1_ = (isConv2D && kernel_size[0] == 1 && kernel_size[1] == 1 && + pads_begin[0] == 0 && pads_begin[1] == 0) || + (isConv1D && pads_begin[0] == 0 && kernel_size[0] == 1); p.useAVX = checkHardwareSupport(CPU_AVX) && isConv2D; p.useAVX2 = checkHardwareSupport(CPU_AVX2) && isConv2D; p.useAVX512 = CV_CPU_HAS_SUPPORT_AVX512_SKX && isConv2D; - int kernel_d = !isConv2D? kernel_size[0] : 1; - int kernel_h = kernel_size[kernel_size.size() - 2]; + int kernel_d = isConv3D? kernel_size[0] : 1; + int kernel_h = isConv1D? 1 : kernel_size[kernel_size.size() - 2]; int kernel_w = kernel_size.back(); int blk_size_cn0 = cvCeil(800./(kernel_w*kernel_h)); @@ -948,14 +982,20 @@ public: ncn = std::min(ncn, inpCn); p.blk_size_cn = ncn; - int dil_d = !isConv2D? dilations[0] : 1; - int dil_h = dilations[dilations.size() - 2]; + int dil_d = isConv3D? dilations[0] : 1; + int dil_h = isConv1D? 
1 : dilations[dilations.size() - 2]; int dil_w = dilations.back(); p.ofstab_.resize(karea * ncn); int* ofstab = &p.ofstab_[0]; - if (isConv2D) + if (isConv1D) + { + for( int k = 0; k < ncn; k++ ) + for( int k_c = 0; k_c < kernel_w; k_c++ ) + ofstab[k*kernel_w + k_c] = k*width + k_c*dil_w; + } + else if (isConv2D) { for( int k = 0; k < ncn; k++ ) for( int k_r = 0; k_r < kernel_h; k_r++ ) @@ -984,34 +1024,36 @@ public: { const int valign = ConvolutionLayerImpl::VEC_ALIGN; int ngroups = ngroups_, batchSize = input_->size[0]*ngroups; + bool isConv1D = input_->dims == 3; bool isConv2D = input_->dims == 4; + bool isConv3D = input_->dims == 5; int outW = output_->size[output_->dims - 1]; - int outH = output_->size[output_->dims - 2]; + int outH = isConv1D? 1 : output_->size[output_->dims - 2]; int outCn = output_->size[1]/ngroups; - int depth = !isConv2D? input_->size[2] : 1; - int height = input_->size[input_->dims - 2]; + int depth = isConv3D? input_->size[2] : 1; + int height = isConv1D? 1 : input_->size[input_->dims - 2]; int width = input_->size[input_->dims - 1]; int inpCn = input_->size[1]/ngroups; const int nstripes = nstripes_; - int kernel_d = !isConv2D? kernel_size[0] : 1; - int kernel_h = kernel_size[kernel_size.size() - 2]; + int kernel_d = isConv3D? kernel_size[0] : 1; + int kernel_h = isConv1D? 1 : kernel_size[kernel_size.size() - 2]; int kernel_w = kernel_size.back(); int karea = kernel_w*kernel_h*kernel_d; - int pad_d = !isConv2D? pads_begin[0] : 0; - int pad_t = pads_begin[pads_begin.size() - 2]; + int pad_d = isConv3D? pads_begin[0] : 0; + int pad_t = isConv1D? 0 : pads_begin[pads_begin.size() - 2]; int pad_l = pads_begin.back(); - int stride_d = !isConv2D? strides[0] : 0; - int stride_h = strides[strides.size() - 2]; + int stride_d = isConv3D? strides[0] : 0; + int stride_h = isConv1D? 0 : strides[strides.size() - 2]; int stride_w = strides.back(); - int dilation_d = !isConv2D? 
dilations[0] : 1; - int dilation_h = dilations[dilations.size() - 2]; + int dilation_d = isConv3D? dilations[0] : 1; + int dilation_h = isConv1D? 1 : dilations[dilations.size() - 2]; int dilation_w = dilations.back(); int i, j, k, d; @@ -1251,7 +1293,71 @@ public: // do im2row for a part of input tensor float* rowbuf = rowbuf0; - if (isConv2D) + if (isConv1D) + { + for( ofs = ofs0; ofs < ofs1; out_j = 0, ++out_i ) + { + int delta = std::min(ofs1 - ofs, outW - out_j); + int out_j1 = out_j + delta; + + int in_j = out_j * stride_w - pad_l; + const float* imgptr = data_inp0 + cn0*width + in_j; + ofs += delta; + + // do im2row for a part of input tensor + if( is1x1 ) + { + for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w ) + { + for( k = 0; k < vsz; k++ ) + rowbuf[k] = imgptr[k*inpPlaneSize]; + } + } + else + { + for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w, in_j += stride_w ) + { + // this condition should be true for most of the tensor elements, i.e. + // most of the time the kernel aperture is inside the tensor X-Y plane. + if( out_j + 2 <= out_j1 && 0 <= in_j && in_j + stride_w*2 <= width - (kernel_w-1)*dilation_w ) + { + for( k = 0; k < vsz; k++ ) + { + int k1 = ofstab[k]; + float v0 = imgptr[k1]; + float v1 = imgptr[k1 + stride_w]; + rowbuf[k] = v0; + rowbuf[k+vsz_a] = v1; + } + out_j++; + rowbuf += vsz_a; + imgptr += stride_w; + in_j += stride_w; + } + else + { + int i0 = std::max(0, (-in_j + dilation_w-1)/dilation_w); + int i1 = std::min(kernel_w, (width - in_j + dilation_w-1)/dilation_w); + + // here some non-continuous sub-row of the row will not be + // filled from the tensor; we need to make sure that the uncovered + // elements are explicitly set to 0's. the easiest way is to + // set all the elements to 0's before the loop. 
+ memset(rowbuf, 0, vsz*sizeof(rowbuf[0])); + for( k = 0; k < ncn; k++ ) + { + for( i = i0; i < i1; i++ ) + { + int imgofs = k*width + i*dilation_w; + rowbuf[k*kernel_w + i] = imgptr[imgofs]; + } + } + } + } + } + } + } + else if (isConv2D) { if( is1x1 && stride_w == 1 && stride_h == 1 ) { @@ -1484,9 +1590,12 @@ public: vs12 = v_setzero_f32(), vs13 = v_setzero_f32(); for( k = 0; k < vsz; k += 4, rptr += 4 ) { - v_float32x4 w0 = v_load_aligned(wptr0 + k), w1 = v_load_aligned(wptr1 + k); - v_float32x4 r0 = v_load_aligned(rptr), r1 = v_load_aligned(rptr + vsz_a), - r2 = v_load_aligned(rptr + vsz_a*2), r3 = v_load_aligned(rptr + vsz_a*3); + v_float32x4 w0 = v_load_aligned(wptr0 + k); + v_float32x4 w1 = v_load_aligned(wptr1 + k); + v_float32x4 r0 = v_load_aligned(rptr); + v_float32x4 r1 = v_load_aligned(rptr + vsz_a); + v_float32x4 r2 = v_load_aligned(rptr + vsz_a*2); + v_float32x4 r3 = v_load_aligned(rptr + vsz_a*3); vs00 += w0*r0; vs01 += w0*r1; @@ -1556,6 +1665,12 @@ public: #ifdef HAVE_OPENCL bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) { + if (kernel_size.size() != 2) + { + // no OpenCL optimizations, see .supportedBacked() + return false; + } + std::vector inputs; std::vector outputs; @@ -1739,26 +1854,35 @@ public: if (blobs.empty()) { Mat wm = inputs[1].reshape(1, outCn); - if( wm.step1() % VEC_ALIGN != 0 ) + if (wm.data != weightsMat.data) { - wm.copyTo(weightsMat); + int newcols = (int)alignSize(wm.step1(), VEC_ALIGN); + Mat wm_buffer = Mat(numOutput, newcols, wm.type()); + Mat wm_padding = wm_buffer.colRange(wm.cols, newcols); + wm_padding.setTo(Scalar::all(0.)); + weightsMat = wm_buffer.colRange(0, wm.cols); + + wm.copyTo((const Mat&)weightsMat); if (inputs.size() > 2) { Mat biasMat = inputs[2].reshape(1, outCn); biasMat.col(0).copyTo(biasvec); - biasvec.resize(outCn + 2); - } - else - { - biasvec.resize(outCn + 2, 0); } + biasvec.resize(outCn + 2, 0); } } - - /*printf("conv %s: input (%d x %d x %d x %d), 
kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n", - name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], inputs[0].size[3], - kernel.width, kernel.height, pad.width, pad.height, - stride.width, stride.height, dilation.width, dilation.height);*/ + /*if (inputs[0].dims > 3) { + printf("conv %s: input (%d x %d x %d x %d), kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n", + name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], inputs[0].size[3], + kernel.width, kernel.height, pad.width, pad.height, + stride.width, stride.height, dilation.width, dilation.height); + } + else { + printf("conv %s: input (%d x %d x %d), kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n", + name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], + kernel.width, kernel.height, pad.width, pad.height, + stride.width, stride.height, dilation.width, dilation.height); + }*/ int inpGroupCn = blobs.empty() ? 
inputs[1].size[1] : blobs[0].size[1]; CV_Assert_N(inputs.size() >= (size_t)1, inputs[0].size[1] % inpGroupCn == 0, outputs.size() == 1, inputs[0].data != outputs[0].data); diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp index 4a096ce19c..db986bc897 100644 --- a/modules/dnn/src/layers/mvn_layer.cpp +++ b/modules/dnn/src/layers/mvn_layer.cpp @@ -126,7 +126,10 @@ public: { #ifdef HAVE_DNN_IE_NN_BUILDER_2019 if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - return !zeroDev && (preferableTarget != DNN_TARGET_MYRIAD || eps <= 1e-7f); + { + bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL; + return !zeroDev && (!isMyriad || eps <= 1e-7f); + } #endif #ifdef HAVE_DNN_NGRAPH if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp index 3d33511d17..a979fdedb6 100644 --- a/modules/dnn/src/layers/normalize_bbox_layer.cpp +++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp @@ -75,7 +75,8 @@ public: if (pnorm != 2) return false; - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && preferableTarget == DNN_TARGET_MYRIAD) + bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL; + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && isMyriad) return !acrossSpatial; return startAxis == 1; diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp index c83cf026de..b286133419 100644 --- a/modules/dnn/src/layers/padding_layer.cpp +++ b/modules/dnn/src/layers/padding_layer.cpp @@ -103,9 +103,12 @@ public: { #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL; return 
INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && - (preferableTarget != DNN_TARGET_MYRIAD || + (!isMyriad || (dstRanges.size() == 4 && paddings[0].first == 0 && paddings[0].second == 0)); + } #endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 0e07e5352e..621315a572 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -95,8 +95,9 @@ public: computeMaxIdx = true; globalPooling = false; isGlobalPooling = std::vector(3, false); - stride = Size(1, 1); - pad_t = pad_l = pad_b = pad_r = 0; + + hasDynamicShapes = params.get("has_dynamic_shapes", false); + shapesInitialized = !hasDynamicShapes; if (params.has("pool") || params.has("kernel_size") || params.has("kernel_w") || params.has("kernel_h")) @@ -115,16 +116,6 @@ public: getPoolingKernelParams(params, kernel_size, isGlobalPooling, pads_begin, pads_end, strides, padMode); globalPooling = isGlobalPooling[0] || isGlobalPooling[1] || isGlobalPooling[2]; - if (kernel_size.size() == 2) { - kernel = Size(kernel_size[1], kernel_size[0]); - stride = Size(strides[1], strides[0]); - pad = Size(pads_begin[1], pads_begin[0]); - - pad_t = pads_begin[0]; - pad_l = pads_begin[1]; - pad_b = pads_end[0]; - pad_r = pads_end[1]; - } } else if (params.has("pooled_w") || params.has("pooled_h")) { @@ -172,17 +163,20 @@ public: finalKernel.push_back(isGlobalPooling[idx] ? 
inp[i] : kernel_size[idx]); } kernel_size = finalKernel; - kernel = Size(kernel_size[1], kernel_size[0]); } getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); - if (pads_begin.size() == 2) { - pad_t = pads_begin[0]; - pad_l = pads_begin[1]; - pad_b = pads_end[0]; - pad_r = pads_end[1]; + + if (inputs[0].dims == 3) + { + //Pool1D + kernel_size.erase(kernel_size.begin() + 1); + strides.erase(strides.begin() + 1); + pads_begin.erase(pads_begin.begin() + 1); + pads_end.erase(pads_end.begin() + 1); } + #ifdef HAVE_OPENCL poolOp.release(); #endif @@ -202,9 +196,11 @@ public: return false; if (kernel_size.size() == 3) return preferableTarget == DNN_TARGET_CPU; - if (preferableTarget == DNN_TARGET_MYRIAD) { + if (kernel_size.size() == 1) + return false; + if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) { #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) - if (type == MAX && (pad_l == 1 && pad_t == 1) && stride == Size(2, 2) ) { + if (type == MAX && (pads_begin[1] == 1 && pads_begin[0] == 1) && (strides[0] == 2 && strides[1] == 2)) { return !isMyriadX(); } #endif @@ -216,21 +212,30 @@ public: #endif if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - return !computeMaxIdx && type != STOCHASTIC; + return !computeMaxIdx && type != STOCHASTIC && kernel_size.size() > 1; } - if (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_VKCOM) + else if (backendId == DNN_BACKEND_OPENCV) { if (kernel_size.size() == 3) - return (backendId == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU); - if (kernel_size.empty() || kernel_size.size() == 2) - return backendId == DNN_BACKEND_OPENCV || - (backendId == DNN_BACKEND_HALIDE && haveHalide() && - (type == MAX || (type == AVE && !pad_t && !pad_l && !pad_b && !pad_r))) || - (backendId == DNN_BACKEND_VKCOM && haveVulkan() && - (type == MAX || type == AVE)); + return preferableTarget == DNN_TARGET_CPU; + if 
(kernel_size.size() <= 2) + return true; else return false; } + else if (backendId == DNN_BACKEND_HALIDE) + { + if (kernel_size.empty() || kernel_size.size() == 2) + return haveHalide() && + (type == MAX || (type == AVE && !pads_begin[0] && !pads_begin[1] && !pads_end[0] && !pads_end[1])); + } + else if (backendId == DNN_BACKEND_VKCOM) + { + if (kernel_size.empty() || kernel_size.size() == 2) + return haveVulkan() && + (type == MAX || type == AVE); + return false; + } return false; } @@ -250,12 +255,25 @@ public: config.in_shape = shape(inputs[0]); config.out_shape = shape(outputs[0]); - config.kernel = kernel; - config.pad_l = pad_l; - config.pad_t = pad_t; - config.pad_r = pad_r; - config.pad_b = pad_b; - config.stride = stride; + if (inputs[0].dims == 3) + { + //Pool1D + config.kernel = Size(kernel_size[0], 1); + config.stride = Size(strides[0], 1); + config.pad_l = pads_begin[0]; + config.pad_t = 0; + config.pad_r = pads_end[0]; + config.pad_b = 0; + } + else + { + config.kernel = Size(kernel_size[1], kernel_size[0]); + config.stride = Size(strides[1], strides[0]); + config.pad_l = pads_begin[1]; + config.pad_t = pads_begin[0]; + config.pad_r = pads_end[1]; + config.pad_b = pads_end[0]; + } config.channels = inputs[0].size[1]; config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX : (type == AVE ? 
LIBDNN_POOLING_METHOD_AVE : @@ -572,7 +590,6 @@ public: public: const Mat* src, *rois; Mat *dst, *mask; - Size kernel, stride; int pad_l, pad_t, pad_r, pad_b; bool avePoolPaddedArea; int nstripes; @@ -598,7 +615,7 @@ public: CV_Assert_N( src.isContinuous(), dst.isContinuous(), src.type() == CV_32F, src.type() == dst.type(), - src.dims == 4 || src.dims == 5, dst.dims == 4 || dst.dims == 5, + src.dims == 3 || src.dims == 4 || src.dims == 5, dst.dims == 3 || dst.dims == 4 || dst.dims == 5, (((poolingType == ROI || poolingType == PSROI) && dst.size[0] == rois.size[0]) || src.size[0] == dst.size[0]), poolingType == PSROI || src.size[1] == dst.size[1], @@ -606,6 +623,9 @@ public: PoolingInvoker p; + bool isPool1D = src.dims == 3; + bool isPool3D = src.dims == 5; + p.src = &src; p.rois = &rois; p.dst = &dst; @@ -616,12 +636,10 @@ public: p.pads_end = pads_end; p.mask = &mask; - p.kernel = Size(kernel_size[1], kernel_size[0]); - p.stride = Size(strides[1], strides[0]); p.pad_l = pads_begin.back(); - p.pad_t = pads_begin[pads_begin.size() - 2]; + p.pad_t = isPool1D ? 0 : pads_begin[pads_begin.size() - 2]; p.pad_r = pads_end.back(); - p.pad_b = pads_end[pads_end.size() - 2]; + p.pad_b = isPool1D ? 0 : pads_end[pads_end.size() - 2]; p.avePoolPaddedArea = avePoolPaddedArea; p.nstripes = nstripes; @@ -631,11 +649,11 @@ public: if( !computeMaxIdx ) { - int height = src.size[src.dims - 2]; + int height = isPool1D ? 1 : src.size[src.dims - 2]; int width = src.size[src.dims - 1]; - int kernel_d = (kernel_size.size() == 3) ? kernel_size[0] : 1; - int kernel_h = kernel_size[kernel_size.size() - 2]; + int kernel_d = isPool3D ? kernel_size[0] : 1; + int kernel_h = isPool1D ? 1 : kernel_size[kernel_size.size() - 2]; int kernel_w = kernel_size.back(); p.ofsbuf.resize(kernel_d * kernel_h * kernel_w); @@ -655,13 +673,15 @@ public: { int channels = dst->size[1]; + bool isPool3D = src->dims == 5; bool isPool2D = src->dims == 4; - int depth = !isPool2D? 
dst->size[2] : 1; - int height = dst->size[dst->dims - 2]; + bool isPool1D = src->dims == 3; + int depth = isPool3D? dst->size[2] : 1; + int height = isPool1D? 1 : dst->size[dst->dims - 2]; int width = dst->size[dst->dims - 1]; - int inp_depth = !isPool2D? src->size[2] : 1; - int inp_height = src->size[src->dims - 2]; + int inp_depth = isPool3D? src->size[2] : 1; + int inp_height = isPool1D? 1 : src->size[src->dims - 2]; int inp_width = src->size[src->dims - 1]; size_t total = dst->total(); @@ -669,12 +689,12 @@ public: size_t stripeStart = r.start*stripeSize; size_t stripeEnd = std::min(r.end*stripeSize, total); - int kernel_d = !isPool2D? kernel_size[0] : 1; - int kernel_h = kernel_size[kernel_size.size() - 2]; + int kernel_d = isPool3D? kernel_size[0] : 1; + int kernel_h = isPool1D? 1 : kernel_size[kernel_size.size() - 2]; int kernel_w = kernel_size.back(); - int stride_d = !isPool2D? strides[0] : 0; - int stride_h = strides[strides.size() - 2]; + int stride_d = isPool3D? strides[0] : 0; + int stride_h = isPool1D? 
1 :strides[strides.size() - 2]; int stride_w = strides.back(); bool compMaxIdx = computeMaxIdx; @@ -865,7 +885,24 @@ public: } } else +#else + CV_UNUSED(isPool2D); #endif + if( isPool1D ) + { + const float* first = srcData + xstart; + const float* last = srcData + xend; + const float* max_elem = std::max_element(first, last); + if (max_elem!=last) + { + dstData[x0] = *max_elem; + if( compMaxIdx ) + { + dstMaskData[x0] = std::distance(first, max_elem); + } + } + } + else { float max_val = -FLT_MAX; if( compMaxIdx ) @@ -939,6 +976,14 @@ public: } else #endif + if( isPool1D ) + { + const float* first = srcData + xstart; + const float* last = srcData + xend; + float sum_val = std::accumulate(first, last, 0.f); + dstData[x0] = sum_val*inv_kernel_area; + } + else { float sum_val = 0.f; for (int d = dstart; d < dend; ++d) { @@ -1052,20 +1097,26 @@ public: Halide::Buffer inputBuffer = halideBuffer(inputs[0]); const int inWidth = inputBuffer.width(); const int inHeight = inputBuffer.height(); + const size_t kernelHeight = kernel_size[0]; + const size_t kernelWidth = kernel_size[1]; + const size_t strideHeight = strides[0]; + const size_t strideWidth = strides[1]; + const size_t paddingTop = pads_begin[0]; + const size_t paddingLeft = pads_begin[1]; Halide::Var x("x"), y("y"), c("c"), n("n"); Halide::Func top = (name.empty() ? 
Halide::Func() : Halide::Func(name)); - Halide::RDom r(0, kernel.width, 0, kernel.height); + Halide::RDom r(0, kernelWidth, 0, kernelHeight); Halide::Expr kx, ky; - if(pad_l || pad_t) + if(paddingLeft || paddingTop) { - kx = clamp(x * stride.width + r.x - pad_l, 0, inWidth - 1); - ky = clamp(y * stride.height + r.y - pad_t, 0, inHeight - 1); + kx = clamp(x * strideWidth + r.x - paddingLeft, 0, inWidth - 1); + ky = clamp(y * strideHeight + r.y - paddingTop, 0, inHeight - 1); } else { - kx = min(x * stride.width + r.x, inWidth - 1); - ky = min(y * stride.height + r.y, inHeight - 1); + kx = min(x * strideWidth + r.x, inWidth - 1); + ky = min(y * strideHeight + r.y, inHeight - 1); } // Halide::argmax returns tuple (r.x, r.y, max). @@ -1073,17 +1124,17 @@ public: // Compute offset from argmax in range [0, kernel_size). Halide::Expr max_index; - if(pad_l || pad_t) + if(paddingLeft || paddingTop) { - max_index = clamp(y * stride.height + res[1] - pad_t, + max_index = clamp(y * strideHeight + res[1] - paddingTop, 0, inHeight - 1) * inWidth + - clamp(x * stride.width + res[0] - pad_l, + clamp(x * strideWidth + res[0] - paddingLeft, 0, inWidth - 1); } else { - max_index = min(y * stride.height + res[1], inHeight - 1) * inWidth + - min(x * stride.width + res[0], inWidth - 1); + max_index = min(y * strideHeight + res[1], inHeight - 1) * inWidth + + min(x * strideWidth + res[0], inWidth - 1); } top(x, y, c, n) = { res[2], Halide::cast(max_index) }; return Ptr(new HalideBackendNode(top)); @@ -1097,21 +1148,25 @@ public: Halide::Buffer inputBuffer = halideBuffer(inputs[0]); const int inW = inputBuffer.width(), inH = inputBuffer.height(); - if ((inW - kernel.width) % stride.width || (inH - kernel.height) % stride.height) + const size_t kernelHeight = kernel_size[0]; + const size_t kernelWidth = kernel_size[1]; + const size_t strideHeight = strides[0]; + const size_t strideWidth = strides[1]; + if ((inW - kernelWidth) % strideWidth || (inH - kernelHeight) % strideHeight) { 
CV_Error(cv::Error::StsNotImplemented, "Halide backend for average pooling with partial " "kernels is not implemented"); } - const float norm = 1.0f / (kernel.width * kernel.height); + const float norm = 1.0f / (kernelWidth * kernelHeight); Halide::Var x("x"), y("y"), c("c"), n("n"); Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name)); - Halide::RDom r(0, kernel.width, 0, kernel.height); + Halide::RDom r(0, kernelWidth, 0, kernelHeight); top(x, y, c, n) = sum( - inputBuffer(x * stride.width + r.x, - y * stride.height + r.y, c, n)) * norm; + inputBuffer(x * strideWidth + r.x, + y * strideHeight + r.y, c, n)) * norm; return Ptr(new HalideBackendNode(top)); #endif // HAVE_HALIDE return Ptr(); @@ -1173,6 +1228,7 @@ public: { CV_Assert(inputs.size() != 0); + bool isPool1D = inputs[0].size() == 3; std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); std::vector outShape(inputs[0].begin(), inputs[0].begin() + 2); @@ -1191,25 +1247,34 @@ public: outShape.push_back(pooledSize.height); outShape.push_back(pooledSize.width); } - else if (padMode.empty()) - { - for (int i = 0; i < local_kernel.size(); i++) { - float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i]; - outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); - } - - // If we have padding, ensure that the last pooling starts strictly - // inside the image (instead of at the padding); otherwise clip the last. 
- for (int i = 0; i < pads_end.size(); i++) { - if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { - --outShape[2 + i]; - CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); - } - } - } else { - getConvPoolOutParams(inpShape, local_kernel, strides, padMode, std::vector(local_kernel.size(), 1), outShape); + if (hasDynamicShapes && !shapesInitialized) + { + //Just copy input shapes for width and height to prevent errors on loading stage + for (int i = 0; i < inpShape.size(); i++) + outShape.push_back(inpShape[i]); + } + else if (padMode.empty()) + { + int addedDims = isPool1D? inpShape.size() : local_kernel.size(); + for (int i = 0; i < addedDims; i++) { + float dst = (float) (inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i]; + outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); + } + + // If we have padding, ensure that the last pooling starts strictly + // inside the image (instead of at the padding); otherwise clip the last. 
+ for (int i = 0; i < addedDims; i++) { + if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { + --outShape[2 + i]; + CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); + } + } + } else { + getConvPoolOutParams(inpShape, local_kernel, strides, padMode, + std::vector(local_kernel.size(), 1), outShape); + } } if (type == ROI) { @@ -1231,12 +1296,21 @@ public: return false; } + bool updateMemoryShapes(const std::vector &inputs) CV_OVERRIDE + { + int dims = inputs[0].size(); + CV_Assert(inputs[0][dims - 1] > 0 && inputs[0][dims - 2] > 0); + shapesInitialized = true; + return true; + } + virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { CV_UNUSED(inputs); // suppress unused variable warning long flops = 0; - size_t karea = std::accumulate(kernel_size.begin(), kernel_size.end(), + bool isPool1D = inputs[0].size() == 3; + size_t karea = std::accumulate(kernel_size.begin(), isPool1D? kernel_size.begin() + 1 : kernel_size.end(), 1, std::multiplies()); for(int i = 0; i < outputs.size(); i++) { @@ -1262,6 +1336,8 @@ private: ROI, // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf PSROI // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf }; + bool hasDynamicShapes; + bool shapesInitialized; }; Ptr PoolingLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp index dc1364a06b..f7340b1e67 100644 --- a/modules/dnn/src/layers/prior_box_layer.cpp +++ b/modules/dnn/src/layers/prior_box_layer.cpp @@ -607,7 +607,7 @@ public: auto priorBox = std::make_shared(slice_layer, slice_image, attrs); auto axis = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, std::vector{0}); - auto unsqueeze = std::make_shared(priorBox, axis); + auto unsqueeze = std::make_shared(priorBox, axis); return Ptr(new InfEngineNgraphNode(unsqueeze)); } else @@ -628,7 +628,7 @@ public: auto priorBox 
= std::make_shared(slice_layer, slice_image, attrs); auto axis = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, std::vector{0}); - auto unsqueeze = std::make_shared(priorBox, axis); + auto unsqueeze = std::make_shared(priorBox, axis); return Ptr(new InfEngineNgraphNode(unsqueeze)); } } diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp index 990cfeda30..4658e7b41f 100644 --- a/modules/dnn/src/layers/proposal_layer.cpp +++ b/modules/dnn/src/layers/proposal_layer.cpp @@ -95,8 +95,14 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_OPENCV || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && preferableTarget != DNN_TARGET_MYRIAD); +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL; + return !isMyriad; + } +#endif + return backendId == DNN_BACKEND_OPENCV; } bool getMemoryShapes(const std::vector &inputs, diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index dbea5d55f4..4c603c1ac8 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -170,6 +170,9 @@ public: setParamsFrom(params); int axis = params.get("axis", 0); int numAxes = params.get("num_axes", -1); + hasDynamicShapes = params.get("has_dynamic_shapes", false); + shapesInitialized = !hasDynamicShapes; + CV_Assert(numAxes >= -1); newShapeRange = (numAxes == -1) ? 
Range(axis, INT_MAX) : Range(axis, axis + numAxes); @@ -182,6 +185,25 @@ public: for (i = 0; i < dims; i++) newShapeDesc[i] = paramShape.get(i); } + if (hasDynamicShapes) + { + dynamicShapes.clear(); + inputIndices.clear(); + if (params.has("dynamic_axes")) { + CV_Assert(params.has("input_indices")); + const DictValue &dynamicAxes = params.get("dynamic_axes"); + const DictValue &dynamicInputShapes = params.get("input_indices"); + int i, dims = dynamicAxes.size(); + CV_Assert(dims == dynamicInputShapes.size()); + CV_Assert(dims > 0); + dynamicShapes.resize(dims); + inputIndices.resize(dims); + for (i = 0; i < dims; i++) { + dynamicShapes[i] = dynamicAxes.get(i); + inputIndices[i] = dynamicInputShapes.get(i); + } + } + } } virtual bool supportBackend(int backendId) CV_OVERRIDE @@ -196,13 +218,21 @@ public: std::vector &outputs, std::vector &internals) const CV_OVERRIDE { + if (inputs.size() == 1 || inputs.size() == requiredOutputs) { outputs.clear(); for (size_t i = 0; i < inputs.size(); i++) { - outputs.push_back(MatShape()); - computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back()); + if (hasDynamicShapes && !shapesInitialized) + { + outputs.push_back(newShapeDesc); + } + else + { + outputs.push_back(MatShape()); + computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back()); + } } } else @@ -213,6 +243,19 @@ public: return true; } + bool updateMemoryShapes(const std::vector &inputs) CV_OVERRIDE + { + if (hasDynamicShapes) + { + for (int i = 0; i < dynamicShapes.size(); ++i) + { + newShapeDesc[dynamicShapes[i]] = inputs[0][inputIndices[i]]; + } + } + shapesInitialized = true; + return true; + } + void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE { std::vector outputs; @@ -310,6 +353,10 @@ public: private: std::vector outShapes; + std::vector dynamicShapes; // Which axes shapes are dynamic and require reinitialization with new input + std::vector inputIndices; // Which axes from input are 
needed to compute correct output shape + bool hasDynamicShapes; + bool shapesInitialized; }; Ptr ReshapeLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index 6deabb5884..fa2d755b71 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -72,6 +72,8 @@ public: setParamsFrom(params); axis = params.get("axis", 1); num_split = params.get("num_split", 0); + hasDynamicShapes = params.get("has_dynamic_shapes", false); + shapesInitialized = !hasDynamicShapes; if (params.has("slice_point")) { CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end")); @@ -150,7 +152,8 @@ public: CV_Assert(sliceRanges[i].size() <= inpShape.size()); for (int j = 0; j < sliceRanges[i].size(); ++j) { - outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size(); + if (shapesInitialized || inpShape[j] > 0) + outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size(); } } } @@ -165,6 +168,12 @@ public: return false; } + bool updateMemoryShapes(const std::vector &inputs) CV_OVERRIDE + { + shapesInitialized = true; + return true; + } + void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE { #ifdef HAVE_OPENCL @@ -492,7 +501,7 @@ public: std::vector axes, offsets, dims; int from, to, step; int numDims = finalSliceRanges[0].size(); - if (preferableTarget == DNN_TARGET_MYRIAD) + if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) { from = axis; to = numDims; @@ -597,6 +606,8 @@ public: protected: // The actual non-negative values determined from @p sliceRanges depends on input size. 
std::vector > finalSliceRanges; + bool hasDynamicShapes; + bool shapesInitialized; }; class CropLayerImpl CV_FINAL : public SliceLayerImpl diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp index 677228bcf2..aefeaa42b3 100644 --- a/modules/dnn/src/model.cpp +++ b/modules/dnn/src/model.cpp @@ -15,6 +15,9 @@ namespace dnn { struct Model::Impl { +//protected: + Net net; + Size size; Scalar mean; double scale = 1.0; @@ -23,7 +26,70 @@ struct Model::Impl Mat blob; std::vector outNames; - void predict(Net& net, const Mat& frame, OutputArrayOfArrays outs) +public: + virtual ~Impl() {} + Impl() {} + Impl(const Impl&) = delete; + Impl(Impl&&) = delete; + + virtual Net& getNetwork() const { return const_cast(net); } + + virtual void setPreferableBackend(Backend backendId) { net.setPreferableBackend(backendId); } + virtual void setPreferableTarget(Target targetId) { net.setPreferableTarget(targetId); } + + /*virtual*/ + void initNet(const Net& network) + { + net = network; + + outNames = net.getUnconnectedOutLayersNames(); + std::vector inLayerShapes; + std::vector outLayerShapes; + net.getLayerShapes(MatShape(), 0, inLayerShapes, outLayerShapes); + if (!inLayerShapes.empty() && inLayerShapes[0].size() == 4) + size = Size(inLayerShapes[0][3], inLayerShapes[0][2]); + else + size = Size(); + } + + /*virtual*/ + void setInputParams(double scale_, const Size& size_, const Scalar& mean_, + bool swapRB_, bool crop_) + { + size = size_; + mean = mean_; + scale = scale_; + crop = crop_; + swapRB = swapRB_; + } + /*virtual*/ + void setInputSize(const Size& size_) + { + size = size_; + } + /*virtual*/ + void setInputMean(const Scalar& mean_) + { + mean = mean_; + } + /*virtual*/ + void setInputScale(double scale_) + { + scale = scale_; + } + /*virtual*/ + void setInputCrop(bool crop_) + { + crop = crop_; + } + /*virtual*/ + void setInputSwapRB(bool swapRB_) + { + swapRB = swapRB_; + } + + /*virtual*/ + void processFrame(InputArray frame, OutputArrayOfArrays outs) { if 
(size.empty()) CV_Error(Error::StsBadSize, "Input size not specified"); @@ -34,96 +100,115 @@ struct Model::Impl // Faster-RCNN or R-FCN if (net.getLayer(0)->outputNameToIndex("im_info") != -1) { - Mat imInfo = (Mat_(1, 3) << size.height, size.width, 1.6f); + Mat imInfo(Matx13f(size.height, size.width, 1.6f)); net.setInput(imInfo, "im_info"); } net.forward(outs, outNames); } }; -Model::Model() : impl(new Impl) {} +Model::Model() + : impl(makePtr()) +{ + // nothing +} Model::Model(const String& model, const String& config) - : Net(readNet(model, config)), impl(new Impl) + : Model() { - impl->outNames = getUnconnectedOutLayersNames(); - std::vector inLayerShapes; - std::vector outLayerShapes; - getLayerShapes(MatShape(), 0, inLayerShapes, outLayerShapes); - if (!inLayerShapes.empty() && inLayerShapes[0].size() == 4) - impl->size = Size(inLayerShapes[0][3], inLayerShapes[0][2]); -}; + impl->initNet(readNet(model, config)); +} -Model::Model(const Net& network) : Net(network), impl(new Impl) +Model::Model(const Net& network) + : Model() { - impl->outNames = getUnconnectedOutLayersNames(); - std::vector inLayerShapes; - std::vector outLayerShapes; - getLayerShapes(MatShape(), 0, inLayerShapes, outLayerShapes); - if (!inLayerShapes.empty() && inLayerShapes[0].size() == 4) - impl->size = Size(inLayerShapes[0][3], inLayerShapes[0][2]); -}; + impl->initNet(network); +} -Model& Model::setInputSize(const Size& size) +Net& Model::getNetwork_() const { - impl->size = size; + CV_DbgAssert(impl); + return impl->getNetwork(); +} + +Model& Model::setPreferableBackend(Backend backendId) +{ + CV_DbgAssert(impl); + impl->setPreferableBackend(backendId); + return *this; +} +Model& Model::setPreferableTarget(Target targetId) +{ + CV_DbgAssert(impl); + impl->setPreferableTarget(targetId); return *this; } -Model& Model::setInputSize(int width, int height) +Model& Model::setInputSize(const Size& size) { - impl->size = Size(width, height); + CV_DbgAssert(impl); + impl->setInputSize(size); 
return *this; } Model& Model::setInputMean(const Scalar& mean) { - impl->mean = mean; + CV_DbgAssert(impl); + impl->setInputMean(mean); return *this; } Model& Model::setInputScale(double scale) { - impl->scale = scale; + CV_DbgAssert(impl); + impl->setInputScale(scale); return *this; } Model& Model::setInputCrop(bool crop) { - impl->crop = crop; + CV_DbgAssert(impl); + impl->setInputCrop(crop); return *this; } Model& Model::setInputSwapRB(bool swapRB) { - impl->swapRB = swapRB; + CV_DbgAssert(impl); + impl->setInputSwapRB(swapRB); return *this; } void Model::setInputParams(double scale, const Size& size, const Scalar& mean, bool swapRB, bool crop) { - impl->size = size; - impl->mean = mean; - impl->scale = scale; - impl->crop = crop; - impl->swapRB = swapRB; + CV_DbgAssert(impl); + impl->setInputParams(scale, size, mean, swapRB, crop); } -void Model::predict(InputArray frame, OutputArrayOfArrays outs) +void Model::predict(InputArray frame, OutputArrayOfArrays outs) const { - impl->predict(*this, frame.getMat(), outs); + CV_DbgAssert(impl); + impl->processFrame(frame, outs); } + ClassificationModel::ClassificationModel(const String& model, const String& config) - : Model(model, config) {}; + : Model(model, config) +{ + // nothing +} -ClassificationModel::ClassificationModel(const Net& network) : Model(network) {}; +ClassificationModel::ClassificationModel(const Net& network) + : Model(network) +{ + // nothing +} std::pair ClassificationModel::classify(InputArray frame) { std::vector outs; - impl->predict(*this, frame.getMat(), outs); + impl->processFrame(frame, outs); CV_Assert(outs.size() == 1); double conf; @@ -145,11 +230,11 @@ KeypointsModel::KeypointsModel(const Net& network) : Model(network) {}; std::vector KeypointsModel::estimate(InputArray frame, float thresh) { - int frameHeight = frame.getMat().size[0]; - int frameWidth = frame.getMat().size[1]; + int frameHeight = frame.rows(); + int frameWidth = frame.cols(); std::vector outs; - impl->predict(*this, 
frame.getMat(), outs); + impl->processFrame(frame, outs); CV_Assert(outs.size() == 1); Mat output = outs[0]; @@ -202,9 +287,8 @@ SegmentationModel::SegmentationModel(const Net& network) : Model(network) {}; void SegmentationModel::segment(InputArray frame, OutputArray mask) { - std::vector outs; - impl->predict(*this, frame.getMat(), outs); + impl->processFrame(frame, outs); CV_Assert(outs.size() == 1); Mat score = outs[0]; @@ -250,12 +334,14 @@ void disableRegionNMS(Net& net) } DetectionModel::DetectionModel(const String& model, const String& config) - : Model(model, config) { - disableRegionNMS(*this); + : Model(model, config) +{ + disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() } -DetectionModel::DetectionModel(const Net& network) : Model(network) { - disableRegionNMS(*this); +DetectionModel::DetectionModel(const Net& network) : Model(network) +{ + disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() } void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, @@ -263,7 +349,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, float confThreshold, float nmsThreshold) { std::vector detections; - impl->predict(*this, frame.getMat(), detections); + impl->processFrame(frame, detections); boxes.clear(); confidences.clear(); @@ -271,15 +357,15 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, int frameWidth = frame.cols(); int frameHeight = frame.rows(); - if (getLayer(0)->outputNameToIndex("im_info") != -1) + if (getNetwork_().getLayer(0)->outputNameToIndex("im_info") != -1) { frameWidth = impl->size.width; frameHeight = impl->size.height; } - std::vector layerNames = getLayerNames(); - int lastLayerId = getLayerId(layerNames.back()); - Ptr lastLayer = getLayer(lastLayerId); + std::vector layerNames = getNetwork_().getLayerNames(); + int lastLayerId = getNetwork_().getLayerId(layerNames.back()); + Ptr lastLayer = 
getNetwork_().getLayer(lastLayerId); if (lastLayer->type == "DetectionOutput") { diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp index 47b40cc6c2..b366c97ac8 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp @@ -51,18 +51,20 @@ template OCL4DNNPool::OCL4DNNPool(OCL4DNNPoolConfig config) { int dims = config.in_shape.size(); - int spatial_dims = 2; + int spatial_dims = config.in_shape.size()-2; channels_ = config.channels; pool_method_ = config.pool_method; avePoolPaddedArea = config.avePoolPaddedArea; computeMaxIdx = config.computeMaxIdx; use_half = config.use_half; + kernel_shape_.push_back(config.kernel.height); + kernel_shape_.push_back(config.kernel.width); + stride_.push_back(config.stride.height); + stride_.push_back(config.stride.width); for (int i = 0; i < spatial_dims; ++i) { - kernel_shape_.push_back(i == 0 ? config.kernel.height : config.kernel.width); - stride_.push_back(i == 0 ? config.stride.height : config.stride.width); im_in_shape_.push_back(config.in_shape[dims - spatial_dims + i]); im_out_shape_.push_back(config.out_shape[dims - spatial_dims + i]); } @@ -75,10 +77,10 @@ OCL4DNNPool::OCL4DNNPool(OCL4DNNPoolConfig config) pad_l_ = config.pad_l; pad_r_ = config.pad_r; pad_b_ = config.pad_b; - height_ = im_in_shape_[0]; - width_ = im_in_shape_[1]; - pooled_height_ = im_out_shape_[0]; - pooled_width_ = im_out_shape_[1]; + height_ = spatial_dims == 1? 1 : im_in_shape_[0]; + width_ = im_in_shape_.back(); + pooled_height_ = spatial_dims == 1? 
1 : im_out_shape_[0]; + pooled_width_ = im_out_shape_.back(); count_ = 1; for (int i = 0; i < config.out_shape.size(); ++i) diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp index 46773220aa..ad3d903d68 100644 --- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp +++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp @@ -260,6 +260,40 @@ public: addNodeToMatch("Cast", gather); setFusedNode("Gather", input, index); } + + virtual bool match(const Ptr& net, int nodeId, + std::vector& matchedNodesIds, + std::vector& targetNodesIds) CV_OVERRIDE + { + bool retVal = Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds); + size_t matchedNodesNum = matchedNodesIds.size(); + // Now we check if merging can be made for these Gather and Cast nodes + if (!retVal || matchedNodesNum < 2) + return retVal; + else { + int nodeToMatch = matchedNodesIds[matchedNodesNum - 1]; + const Ptr node = net->getNode(nodeToMatch); + if (node->getType() == "Cast") { + int inpNodeId = matchedNodesIds[matchedNodesNum - 2]; + const Ptr inpNode = net->getNode(inpNodeId); + if (inpNode->getType() == "Gather") { + int numNodes = net->getNumNodes(); + std::string inpNodeName = node->getInputName(0); + for (int i = 0; i < numNodes; ++i) { + const Ptr node_to_check = net->getNode(i); + int numInp = node_to_check->getNumInputs(); + for (int inp = 0; inp < numInp; ++inp) { + if (i != nodeToMatch && inpNodeName == node_to_check->getInputName(0)) { + // Another node has the same input node, so it cannot be merged. 
+ return false; + } + } + } + } + } + } + return retVal; + } }; class ExpandSubgraph : public Subgraph @@ -513,6 +547,19 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto) CV_Assert(!field.empty()); Mat(sizes, CV_64FC1, (void*)field.data()).convertTo(blob, CV_32FC1); } + else if (datatype == opencv_onnx::TensorProto_DataType_INT32) + { + if (!tensor_proto.int32_data().empty()) + { + const ::google::protobuf::RepeatedField field = tensor_proto.int32_data(); + Mat(sizes, CV_32SC1, (void*)field.data()).copyTo(blob); + } + else + { + char* val = const_cast(tensor_proto.raw_data().c_str()); + Mat(sizes, CV_32SC1, val).copyTo(blob); + } + } else if (datatype == opencv_onnx::TensorProto_DataType_INT64) { blob.create(sizes, CV_32SC1); diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 7421ad1a5c..859b595b7f 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -64,6 +64,7 @@ public: ONNXImporter(Net& net, const char *onnxFile) : dstNet(net) { + hasDynamicShapes = false; CV_Assert(onnxFile); CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile); @@ -84,6 +85,7 @@ public: ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer) : dstNet(net) { + hasDynamicShapes = false; CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)"); struct _Buf : public std::streambuf @@ -115,6 +117,7 @@ protected: std::map constBlobs; std::map outShapes; // List of internal blobs shapes. 
+ bool hasDynamicShapes; // Whether the model has inputs with dynamic shapes typedef std::map::iterator IterShape_t; std::map layer_id; @@ -200,12 +203,12 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot if(attribute_name == "kernel_shape") { - CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); lp.set("kernel_size", parse(attribute_proto.ints())); } else if(attribute_name == "strides") { - CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); lp.set("stride", parse(attribute_proto.ints())); } else if(attribute_name == "pads") @@ -229,7 +232,7 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot else { // Convolution or pooling. - CV_Assert(attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6); + CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6); lp.set("pad", parse(attribute_proto.ints())); } } @@ -244,7 +247,7 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot } else if(attribute_name == "dilations") { - CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); lp.set("dilation", parse(attribute_proto.ints())); } else if (attribute_proto.has_i()) @@ -413,8 +416,10 @@ void ONNXImporter::populateNet() for (int j = 0; j < inpShape.size(); ++j) { inpShape[j] = tensorShape.dim(j).dim_value(); + if (!tensorShape.dim(j).dim_param().empty()) + hasDynamicShapes = true; } - if (!inpShape.empty()) + if (!inpShape.empty() && !hasDynamicShapes) { inpShape[0] = 
std::max(inpShape[0], 1); // It's OK to have undetermined batch size } @@ -461,6 +466,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) layerParams.name = name; layerParams.type = layer_type; + layerParams.set("has_dynamic_shapes", hasDynamicShapes); if (layer_type == "MaxPool") { @@ -494,14 +500,17 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) MatShape inpShape = outShapes[node_proto.input(0)]; DictValue axes = layerParams.get("axes"); bool keepdims = layerParams.get("keepdims"); - MatShape targetShape = inpShape; + MatShape targetShape; + std::vector shouldDelete(inpShape.size(), false); for (int i = 0; i < axes.size(); i++) { int axis = clamp(axes.get(i), inpShape.size()); - if (keepdims) { - targetShape[axis] = 1; - } else { - targetShape.erase(targetShape.begin() + axis); - } + shouldDelete[axis] = true; + } + for (int axis = 0; axis < inpShape.size(); ++axis){ + if (!shouldDelete[axis]) + targetShape.push_back(inpShape[axis]); + else if (keepdims) + targetShape.push_back(1); } if (inpShape.size() == 3 && axes.size() <= 2) @@ -551,11 +560,36 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) CV_Assert(axes.size() <= inpShape.size() - 2); std::vector kernel_size(inpShape.size() - 2, 1); - for (int i = 0; i < axes.size(); i++) { - int axis = clamp(axes.get(i), inpShape.size()); - CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); - kernel_size[axis - 2] = inpShape[axis]; + if (axes.size() == 1 && (clamp(axes.get(0), inpShape.size()) <= 1)) + { + int axis = clamp(axes.get(0), inpShape.size()); + MatShape newShape = inpShape; + newShape[axis + 1] = total(newShape, axis + 1); + newShape.resize(axis + 2); + newShape.insert(newShape.begin(), 2 - axis, 1); + + LayerParams reshapeLp; + reshapeLp.type = "Reshape"; + reshapeLp.name = layerParams.name + "/reshape"; + CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 
newShape.size())); + + node_proto.set_output(0, reshapeLp.name); + addLayer(reshapeLp, node_proto); + + kernel_size.resize(2); + kernel_size[0] = inpShape[axis]; + node_proto.set_input(0, node_proto.output(0)); } + else + { + for (int i = 0; i < axes.size(); i++) { + int axis = clamp(axes.get(i), inpShape.size()); + CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); + kernel_size[axis - 2] = inpShape[axis]; + } + } + LayerParams poolLp = layerParams; poolLp.name = layerParams.name + "/avg"; CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); @@ -1276,6 +1310,20 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) { layerParams.type = "Reshape"; layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); + if (hasDynamicShapes) + { + std::vector dynamicAxes; + std::vector inputIndices; + for (int index = 0; index < inpShape.size(); ++index) + { + if (!maskedAxes[index]) + inputIndices.push_back(index); + } + for (int index = 0; index < outShape.size(); ++index) + dynamicAxes.push_back(index); + layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); + layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); + } } else layerParams.type = "Identity"; @@ -1338,6 +1386,19 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) outShape.insert(outShape.begin() + axis, 1); layerParams.type = "Reshape"; layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); + if (hasDynamicShapes) + { + std::vector dynamicAxes; + std::vector inputIndices; + for (int index = 0; index < outShape.size(); ++index) { + if (index != axis) + dynamicAxes.push_back(index); + } + for (int index = 0; index < inpShape.size(); ++index) + inputIndices.push_back(index); + layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); + layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), 
inputIndices.size())); + } } else if (layer_type == "Expand") { @@ -1625,6 +1686,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size()); sliceLp.set("begin", paramBegin); sliceLp.set("end", paramEnd); + sliceLp.set("has_dynamic_shapes", hasDynamicShapes); if (inpShape.size() > 1) { @@ -1637,6 +1699,17 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) layerParams.type = "Reshape"; layerParams.set("axis", 0); layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size())); + if (hasDynamicShapes) + { + std::vector dynamicAxes; + std::vector inputIndices; + for (int index = 0; index < inpShape.size(); ++index) + dynamicAxes.push_back(index); + for (int index = 0; index < inpShape.size(); ++index) + inputIndices.push_back(index); + layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); + layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); + } node_proto.set_input(0, sliceLp.name); } else @@ -1676,32 +1749,45 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) for (int i = 1; i < node_proto.input_size(); i++) CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end()); - String interp_mode = layerParams.get("coordinate_transformation_mode"); - CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn"); - - layerParams.set("align_corners", interp_mode == "align_corners"); - Mat shapes = getBlob(node_proto, node_proto.input_size() - 1); - CV_CheckEQ(shapes.size[0], 4, ""); - CV_CheckEQ(shapes.size[1], 1, ""); - CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, ""); - if (shapes.depth() == CV_32F) - shapes.convertTo(shapes, CV_32S); - int height = shapes.at(2); - int width = shapes.at(3); - if (node_proto.input_size() == 3) + if 
(layerParams.has("coordinate_transformation_mode")) { - IterShape_t shapeIt = outShapes.find(node_proto.input(0)); - CV_Assert(shapeIt != outShapes.end()); - MatShape scales = shapeIt->second; - height *= scales[2]; - width *= scales[3]; - } - layerParams.set("width", width); - layerParams.set("height", height); + String interp_mode = layerParams.get("coordinate_transformation_mode"); + CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn"); - if (layerParams.get("mode") == "linear") { - layerParams.set("mode", interp_mode == "pytorch_half_pixel" ? - "opencv_linear" : "bilinear"); + layerParams.set("align_corners", interp_mode == "align_corners"); + if (layerParams.get("mode") == "linear") + { + layerParams.set("mode", interp_mode == "pytorch_half_pixel" ? + "opencv_linear" : "bilinear"); + } + } + if (layerParams.get("mode") == "linear" && framework_name == "pytorch") + layerParams.set("mode", "opencv_linear"); + + // input = [X, scales], [X, roi, scales] or [x, roi, scales, sizes] + int foundScaleId = hasDynamicShapes ? node_proto.input_size() - 1 + : node_proto.input_size() > 2 ? 
2 : 1; + + Mat scales = getBlob(node_proto, foundScaleId); + if (scales.total() == 4) + { + layerParams.set("zoom_factor_y", scales.at(2)); + layerParams.set("zoom_factor_x", scales.at(3)); + } + else + { + const std::string& inputLast = node_proto.input(node_proto.input_size() - 1); + if (constBlobs.find(inputLast) != constBlobs.end()) + { + Mat shapes = getBlob(inputLast); + CV_CheckEQ(shapes.size[0], 4, ""); + CV_CheckEQ(shapes.size[1], 1, ""); + CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, ""); + if (shapes.depth() == CV_32F) + shapes.convertTo(shapes, CV_32S); + layerParams.set("width", shapes.at(3)); + layerParams.set("height", shapes.at(2)); + } } replaceLayerParam(layerParams, "mode", "interpolation"); } @@ -1741,10 +1827,14 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) else { // scales as input - Mat scales = getBlob(node_proto, 1); - CV_Assert(scales.total() == 4); - layerParams.set("zoom_factor_y", scales.at(2)); - layerParams.set("zoom_factor_x", scales.at(3)); + const std::string& input1 = node_proto.input(1); + if (constBlobs.find(input1) != constBlobs.end()) + { + Mat scales = getBlob(input1); + CV_Assert(scales.total() == 4); + layerParams.set("zoom_factor_y", scales.at(2)); + layerParams.set("zoom_factor_x", scales.at(3)); + } } replaceLayerParam(layerParams, "mode", "interpolation"); } diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index 745d86ef3c..b7cdc2ad94 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -367,6 +367,7 @@ void InfEngineBackendNet::init(Target targetId) #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) // Inference Engine determines network precision by ports. InferenceEngine::Precision p = (targetId == DNN_TARGET_MYRIAD || + targetId == DNN_TARGET_HDDL || targetId == DNN_TARGET_OPENCL_FP16) ? 
InferenceEngine::Precision::FP16 : InferenceEngine::Precision::FP32; @@ -391,6 +392,9 @@ void InfEngineBackendNet::init(Target targetId) case DNN_TARGET_MYRIAD: device_name = "MYRIAD"; break; + case DNN_TARGET_HDDL: + device_name = "HDDL"; + break; case DNN_TARGET_FPGA: device_name = "FPGA"; break; @@ -652,20 +656,20 @@ InferenceEngine::Core& getCore(const std::string& id) #endif #if !defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT) -static bool detectMyriadX_() +static bool detectMyriadX_(std::string device) { AutoLock lock(getInitializationMutex()); #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R3) // Lightweight detection - InferenceEngine::Core& ie = getCore("MYRIAD"); + InferenceEngine::Core& ie = getCore(device); const std::vector devices = ie.GetAvailableDevices(); for (std::vector::const_iterator i = devices.begin(); i != devices.end(); ++i) { - if (i->find("MYRIAD") != std::string::npos) + if (i->find(device) != std::string::npos) { const std::string name = ie.GetMetric(*i, METRIC_KEY(FULL_DEVICE_NAME)).as(); CV_LOG_INFO(NULL, "Myriad device: " << name); - return name.find("MyriadX") != std::string::npos || name.find("Myriad X") != std::string::npos; + return name.find("MyriadX") != std::string::npos || name.find("Myriad X") != std::string::npos || name.find("HDDL") != std::string::npos; } } return false; @@ -702,13 +706,13 @@ static bool detectMyriadX_() InferenceEngine::InferenceEnginePluginPtr enginePtr; { auto& sharedPlugins = getSharedPlugins(); - auto pluginIt = sharedPlugins.find("MYRIAD"); + auto pluginIt = sharedPlugins.find(device); if (pluginIt != sharedPlugins.end()) { enginePtr = pluginIt->second; } else { auto dispatcher = InferenceEngine::PluginDispatcher({""}); - enginePtr = dispatcher.getPluginByDevice("MYRIAD"); - sharedPlugins["MYRIAD"] = enginePtr; + enginePtr = dispatcher.getPluginByDevice(device); + sharedPlugins[device] = enginePtr; } } auto plugin = InferenceEngine::InferencePlugin(enginePtr); @@ -719,9 +723,9 @@ static bool 
detectMyriadX_() try { #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3) - auto netExec = getCore("MYRIAD").LoadNetwork(cnn, "MYRIAD", {{"VPU_PLATFORM", "VPU_2480"}}); + auto netExec = getCore(device).LoadNetwork(cnn, device, {{"VPU_PLATFORM", "VPU_2480"}}); #else - auto netExec = getCore("MYRIAD").LoadNetwork(cnn, "MYRIAD", {{"VPU_MYRIAD_PLATFORM", "VPU_MYRIAD_2480"}}); + auto netExec = getCore(device).LoadNetwork(cnn, device, {{"VPU_MYRIAD_PLATFORM", "VPU_MYRIAD_2480"}}); #endif #endif auto infRequest = netExec.CreateInferRequest(); @@ -1155,11 +1159,30 @@ void resetMyriadDevice() #endif // HAVE_INF_ENGINE } +void releaseHDDLPlugin() +{ +#ifdef HAVE_INF_ENGINE + AutoLock lock(getInitializationMutex()); +#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) + getSharedPlugins().erase("HDDL"); +#else + // Unregister both "HDDL" and "HETERO:HDDL,CPU" plugins + InferenceEngine::Core& ie = getCore("HDDL"); + try + { + ie.UnregisterPlugin("HDDL"); + ie.UnregisterPlugin("HETERO"); + } + catch (...) {} +#endif +#endif // HAVE_INF_ENGINE +} + #ifdef HAVE_INF_ENGINE bool isMyriadX() { - static bool myriadX = getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X; - return myriadX; + static bool myriadX = getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X; + return myriadX; } static std::string getInferenceEngineVPUType_() @@ -1170,10 +1193,11 @@ static std::string getInferenceEngineVPUType_() #if defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT) param_vpu_type = OPENCV_DNN_IE_VPU_TYPE_DEFAULT; #else - CV_LOG_INFO(NULL, "OpenCV-DNN: running Inference Engine VPU autodetection: Myriad2/X. In case of other accelerator types specify 'OPENCV_DNN_IE_VPU_TYPE' parameter"); + CV_LOG_INFO(NULL, "OpenCV-DNN: running Inference Engine VPU autodetection: Myriad2/X or HDDL. 
In case of other accelerator types specify 'OPENCV_DNN_IE_VPU_TYPE' parameter"); try { - bool isMyriadX_ = detectMyriadX_(); - if (isMyriadX_) + bool isMyriadX_ = detectMyriadX_("MYRIAD"); + bool isHDDL_ = detectMyriadX_("HDDL"); + if (isMyriadX_ || isHDDL_) { param_vpu_type = CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X; } diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp index 7b3ab0fed0..fcd1a6927d 100644 --- a/modules/dnn/src/op_inf_engine.hpp +++ b/modules/dnn/src/op_inf_engine.hpp @@ -27,10 +27,11 @@ #define INF_ENGINE_RELEASE_2020_2 2020020000 #define INF_ENGINE_RELEASE_2020_3 2020030000 #define INF_ENGINE_RELEASE_2020_4 2020040000 +#define INF_ENGINE_RELEASE_2021_1 2021010000 #ifndef INF_ENGINE_RELEASE -#warning("IE version have not been provided via command-line. Using 2020.4 by default") -#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2020_4 +#warning("IE version have not been provided via command-line. Using 2021.1 by default") +#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2021_1 #endif #define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000)) diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index b3e425aef7..67f5782a2e 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -321,6 +321,7 @@ TEST_P(DNNTestNetwork, SSD_VGG16) else if (target == DNN_TARGET_CUDA_FP16) { scoreDiff = 0.03; + iouDiff = 0.13; } processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", @@ -511,7 +512,7 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16) else if (target == DNN_TARGET_CUDA_FP16) { l1 = 0.3; - lInf = 7.2; + lInf = 7.6; } processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf); #if defined(HAVE_INF_ENGINE) && INF_ENGINE_VER_MAJOR_GE(2019010000) diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index e1ffa762de..c0282207dd 100644 --- 
a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -563,7 +563,7 @@ TEST_P(Test_Caffe_nets, DenseNet_121) } normAssert(outs[0], ref, "", l1, lInf); if (target != DNN_TARGET_MYRIAD || getInferenceEngineVPUType() != CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) - expectNoFallbacksFromIE(model); + expectNoFallbacksFromIE(model.getNetwork_()); } TEST(Test_Caffe, multiple_inputs) @@ -749,7 +749,7 @@ TEST_P(Test_Caffe_nets, RFCN) if (target == DNN_TARGET_CUDA_FP16) { scoreDiff = 0.0034; - iouDiff = 0.11; + iouDiff = 0.12; } static Mat ref = (Mat_(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234, 0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16); diff --git a/modules/dnn/test/test_common.impl.hpp b/modules/dnn/test/test_common.impl.hpp index fa86f9b1c7..cf1b558391 100644 --- a/modules/dnn/test/test_common.impl.hpp +++ b/modules/dnn/test/test_common.impl.hpp @@ -40,6 +40,7 @@ void PrintTo(const cv::dnn::Target& v, std::ostream* os) case DNN_TARGET_OPENCL: *os << "OCL"; return; case DNN_TARGET_OPENCL_FP16: *os << "OCL_FP16"; return; case DNN_TARGET_MYRIAD: *os << "MYRIAD"; return; + case DNN_TARGET_HDDL: *os << "HDDL"; return; case DNN_TARGET_VULKAN: *os << "VULKAN"; return; case DNN_TARGET_FPGA: *os << "FPGA"; return; case DNN_TARGET_CUDA: *os << "CUDA"; return; @@ -67,10 +68,10 @@ void normAssert( double l1 /*= 0.00001*/, double lInf /*= 0.0001*/) { double normL1 = cvtest::norm(ref, test, cv::NORM_L1) / ref.getMat().total(); - EXPECT_LE(normL1, l1) << comment; + EXPECT_LE(normL1, l1) << comment << " |ref| = " << cvtest::norm(ref, cv::NORM_INF); double normInf = cvtest::norm(ref, test, cv::NORM_INF); - EXPECT_LE(normInf, lInf) << comment; + EXPECT_LE(normInf, lInf) << comment << " |ref| = " << cvtest::norm(ref, cv::NORM_INF); } std::vector matToBoxes(const cv::Mat& m) @@ -221,7 +222,7 @@ testing::internal::ParamGenerator< tuple > dnnBackendsAndTarget available = 
getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019); for (std::vector< Target >::const_iterator i = available.begin(); i != available.end(); ++i) { - if (*i == DNN_TARGET_MYRIAD && !withVPU) + if ((*i == DNN_TARGET_MYRIAD || *i == DNN_TARGET_HDDL) && !withVPU) continue; targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, *i)); } @@ -231,7 +232,7 @@ testing::internal::ParamGenerator< tuple > dnnBackendsAndTarget available = getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); for (std::vector< Target >::const_iterator i = available.begin(); i != available.end(); ++i) { - if (*i == DNN_TARGET_MYRIAD && !withVPU) + if ((*i == DNN_TARGET_MYRIAD || *i == DNN_TARGET_HDDL) && !withVPU) continue; targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, *i)); } @@ -281,7 +282,7 @@ testing::internal::ParamGenerator< tuple > dnnBackendsAndTarget available = getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019); for (std::vector< Target >::const_iterator i = available.begin(); i != available.end(); ++i) { - if (*i == DNN_TARGET_MYRIAD && !withVPU) + if ((*i == DNN_TARGET_MYRIAD || *i == DNN_TARGET_HDDL) && !withVPU) continue; targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, *i)); } @@ -291,7 +292,7 @@ testing::internal::ParamGenerator< tuple > dnnBackendsAndTarget available = getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); for (std::vector< Target >::const_iterator i = available.begin(); i != available.end(); ++i) { - if (*i == DNN_TARGET_MYRIAD && !withVPU) + if ((*i == DNN_TARGET_MYRIAD || *i == DNN_TARGET_HDDL) && !withVPU) continue; targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, *i)); } @@ -323,7 +324,7 @@ static bool validateVPUType_() bool have_vpu_target = false; for (std::vector::const_iterator i = available.begin(); i != available.end(); ++i) { - if (*i == DNN_TARGET_MYRIAD) + if (*i == DNN_TARGET_MYRIAD || *i == DNN_TARGET_HDDL) { have_vpu_target = 
true; break; diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 30180789d2..021603636e 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -656,6 +656,11 @@ TEST_P(Test_Darknet_nets, YOLOv4_tiny) target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB ); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000) // nGraph compilation failure + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + const double confThreshold = 0.5; // batchId, classId, confidence, left, top, right, bottom const int N0 = 2; @@ -672,6 +677,8 @@ TEST_P(Test_Darknet_nets, YOLOv4_tiny) double scoreDiff = 0.01f; double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.15 : 0.01f; + if (target == DNN_TARGET_CUDA_FP16) + iouDiff = 0.02; std::string config_file = "yolov4-tiny.cfg"; std::string weights_file = "yolov4-tiny.weights"; diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp index bd36c86d6c..b285e91d96 100644 --- a/modules/dnn/test/test_ie_models.cpp +++ b/modules/dnn/test/test_ie_models.cpp @@ -340,6 +340,8 @@ TEST_P(DNNTestOpenVINO, models) // Single Myriad device cannot be shared across multiple processes. 
if (targetId == DNN_TARGET_MYRIAD) resetMyriadDevice(); + if (targetId == DNN_TARGET_HDDL) + releaseHDDLPlugin(); EXPECT_NO_THROW(runIE(targetId, xmlPath, binPath, inputsMap, ieOutputsMap)) << "runIE"; EXPECT_NO_THROW(runCV(backendId, targetId, xmlPath, binPath, inputsMap, cvOutputsMap)) << "runCV"; diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index e3caae5c0c..61537e0e01 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -2228,7 +2228,7 @@ public: static testing::internal::ParamGenerator > dnnBackendsAndTargetsForFusionTests() { - return dnnBackendsAndTargets(false, false, true, false, false, false); // OCV OpenCL + OCV CPU + return dnnBackendsAndTargets(false, false, true, false, true, false); // OCV OpenCL + OCV CPU + CUDA } }; @@ -2264,10 +2264,6 @@ TEST_P(ConvolutionActivationFusion, Accuracy) Backend backendId = get<0>(get<2>(GetParam())); Target targetId = get<1>(get<2>(GetParam())); - // bug: https://github.com/opencv/opencv/issues/17964 - if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)) - applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); - Net net; int convId = net.addLayer(convParams.name, convParams.type, convParams); int activId = net.addLayerToPrev(activationParams.name, activationParams.type, activationParams); @@ -2280,11 +2276,16 @@ TEST_P(ConvolutionActivationFusion, Accuracy) expectedFusedLayers.push_back(activId); // all activations are fused else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) { - if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Power") + if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" /*|| actType == "Power"*/) expectedFusedLayers.push_back(activId); } } - + else if (backendId == DNN_BACKEND_CUDA) + { + if (actType == "ReLU" || actType == "ReLU6" || 
actType == "TanH" || actType == "Swish" || + actType == "Mish" || actType == "Sigmoid" || actType == "Power") + expectedFusedLayers.push_back(activId); + } TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers); } INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationFusion, Combine( @@ -2323,7 +2324,7 @@ TEST_P(ConvolutionEltwiseFusion, Accuracy) std::string eltwiseOp = get<1>(GetParam()); bool weightedEltwise = get<2>(GetParam()); if (eltwiseOp != "sum" && weightedEltwise) - throw SkipTestException("weighted eltwise not supported"); + throw SkipTestException("weighted eltwise not supported"); LayerParams eltwiseParams; TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise); @@ -2336,7 +2337,11 @@ TEST_P(ConvolutionEltwiseFusion, Accuracy) Backend backendId = get<0>(get<3>(GetParam())); Target targetId = get<1>(get<3>(GetParam())); - TestLayerFusion::test(input, net, backendId, targetId); + + std::vector expectedFusedLayers; + if (backendId == DNN_BACKEND_CUDA && eltwiseOp == "sum" && !weightedEltwise) + expectedFusedLayers.push_back(eltwiseId); + TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers); } INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseFusion, Combine( /* bias */ testing::Bool(), @@ -2390,21 +2395,6 @@ TEST_P(ConvolutionEltwiseActivationFusion, Accuracy) Backend backendId = get<0>(get<4>(GetParam())); Target targetId = get<1>(get<4>(GetParam())); - // bug: https://github.com/opencv/opencv/issues/17945 - if ((eltwiseOp != "sum" || weightedEltwise) && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)) - applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); - - // bug: https://github.com/opencv/opencv/issues/17953 - if (eltwiseOp == "sum" && actType == "ChannelsPReLU" && bias_term == false && - backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)) - { - 
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); - } - - // bug: https://github.com/opencv/opencv/issues/17964 - if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)) - applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); - Net net; int convId = net.addLayer(convParams.name, convParams.type, convParams); int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams); @@ -2421,14 +2411,25 @@ TEST_P(ConvolutionEltwiseActivationFusion, Accuracy) expectedFusedLayers.push_back(activId); // activation is fused with eltwise layer else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) { - if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "Power") + if (eltwiseOp == "sum" && !weightedEltwise && + (actType == "ReLU" || actType == "ChannelsPReLU" /*|| actType == "Power"*/) + ) { expectedFusedLayers.push_back(eltwiseId); expectedFusedLayers.push_back(activId); } } } - + else if(backendId == DNN_BACKEND_CUDA) + { + if (eltwiseOp == "sum" && !weightedEltwise) + { + expectedFusedLayers.push_back(eltwiseId); + if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" || + actType == "Mish" || actType == "Sigmoid" || actType == "Power") + expectedFusedLayers.push_back(activId); + } + } TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers); } INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseActivationFusion, Combine( @@ -2483,17 +2484,6 @@ TEST_P(ConvolutionActivationEltwiseFusion, Accuracy) Backend backendId = get<0>(get<4>(GetParam())); Target targetId = get<1>(get<4>(GetParam())); - // bug: https://github.com/opencv/opencv/issues/17964 - if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)) - applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); - - // bug: https://github.com/opencv/opencv/issues/17953 - if (actType == "ChannelsPReLU" 
&& bias_term == false && - backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)) - { - applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); - } - Net net; int convId = net.addLayer(convParams.name, convParams.type, convParams); int activId = net.addLayer(activationParams.name, activationParams.type, activationParams); @@ -2510,11 +2500,20 @@ TEST_P(ConvolutionActivationEltwiseFusion, Accuracy) expectedFusedLayers.push_back(activId); // activation fused with convolution else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) { - if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Power") + if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" /*|| actType == "Power"*/) expectedFusedLayers.push_back(activId); // activation fused with convolution } } - + else if(backendId == DNN_BACKEND_CUDA) + { + if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" || + actType == "Mish" || actType == "Sigmoid" || actType == "Power") + { + expectedFusedLayers.push_back(activId); + if (eltwiseOp == "sum" && !weightedEltwise) + expectedFusedLayers.push_back(eltwiseId); + } + } TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers); } INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationEltwiseFusion, Combine( diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index 215cc1c743..7d516de73e 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -206,6 +206,8 @@ TEST_P(Test_Model, DetectionOutput) { if (backend == DNN_BACKEND_OPENCV) scoreDiff = 4e-3; + else + scoreDiff = 2e-2; iouDiff = 1.8e-1; } @@ -261,7 +263,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD) } else if (target == DNN_TARGET_CUDA_FP16) { - scoreDiff = 4e-4; + scoreDiff = 0.002; iouDiff = 1e-2; } float confThreshold = FLT_MIN; @@ -363,7 +365,7 
@@ TEST_P(Test_Model, Detection_normalized) scoreDiff = 5e-3; iouDiff = 0.09; } -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) { iouDiff = 0.095f; diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index f9cfd290a9..a2c097da42 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -192,9 +192,14 @@ TEST_P(Test_ONNX_layers, Convolution3D) #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif - if (target != DNN_TARGET_CPU && backend != DNN_BACKEND_CUDA) - throw SkipTestException("Only CPU and CUDA is supported"); testONNXModels("conv3d"); +} + +TEST_P(Test_ONNX_layers, Convolution3D_bias) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif testONNXModels("conv3d_bias"); } @@ -216,7 +221,8 @@ TEST_P(Test_ONNX_layers, Deconvolution) testONNXModels("two_deconvolution", npy, 0, 0, false, false); testONNXModels("deconvolution_group", npy, 0, 0, false, false); testONNXModels("deconvolution_output_shape", npy, 0, 0, false, false); - testONNXModels("deconv_adjpad_2d", npy, 0, 0, false, false); + if (target != DNN_TARGET_CUDA_FP16) // bug + testONNXModels("deconv_adjpad_2d", npy, 0, 0, false, false); } TEST_P(Test_ONNX_layers, Deconvolution3D) @@ -275,9 +281,11 @@ TEST_P(Test_ONNX_layers, ReduceSum) testONNXModels("reduce_sum"); } -TEST_P(Test_ONNX_layers, ReduceMaxGlobal) +TEST_P(Test_ONNX_layers, ReduceMax) { testONNXModels("reduce_max"); + testONNXModels("reduce_max_axis_0"); + testONNXModels("reduce_max_axis_1"); } TEST_P(Test_ONNX_layers, Scale) @@ -543,7 +551,12 @@ TEST_P(Test_ONNX_layers, Broadcast) TEST_P(Test_ONNX_layers, DynamicResize) { - 
testONNXModels("dynamic_resize", npy, 0, 0, false, true, 2); + testONNXModels("dynamic_resize_9", npy, 0, 0, false, true, 2); + testONNXModels("dynamic_resize_10", npy, 0, 0, false, true, 2); + testONNXModels("dynamic_resize_11", npy, 0, 0, false, true, 2); + testONNXModels("dynamic_resize_scale_9", npy, 0, 0, false, true, 2); + testONNXModels("dynamic_resize_scale_10", npy, 0, 0, false, true, 2); + testONNXModels("dynamic_resize_scale_11", npy, 0, 0, false, true, 2); } TEST_P(Test_ONNX_layers, Div) @@ -663,6 +676,8 @@ TEST_P(Test_ONNX_layers, LinearWithConstant) #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE); #endif + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); testONNXModels("lin_with_constant"); } @@ -673,6 +688,8 @@ TEST_P(Test_ONNX_layers, MatmulWithTwoInputs) #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE); #endif + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); testONNXModels("matmul_with_two_inputs"); } @@ -681,6 +698,174 @@ TEST_P(Test_ONNX_layers, ResizeOpset11_Torch1_6) testONNXModels("resize_opset11_torch1.6"); } +TEST_P(Test_ONNX_layers, Conv1d) +{ + testONNXModels("conv1d"); +} + +TEST_P(Test_ONNX_layers, Conv1d_bias) +{ + testONNXModels("conv1d_bias"); +} + +TEST_P(Test_ONNX_layers, Conv1d_variable_weight) +{ + String basename = "conv1d_variable_w"; + Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); + ASSERT_FALSE(net.empty()); + + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + + Mat input = blobFromNPY(_tf("data/input_" + basename + "_0.npy")); + Mat weights = blobFromNPY(_tf("data/input_" + basename + "_1.npy")); + Mat ref = blobFromNPY(_tf("data/output_" + basename + ".npy")); + + net.setInput(input, "0"); + net.setInput(weights, "1"); + + Mat out = net.forward(); + normAssert(ref, out, "", default_l1, default_lInf); +} + 
+TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } + String basename = "conv1d_variable_wb"; + Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); + ASSERT_FALSE(net.empty()); + + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + + Mat input = blobFromNPY(_tf("data/input_" + basename + "_0.npy")); + Mat weights = blobFromNPY(_tf("data/input_" + basename + "_1.npy")); + Mat bias = blobFromNPY(_tf("data/input_" + basename + "_2.npy")); + Mat ref = blobFromNPY(_tf("data/output_" + basename + ".npy")); + + net.setInput(input, "0"); + net.setInput(weights, "1"); + net.setInput(bias, "bias"); + + Mat out = net.forward(); + normAssert(ref, out, "", default_l1, default_lInf); +} + +TEST_P(Test_ONNX_layers, GatherMultiOutput) +{ + if (cvtest::skipUnstableTests && backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + throw SkipTestException("Skip unstable test: https://github.com/opencv/opencv/issues/18937"); + +#if defined(INF_ENGINE_RELEASE) + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE); +#endif + + testONNXModels("gather_multi_output"); +} + +TEST_P(Test_ONNX_layers, DynamicAxes) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } + testONNXModels("squeeze_and_conv_dynamic_axes"); + testONNXModels("unsqueeze_and_conv_dynamic_axes"); + testONNXModels("gather_dynamic_axes"); + testONNXModels("gather_scalar_dynamic_axes"); + testONNXModels("slice_dynamic_axes"); + 
testONNXModels("slice_opset_11_dynamic_axes"); + testONNXModels("resize_opset11_torch1.6_dynamic_axes"); + testONNXModels("average_pooling_dynamic_axes"); + testONNXModels("maxpooling_sigmoid_dynamic_axes"); +} + +TEST_P(Test_ONNX_layers, MaxPool1d) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } + testONNXModels("maxpooling_1d"); +} + +TEST_P(Test_ONNX_layers, MaxPoolSigmoid1d) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } + testONNXModels("maxpooling_sigmoid_1d"); +} + +TEST_P(Test_ONNX_layers, MaxPool1d_Twise) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } + testONNXModels("two_maxpooling_1d"); +} + +TEST_P(Test_ONNX_layers, AvePool1d) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } + 
testONNXModels("average_pooling_1d"); +} + +TEST_P(Test_ONNX_layers, PoolConv1d) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } + testONNXModels("pool_conv_1d"); +} + +TEST_P(Test_ONNX_layers, ConvResizePool1d) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } + testONNXModels("conv_resize_pool_1d"); +} + INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets()); class Test_ONNX_nets : public Test_ONNX_layers @@ -1060,8 +1245,8 @@ TEST_P(Test_ONNX_nets, Resnet34_kinetics) float l1 = 0.0013, lInf = 0.009; if (target == DNN_TARGET_CUDA_FP16) { - l1 = 0.008; - lInf = 0.04; + l1 = 0.01; + lInf = 0.06; } checkBackend(&input0, &ref0); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 5f52ef0bc8..e6cfbe6637 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -169,17 +169,10 @@ TEST_P(Test_TensorFlow_layers, Convolution3D) #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif - if (backend == DNN_BACKEND_CUDA) - { - // ok - } - else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); // Only 
CPU on DLIE backend is supported - else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // Only CPU on DLIE backend is supported - else if (target != DNN_TARGET_CPU) - throw SkipTestException("Only CPU is supported"); - runTensorFlowNet("conv3d"); } @@ -1263,7 +1256,7 @@ TEST_P(Test_TensorFlow_nets, EfficientDet) if (target == DNN_TARGET_CUDA_FP16) { scoreDiff = 0.002; - iouDiff = 0.004; + iouDiff = 0.005; } normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff); expectNoFallbacksFromIE(net); diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index 9de74d6cae..54b7c1baa9 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -165,7 +165,8 @@ TEST_P(Test_Torch_layers, run_reshape_single_sample) } else if (target == DNN_TARGET_CUDA_FP16) { - l1 = 0.01; + l1 = 0.02; + lInf = 0.04; } runTorchNet("net_reshape_single_sample", "", false, false, true, l1, lInf); } @@ -409,6 +410,10 @@ TEST_P(Test_Torch_nets, ENet_accuracy) if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); throw SkipTestException(""); } +#endif +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) { diff --git a/modules/features2d/src/evaluation.cpp b/modules/features2d/src/evaluation.cpp index 2c1a446a57..ca7ab14500 100644 --- a/modules/features2d/src/evaluation.cpp +++ b/modules/features2d/src/evaluation.cpp @@ -314,7 +314,7 @@ struct SIdx UsedFinder(const SIdx& _used) : used(_used) {} const SIdx& used; bool operator()(const SIdx& v) const { return (v.i1 == used.i1 || v.i2 == 
used.i2); } - UsedFinder& operator=(const UsedFinder&); + UsedFinder& operator=(const UsedFinder&) = delete; }; }; diff --git a/modules/features2d/src/keypoint.cpp b/modules/features2d/src/keypoint.cpp index 219634e5b4..e14c9da94c 100644 --- a/modules/features2d/src/keypoint.cpp +++ b/modules/features2d/src/keypoint.cpp @@ -44,9 +44,9 @@ namespace cv { -struct KeypointResponseGreaterThanThreshold +struct KeypointResponseGreaterThanOrEqualToThreshold { - KeypointResponseGreaterThanThreshold(float _value) : + KeypointResponseGreaterThanOrEqualToThreshold(float _value) : value(_value) { } @@ -83,7 +83,7 @@ void KeyPointsFilter::retainBest(std::vector& keypoints, int n_points) //use std::partition to grab all of the keypoints with the boundary response. std::vector::const_iterator new_end = std::partition(keypoints.begin() + n_points, keypoints.end(), - KeypointResponseGreaterThanThreshold(ambiguous_response)); + KeypointResponseGreaterThanOrEqualToThreshold(ambiguous_response)); //resize the keypoints, given this new end point. 
nth_element and partition reordered the points inplace keypoints.resize(new_end - keypoints.begin()); } @@ -151,7 +151,7 @@ public: private: const Mat mask; - MaskPredicate& operator=(const MaskPredicate&); + MaskPredicate& operator=(const MaskPredicate&) = delete; }; void KeyPointsFilter::runByPixelsMask( std::vector& keypoints, const Mat& mask ) diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index 0278d9326a..0067cfa389 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -38,6 +38,10 @@ if(MSVC) endif() endif() +if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") # don't add Clang here: issue should be investigated and fixed (workaround for Apple only) + ocv_warnings_disable(CMAKE_CXX_FLAGS -Wrange-loop-analysis) # https://github.com/opencv/opencv/issues/18928 +endif() + file(GLOB gapi_ext_hdrs "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/*.hpp" @@ -49,6 +53,7 @@ file(GLOB gapi_ext_hdrs "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/ocl/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/own/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/render/*.hpp" + "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/s11n/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/streaming/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/plaidml/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/util/*.hpp" @@ -56,6 +61,7 @@ file(GLOB gapi_ext_hdrs set(gapi_srcs # Front-end part + src/api/grunarg.cpp src/api/gorigin.cpp src/api/gmat.cpp src/api/garray.cpp @@ -73,10 +79,10 @@ set(gapi_srcs src/api/kernels_imgproc.cpp src/api/kernels_video.cpp src/api/kernels_nnparsers.cpp + src/api/kernels_streaming.cpp src/api/render.cpp src/api/render_ocv.cpp src/api/ginfer.cpp - src/api/ft_render.cpp src/api/media.cpp src/api/rmat.cpp @@ -97,6 +103,7 @@ set(gapi_srcs src/compiler/passes/pattern_matching.cpp src/compiler/passes/perform_substitution.cpp 
src/compiler/passes/streaming.cpp + src/compiler/passes/intrin.cpp # Executor src/executor/gexecutor.cpp @@ -129,21 +136,28 @@ set(gapi_srcs src/backends/ie/giebackend.cpp src/backends/ie/giebackend/giewrapper.cpp - # Render Backend. - src/backends/render/grenderocvbackend.cpp - src/backends/render/grenderocv.cpp + # ONNX backend + src/backends/onnx/gonnxbackend.cpp - #PlaidML Backend + # Render backend + src/backends/render/grenderocv.cpp + src/backends/render/ft_render.cpp + + # PlaidML Backend src/backends/plaidml/gplaidmlcore.cpp src/backends/plaidml/gplaidmlbackend.cpp - # Compound + # Common backend code + src/backends/common/gmetabackend.cpp src/backends/common/gcompoundbackend.cpp src/backends/common/gcompoundkernel.cpp # Serialization API and routines src/api/s11n.cpp src/backends/common/serialization.cpp + + # Python bridge + src/backends/ie/bindings_ie.cpp ) ocv_add_dispatched_file(backends/fluid/gfluidimgproc_func SSE4_1 AVX2) @@ -200,10 +214,20 @@ if(HAVE_PLAIDML) ocv_target_include_directories(${the_module} SYSTEM PRIVATE ${PLAIDML_INCLUDE_DIRS}) endif() + if(WIN32) # Required for htonl/ntohl on Windows ocv_target_link_libraries(${the_module} PRIVATE wsock32 ws2_32) endif() +if(HAVE_ONNX) + ocv_target_link_libraries(${the_module} PRIVATE ${ONNX_LIBRARY}) + ocv_target_compile_definitions(${the_module} PRIVATE HAVE_ONNX=1) + if(TARGET opencv_test_gapi) + ocv_target_compile_definitions(opencv_test_gapi PRIVATE HAVE_ONNX=1) + ocv_target_link_libraries(opencv_test_gapi PRIVATE ${ONNX_LIBRARY}) + endif() +endif() + ocv_add_perf_tests() ocv_add_samples() diff --git a/modules/gapi/cmake/standalone.cmake b/modules/gapi/cmake/standalone.cmake index ca54697524..5cc57d8269 100644 --- a/modules/gapi/cmake/standalone.cmake +++ b/modules/gapi/cmake/standalone.cmake @@ -15,6 +15,8 @@ file(GLOB FLUID_includes "${FLUID_ROOT}/include/opencv2/*.hpp" "${FLUID_ROOT}/include/opencv2/gapi/own/*.hpp" "${FLUID_ROOT}/include/opencv2/gapi/fluid/*.hpp") file(GLOB FLUID_sources 
"${FLUID_ROOT}/src/api/g*.cpp" + "${FLUID_ROOT}/src/api/rmat.cpp" + "${FLUID_ROOT}/src/api/media.cpp" "${FLUID_ROOT}/src/compiler/*.cpp" "${FLUID_ROOT}/src/compiler/passes/*.cpp" "${FLUID_ROOT}/src/executor/*.cpp" diff --git a/modules/gapi/include/opencv2/gapi.hpp b/modules/gapi/include/opencv2/gapi.hpp index c6ab3f13fd..8445746710 100644 --- a/modules/gapi/include/opencv2/gapi.hpp +++ b/modules/gapi/include/opencv2/gapi.hpp @@ -33,4 +33,8 @@ #include #include +// Include this file here to avoid cyclic dependency between +// Desync & GKernel & GComputation & GStreamingCompiled. +#include + #endif // OPENCV_GAPI_HPP diff --git a/modules/gapi/include/opencv2/gapi/core.hpp b/modules/gapi/include/opencv2/gapi/core.hpp index c4ddaf6bd3..8825585696 100644 --- a/modules/gapi/include/opencv2/gapi/core.hpp +++ b/modules/gapi/include/opencv2/gapi/core.hpp @@ -508,19 +508,23 @@ namespace core { return in.withType(in.depth, in.chan).withSize(dsize); } }; +} // namespace core - G_TYPED_KERNEL(GSize, (GMat)>, "org.opencv.core.size") { - static GOpaqueDesc outMeta(const GMatDesc&) { - return empty_gopaque_desc(); - } - }; +namespace streaming { - G_TYPED_KERNEL(GSizeR, (GOpaque)>, "org.opencv.core.sizeR") { - static GOpaqueDesc outMeta(const GOpaqueDesc&) { - return empty_gopaque_desc(); - } - }; -} +// Operations for Streaming (declared in this header for convenience) +G_TYPED_KERNEL(GSize, (GMat)>, "org.opencv.streaming.size") { + static GOpaqueDesc outMeta(const GMatDesc&) { + return empty_gopaque_desc(); + } +}; + +G_TYPED_KERNEL(GSizeR, (GOpaque)>, "org.opencv.streaming.sizeR") { + static GOpaqueDesc outMeta(const GOpaqueDesc&) { + return empty_gopaque_desc(); + } +}; +} // namespace streaming //! @addtogroup gapi_math //! @{ @@ -1491,8 +1495,8 @@ Output image must be of the same size and depth as input one. CV_32FC1, or CV_32FC2. @param map2 The second map of y values having the type CV_16UC1, CV_32FC1, or none (empty map if map1 is (x,y) points), respectively. 
-@param interpolation Interpolation method (see cv::InterpolationFlags). The method INTER_AREA is -not supported by this function. +@param interpolation Interpolation method (see cv::InterpolationFlags). The methods #INTER_AREA +and #INTER_LINEAR_EXACT are not supported by this function. @param borderMode Pixel extrapolation method (see cv::BorderTypes). When borderMode=BORDER_TRANSPARENT, it means that the pixels in the destination image that corresponds to the "outliers" in the source image are not modified by the function. @@ -1753,9 +1757,10 @@ GAPI_EXPORTS GMat warpAffine(const GMat& src, const Mat& M, const Size& dsize, i int borderMode = cv::BORDER_CONSTANT, const Scalar& borderValue = Scalar()); //! @} gapi_transform +namespace streaming { /** @brief Gets dimensions from Mat. -@note Function textual ID is "org.opencv.core.size" +@note Function textual ID is "org.opencv.streaming.size" @param src Input tensor @return Size (tensor dimensions). @@ -1765,12 +1770,13 @@ GAPI_EXPORTS GOpaque size(const GMat& src); /** @overload Gets dimensions from rectangle. -@note Function textual ID is "org.opencv.core.sizeR" +@note Function textual ID is "org.opencv.streaming.sizeR" @param r Input rectangle. @return Size (rectangle dimensions). 
*/ GAPI_EXPORTS GOpaque size(const GOpaque& r); +} //namespace streaming } //namespace gapi } //namespace cv diff --git a/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp b/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp index ef67930909..5dd70bd2e8 100644 --- a/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp +++ b/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp @@ -271,6 +271,11 @@ template<> struct get_out >: public get_out>/GArray> conversion should be done more gracefully in the system +template struct get_out> >: public get_out> > +{ +}; + template struct get_out> { static U& get(GCPUContext &ctx, int idx) @@ -443,7 +448,7 @@ struct OCVStCallHelper, std::tuple> : template class GCPUKernelImpl: public cv::detail::KernelTag { - using CallHelper = detail::OCVCallHelper; + using CallHelper = cv::detail::OCVCallHelper; public: using API = K; @@ -497,7 +502,7 @@ private: template gapi::cpu::GOCVFunctor gapi::cpu::ocv_kernel(Callable& c) { - using P = detail::OCVCallHelper; + using P = cv::detail::OCVCallHelper; return GOCVFunctor{ K::id() , &K::getOutMeta , std::bind(&P::callFunctor, std::placeholders::_1, std::ref(c)) @@ -507,7 +512,7 @@ gapi::cpu::GOCVFunctor gapi::cpu::ocv_kernel(Callable& c) template gapi::cpu::GOCVFunctor gapi::cpu::ocv_kernel(const Callable& c) { - using P = detail::OCVCallHelper; + using P = cv::detail::OCVCallHelper; return GOCVFunctor{ K::id() , &K::getOutMeta , std::bind(&P::callFunctor, std::placeholders::_1, c) diff --git a/modules/gapi/include/opencv2/gapi/garg.hpp b/modules/gapi/include/opencv2/gapi/garg.hpp index 67ce0d990c..0838573b56 100644 --- a/modules/gapi/include/opencv2/gapi/garg.hpp +++ b/modules/gapi/include/opencv2/gapi/garg.hpp @@ -9,12 +9,14 @@ #define OPENCV_GAPI_GARG_HPP #include +#include #include #include #include #include +#include #include #include @@ -93,7 +95,7 @@ using GArgs = std::vector; // FIXME: Express as M::type // FIXME: Move to a separate file! 
-using GRunArg = util::variant< +using GRunArgBase = util::variant< #if !defined(GAPI_STANDALONE) cv::UMat, #endif // !defined(GAPI_STANDALONE) @@ -105,6 +107,61 @@ using GRunArg = util::variant< cv::detail::OpaqueRef, cv::MediaFrame >; + +namespace detail { +template +struct in_variant; + +template +struct in_variant > + : std::integral_constant::value > { +}; +} // namespace detail + +struct GAPI_EXPORTS GRunArg: public GRunArgBase +{ + // Metadata information here + using Meta = std::unordered_map; + Meta meta; + + // Mimic the old GRunArg semantics here, old of the times when + // GRunArg was an alias to variant<> + GRunArg(); + GRunArg(const cv::GRunArg &arg); + GRunArg(cv::GRunArg &&arg); + + GRunArg& operator= (const GRunArg &arg); + GRunArg& operator= (GRunArg &&arg); + + template + GRunArg(const T &t, + const Meta &m = Meta{}, + typename std::enable_if< detail::in_variant::value, int>::type = 0) + : GRunArgBase(t) + , meta(m) + { + } + template + GRunArg(T &&t, + const Meta &m = Meta{}, + typename std::enable_if< detail::in_variant::value, int>::type = 0) + : GRunArgBase(std::move(t)) + , meta(m) + { + } + template auto operator= (const T &t) + -> typename std::enable_if< detail::in_variant::value, cv::GRunArg>::type& + { + GRunArgBase::operator=(t); + return *this; + } + template auto operator= (T&& t) + -> typename std::enable_if< detail::in_variant::value, cv::GRunArg>::type& + { + GRunArgBase::operator=(std::move(t)); + return *this; + } +}; using GRunArgs = std::vector; // TODO: Think about the addition operator @@ -129,11 +186,13 @@ namespace gapi namespace wip { /** - * @brief This aggregate type represents all types which G-API can handle (via variant). + * @brief This aggregate type represents all types which G-API can + * handle (via variant). * - * It only exists to overcome C++ language limitations (where a `using`-defined class can't be forward-declared). 
+ * It only exists to overcome C++ language limitations (where a + * `using`-defined class can't be forward-declared). */ -struct Data: public GRunArg +struct GAPI_EXPORTS Data: public GRunArg { using GRunArg::GRunArg; template diff --git a/modules/gapi/include/opencv2/gapi/garray.hpp b/modules/gapi/include/opencv2/gapi/garray.hpp index 9118f4de98..5d4b3c59e0 100644 --- a/modules/gapi/include/opencv2/gapi/garray.hpp +++ b/modules/gapi/include/opencv2/gapi/garray.hpp @@ -284,6 +284,14 @@ namespace detail return static_cast&>(*m_ref).rref(); } + // Check if was created for/from std::vector + template bool holds() const + { + if (!m_ref) return false; + using U = typename std::decay::type; + return dynamic_cast*>(m_ref.get()) != nullptr; + } + void mov(VectorRef &v) { m_ref->mov(*v.m_ref); @@ -341,15 +349,18 @@ public: explicit GArray(detail::GArrayU &&ref) // GArrayU-based constructor : m_ref(ref) { putDetails(); } // (used by GCall, not for users) - detail::GArrayU strip() const { return m_ref; } + /// @private + detail::GArrayU strip() const { + return m_ref; + } + /// @private + static void VCtor(detail::VectorRef& vref) { + vref.reset(); + } private: - static void VCTor(detail::VectorRef& vref) { - vref.reset(); - vref.storeKind(); - } void putDetails() { - m_ref.setConstructFcn(&VCTor); + m_ref.setConstructFcn(&VCtor); m_ref.specifyType(); // FIXME: to unify those 2 to avoid excessive dynamic_cast m_ref.storeKind(); // } @@ -357,6 +368,8 @@ private: detail::GArrayU m_ref; }; +using GArrayP2f = GArray; + /** @} */ } // namespace cv diff --git a/modules/gapi/include/opencv2/gapi/gcall.hpp b/modules/gapi/include/opencv2/gapi/gcall.hpp index ed5ba5fde8..511eca1408 100644 --- a/modules/gapi/include/opencv2/gapi/gcall.hpp +++ b/modules/gapi/include/opencv2/gapi/gcall.hpp @@ -56,11 +56,16 @@ public: Priv& priv(); const Priv& priv() const; -protected: - std::shared_ptr m_priv; + // GKernel and params can be modified, it's needed for infer, + // because information about 
output shapes doesn't exist in compile time + GKernel& kernel(); + cv::util::any& params(); void setArgs(std::vector &&args); +protected: + std::shared_ptr m_priv; + // Public versions return a typed array or opaque, those are implementation details detail::GArrayU yieldArray(int output = 0); detail::GOpaqueU yieldOpaque(int output = 0); diff --git a/modules/gapi/include/opencv2/gapi/gcommon.hpp b/modules/gapi/include/opencv2/gapi/gcommon.hpp index e008fe4bf1..a474140baa 100644 --- a/modules/gapi/include/opencv2/gapi/gcommon.hpp +++ b/modules/gapi/include/opencv2/gapi/gcommon.hpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace cv { @@ -48,6 +49,7 @@ namespace detail CV_UINT64, // uint64_t user G-API data CV_STRING, // std::string user G-API data CV_POINT, // cv::Point user G-API data + CV_POINT2F, // cv::Point2f user G-API data CV_SIZE, // cv::Size user G-API data CV_RECT, // cv::Rect user G-API data CV_SCALAR, // cv::Scalar user G-API data @@ -67,15 +69,16 @@ namespace detail template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_SIZE; }; template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_SCALAR; }; template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_POINT; }; + template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_POINT2F; }; template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_MAT; }; template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_RECT; }; template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_MAT; }; template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_DRAW_PRIM; }; - using GOpaqueTraitsArrayTypes = std::tuple; // GOpaque is not supporting cv::Mat and cv::Scalar since there are GScalar and GMat types - using GOpaqueTraitsOpaqueTypes = std::tuple; } 
// namespace detail @@ -94,6 +97,15 @@ enum class GShape: int GFRAME, }; +namespace gapi { +namespace s11n { +namespace detail { +template struct wrap_serialize; +} // namespace detail +} // namespace s11n +} // namespace gapi + + struct GCompileArg; namespace detail { @@ -139,7 +151,7 @@ namespace detail { * passed in (a variadic template parameter pack) into a vector of * cv::GCompileArg objects. */ -struct GAPI_EXPORTS_W_SIMPLE GCompileArg +struct GCompileArg { public: // NB: Required for pythnon bindings @@ -151,6 +163,9 @@ public: template::value, int>::type = 0> explicit GCompileArg(T &&t) : tag(detail::CompileArgTag::type>::tag()) + , serializeF(cv::gapi::s11n::detail::has_S11N_spec::value ? + &cv::gapi::s11n::detail::wrap_serialize::serialize : + nullptr) , arg(t) { } @@ -165,7 +180,16 @@ public: return util::any_cast(arg); } + void serialize(cv::gapi::s11n::IOStream& os) const + { + if (serializeF) + { + serializeF(os, *this); + } + } + private: + std::function serializeF; util::any arg; }; @@ -198,6 +222,19 @@ inline cv::util::optional getCompileArg(const cv::GCompileArgs &args) } return cv::util::optional(); } + +namespace s11n { +namespace detail { +template struct wrap_serialize +{ + static void serialize(IOStream& os, const GCompileArg& arg) + { + using DT = typename std::decay::type; + S11N
::serialize(os, arg.get
()); + } +}; +} // namespace detail +} // namespace s11n } // namespace gapi /** diff --git a/modules/gapi/include/opencv2/gapi/gcomputation.hpp b/modules/gapi/include/opencv2/gapi/gcomputation.hpp index 1172c0f5d6..8732ada0d6 100644 --- a/modules/gapi/include/opencv2/gapi/gcomputation.hpp +++ b/modules/gapi/include/opencv2/gapi/gcomputation.hpp @@ -436,7 +436,7 @@ public: * * @sa @ref gapi_compile_args */ - GStreamingCompiled compileStreaming(GMetaArgs &&in_metas, GCompileArgs &&args = {}); + GAPI_WRAP GStreamingCompiled compileStreaming(GMetaArgs &&in_metas, GCompileArgs &&args = {}); /** * @brief Compile the computation for streaming mode. @@ -457,7 +457,7 @@ public: * * @sa @ref gapi_compile_args */ - GStreamingCompiled compileStreaming(GCompileArgs &&args = {}); + GAPI_WRAP GStreamingCompiled compileStreaming(GCompileArgs &&args = {}); // 2. Direct metadata version /** diff --git a/modules/gapi/include/opencv2/gapi/gkernel.hpp b/modules/gapi/include/opencv2/gapi/gkernel.hpp index b04cedecad..0ec7dd07c0 100644 --- a/modules/gapi/include/opencv2/gapi/gkernel.hpp +++ b/modules/gapi/include/opencv2/gapi/gkernel.hpp @@ -26,8 +26,16 @@ namespace cv { -using GShapes = std::vector; -using GKinds = std::vector; +struct GTypeInfo +{ + GShape shape; + cv::detail::OpaqueKind kind; +}; + +using GShapes = std::vector; +using GKinds = std::vector; +using GCtors = std::vector; +using GTypesInfo = std::vector; // GKernel describes kernel API to the system // FIXME: add attributes of a kernel, (e.g. number and types @@ -41,6 +49,7 @@ struct GAPI_EXPORTS GKernel M outMeta; // generic adaptor to API::outMeta(...) GShapes outShapes; // types (shapes) kernel's outputs GKinds inKinds; // kinds of kernel's inputs (fixme: below) + GCtors outCtors; // captured constructors for template output types }; // TODO: It's questionable if inKinds should really be here. Instead, // this information could come from meta. 
@@ -60,30 +69,27 @@ namespace detail // yield() is used in graph construction time as a generic method to obtain // lazy "return value" of G-API operations // - namespace + template struct Yield; + template<> struct Yield { - template struct Yield; - template<> struct Yield - { - static inline cv::GMat yield(cv::GCall &call, int i) { return call.yield(i); } - }; - template<> struct Yield - { - static inline cv::GMatP yield(cv::GCall &call, int i) { return call.yieldP(i); } - }; - template<> struct Yield - { - static inline cv::GScalar yield(cv::GCall &call, int i) { return call.yieldScalar(i); } - }; - template struct Yield > - { - static inline cv::GArray yield(cv::GCall &call, int i) { return call.yieldArray(i); } - }; - template struct Yield > - { - static inline cv::GOpaque yield(cv::GCall &call, int i) { return call.yieldOpaque(i); } - }; - } // anonymous namespace + static inline cv::GMat yield(cv::GCall &call, int i) { return call.yield(i); } + }; + template<> struct Yield + { + static inline cv::GMatP yield(cv::GCall &call, int i) { return call.yieldP(i); } + }; + template<> struct Yield + { + static inline cv::GScalar yield(cv::GCall &call, int i) { return call.yieldScalar(i); } + }; + template struct Yield > + { + static inline cv::GArray yield(cv::GCall &call, int i) { return call.yieldArray(i); } + }; + template struct Yield > + { + static inline cv::GOpaque yield(cv::GCall &call, int i) { return call.yieldOpaque(i); } + }; //////////////////////////////////////////////////////////////////////////// // Helper classes which brings outputMeta() marshalling to kernel @@ -215,7 +221,8 @@ public: , K::tag() , &K::getOutMeta , {detail::GTypeTraits::shape...} - , {detail::GTypeTraits::op_kind...}}); + , {detail::GTypeTraits::op_kind...} + , {detail::GObtainCtor::get()...}}); call.pass(args...); // TODO: std::forward() here? 
return yield(call, typename detail::MkSeq::type()); } @@ -240,7 +247,8 @@ public: , K::tag() , &K::getOutMeta , {detail::GTypeTraits::shape} - , {detail::GTypeTraits::op_kind...}}); + , {detail::GTypeTraits::op_kind...} + , {detail::GObtainCtor::get()}}); call.pass(args...); return detail::Yield::yield(call, 0); } @@ -459,11 +467,6 @@ namespace gapi { std::vector m_transformations; protected: - /// @private - // Check if package contains ANY implementation of a kernel API - // by API textual id. - bool includesAPI(const std::string &id) const; - /// @private // Remove ALL implementations of the given API (identified by ID) void removeAPI(const std::string &id); @@ -566,6 +569,9 @@ namespace gapi { return includesAPI(KAPI::id()); } + /// @private + bool includesAPI(const std::string &id) const; + // FIXME: The below comment is wrong, and who needs this function? /** * @brief Find a kernel (by its API) diff --git a/modules/gapi/include/opencv2/gapi/gopaque.hpp b/modules/gapi/include/opencv2/gapi/gopaque.hpp index 3d1394473b..6117971768 100644 --- a/modules/gapi/include/opencv2/gapi/gopaque.hpp +++ b/modules/gapi/include/opencv2/gapi/gopaque.hpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -119,6 +120,7 @@ namespace detail virtual void mov(BasicOpaqueRef &ref) = 0; virtual const void* ptr() const = 0; + virtual void set(const cv::util::any &a) = 0; }; template class OpaqueRefT final: public BasicOpaqueRef @@ -212,6 +214,10 @@ namespace detail } virtual const void* ptr() const override { return &rref(); } + + virtual void set(const cv::util::any &a) override { + wref() = util::any_cast(a); + } }; // This class strips type information from OpaqueRefT<> and makes it usable @@ -285,6 +291,13 @@ namespace detail // May be used to uniquely identify this object internally const void *ptr() const { return m_ref->ptr(); } + + // Introduced for in-graph meta handling + OpaqueRef& operator= (const cv::util::any &a) + { + m_ref->set(a); + return 
*this; + } }; } // namespace detail @@ -295,25 +308,27 @@ namespace detail template class GOpaque { public: - GOpaque() { putDetails(); } // Empty constructor - explicit GOpaque(detail::GOpaqueU &&ref) // GOpaqueU-based constructor - : m_ref(ref) { putDetails(); } // (used by GCall, not for users) - - detail::GOpaqueU strip() const { return m_ref; } - -private: // Host type (or Flat type) - the type this GOpaque is actually // specified to. using HT = typename detail::flatten_g>::type; - static void CTor(detail::OpaqueRef& ref) { - ref.reset(); - ref.storeKind(); + GOpaque() { putDetails(); } // Empty constructor + explicit GOpaque(detail::GOpaqueU &&ref) // GOpaqueU-based constructor + : m_ref(ref) { putDetails(); } // (used by GCall, not for users) + + /// @private + detail::GOpaqueU strip() const { + return m_ref; } + /// @private + static void Ctor(detail::OpaqueRef& ref) { + ref.reset(); + } +private: void putDetails() { - m_ref.setConstructFcn(&CTor); - m_ref.specifyType(); // FIXME: to unify those 2 to avoid excessive dynamic_cast - m_ref.storeKind(); // + m_ref.setConstructFcn(&Ctor); + m_ref.specifyType(); + m_ref.storeKind(); } detail::GOpaqueU m_ref; diff --git a/modules/gapi/include/opencv2/gapi/gproto.hpp b/modules/gapi/include/opencv2/gapi/gproto.hpp index fbcccb38ea..f91fcdb2c8 100644 --- a/modules/gapi/include/opencv2/gapi/gproto.hpp +++ b/modules/gapi/include/opencv2/gapi/gproto.hpp @@ -135,7 +135,7 @@ GRunArg value_of(const GOrigin &origin); // Transform run-time computation arguments into a collection of metadata // extracted from that arguments GMetaArg GAPI_EXPORTS descr_of(const GRunArg &arg ); -GMetaArgs GAPI_EXPORTS descr_of(const GRunArgs &args); +GMetaArgs GAPI_EXPORTS_W descr_of(const GRunArgs &args); // Transform run-time operation result argument into metadata extracted from that argument // Used to compare the metadata, which generated at compile time with the metadata result operation in run time diff --git 
a/modules/gapi/include/opencv2/gapi/gstreaming.hpp b/modules/gapi/include/opencv2/gapi/gstreaming.hpp index 7079042069..e09cf8d0f7 100644 --- a/modules/gapi/include/opencv2/gapi/gstreaming.hpp +++ b/modules/gapi/include/opencv2/gapi/gstreaming.hpp @@ -8,15 +8,99 @@ #ifndef OPENCV_GAPI_GSTREAMING_COMPILED_HPP #define OPENCV_GAPI_GSTREAMING_COMPILED_HPP +#include #include #include #include +#include #include #include namespace cv { +template using optional = cv::util::optional; + +namespace detail { +template struct wref_spec { + using type = T; +}; +template struct wref_spec > { + using type = T; +}; + +template +struct OptRef { + struct OptHolder { + virtual void mov(RefHolder &h) = 0; + virtual void reset() = 0; + virtual ~OptHolder() = default; + using Ptr = std::shared_ptr; + }; + template struct Holder final: OptHolder { + std::reference_wrapper > m_opt_ref; + + explicit Holder(cv::optional& opt) : m_opt_ref(std::ref(opt)) { + } + virtual void mov(RefHolder &h) override { + using U = typename wref_spec::type; + m_opt_ref.get() = cv::util::make_optional(std::move(h.template wref())); + } + virtual void reset() override { + m_opt_ref.get().reset(); + } + }; + template + explicit OptRef(cv::optional& t) : m_opt{new Holder(t)} {} + void mov(RefHolder &h) { m_opt->mov(h); } + void reset() { m_opt->reset();} +private: + typename OptHolder::Ptr m_opt; +}; +using OptionalVectorRef = OptRef; +using OptionalOpaqueRef = OptRef; +} // namespace detail + +// TODO: Keep it in sync with GRunArgP (derive the type automatically?) +using GOptRunArgP = util::variant< + optional*, + optional*, + optional*, + cv::detail::OptionalVectorRef, + cv::detail::OptionalOpaqueRef +>; +using GOptRunArgsP = std::vector; + +namespace detail { + +template inline GOptRunArgP wrap_opt_arg(optional& arg) { + // By default, T goes to an OpaqueRef. 
All other types are specialized + return GOptRunArgP{OptionalOpaqueRef(arg)}; +} + +template inline GOptRunArgP wrap_opt_arg(optional >& arg) { + return GOptRunArgP{OptionalVectorRef(arg)}; +} + +template<> inline GOptRunArgP wrap_opt_arg(optional &m) { + return GOptRunArgP{&m}; +} + +template<> inline GOptRunArgP wrap_opt_arg(optional &s) { + return GOptRunArgP{&s}; +} + +} // namespace detail + +// Now cv::gout() may produce an empty vector (see "dynamic graphs"), so +// there may be a conflict between these two. State here that Opt version +// _must_ have at least one input for this overload +template +inline GOptRunArgsP gout(optional&arg, optional&... args) +{ + return GOptRunArgsP{ detail::wrap_opt_arg(arg), detail::wrap_opt_arg(args)... }; +} + /** * \addtogroup gapi_main_classes * @{ @@ -49,11 +133,11 @@ namespace cv { * * @sa GCompiled */ -class GAPI_EXPORTS GStreamingCompiled +class GAPI_EXPORTS_W_SIMPLE GStreamingCompiled { public: class GAPI_EXPORTS Priv; - GStreamingCompiled(); + GAPI_WRAP GStreamingCompiled(); // FIXME: More overloads? /** @@ -96,7 +180,7 @@ public: * @param ins vector of inputs to process. * @sa gin */ - void setSource(GRunArgs &&ins); + GAPI_WRAP void setSource(GRunArgs &&ins); /** * @brief Specify an input video stream for a single-input @@ -109,7 +193,23 @@ public: * @param s a shared pointer to IStreamSource representing the * input video stream. */ - void setSource(const gapi::wip::IStreamSource::Ptr& s); + GAPI_WRAP void setSource(const gapi::wip::IStreamSource::Ptr& s); + + /** + * @brief Constructs and specifies an input video stream for a + * single-input computation pipeline with the given parameters. + * + * Throws if pipeline is already running. Use stop() and then + * setSource() to run the graph on a new video stream. + * + * @overload + * @param args arguments used to contruct and initialize a stream + * source. + */ + template + void setSource(Args&&... 
args) { + setSource(cv::gapi::wip::make_src(std::forward(args)...)); + } /** * @brief Start the pipeline execution. @@ -126,7 +226,7 @@ public: * start()/stop()/setSource() may be called on the same object in * multiple threads in your application. */ - void start(); + GAPI_WRAP void start(); /** * @brief Get the next processed frame from the pipeline. @@ -150,6 +250,47 @@ public: */ bool pull(cv::GRunArgsP &&outs); + // NB: Used from python + GAPI_WRAP std::tuple pull(); + + /** + * @brief Get some next available data from the pipeline. + * + * This method takes a vector of cv::optional object. An object is + * assigned to some value if this value is available (ready) at + * the time of the call, and resets the object to empty() if it is + * not. + * + * This is a blocking method which guarantees that some data has + * been written to the output vector on return. + * + * Using this method only makes sense if the graph has + * desynchronized parts (see cv::gapi::desync). If there is no + * desynchronized parts in the graph, the behavior of this + * method is identical to the regular pull() (all data objects are + * produced synchronously in the output vector). + * + * Use gout() to create an output parameter vector. + * + * Output vectors must have the same number of elements as defined + * in the cv::GComputation protocol (at the moment of its + * construction). Shapes of elements also must conform to protocol + * (e.g. cv::optional needs to be passed where cv::GMat + * has been declared as output, and so on). Run-time exception is + * generated on type mismatch. + * + * This method writes new data into objects passed via output + * vector. If there is no data ready yet, this method blocks. Use + * try_pull() if you need a non-blocking version. + * + * @param outs vector of output parameters to obtain. + * @return true if next result has been obtained, + * false marks end of the stream. 
+ * + * @sa cv::gapi::desync + */ + bool pull(cv::GOptRunArgsP &&outs); + /** * @brief Try to get the next processed frame from the pipeline. * @@ -172,7 +313,7 @@ public: * * Throws if the pipeline is not running. */ - void stop(); + GAPI_WRAP void stop(); /** * @brief Test if the pipeline is running. @@ -184,7 +325,7 @@ public: * * @return true if the current stream is not over yet. */ - bool running() const; + GAPI_WRAP bool running() const; /// @private Priv& priv(); diff --git a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp index c9800b2b16..2e8dcb1aec 100644 --- a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp +++ b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp @@ -191,6 +191,29 @@ namespace detail template using wrap_gapi_helper = WrapValue::type>; template using wrap_host_helper = WrapValue >::type>; + +// Union type for various user-defined type constructors (GArray, +// GOpaque, etc) +// +// TODO: Replace construct-only API with a more generic one (probably +// with bits of introspection) +// +// Not required for non-user-defined types (GMat, GScalar, etc) +using HostCtor = util::variant + < util::monostate + , detail::ConstructVec + , detail::ConstructOpaque + >; + +template struct GObtainCtor { + static HostCtor get() { return HostCtor{}; } +}; +template struct GObtainCtor > { + static HostCtor get() { return HostCtor{ConstructVec{&GArray::VCtor}}; }; +}; +template struct GObtainCtor > { + static HostCtor get() { return HostCtor{ConstructOpaque{&GOpaque::Ctor}}; }; +}; } // namespace detail } // namespace cv diff --git a/modules/gapi/include/opencv2/gapi/imgproc.hpp b/modules/gapi/include/opencv2/gapi/imgproc.hpp index b4905e932b..7435ec1e1d 100644 --- a/modules/gapi/include/opencv2/gapi/imgproc.hpp +++ b/modules/gapi/include/opencv2/gapi/imgproc.hpp @@ -21,14 +21,45 @@ @{ @defgroup gapi_filters Graph API: Image filters @defgroup gapi_colorconvert Graph API: Converting image from one 
color space to another + @defgroup gapi_feature Graph API: Image Feature Detection + @defgroup gapi_shape Graph API: Image Structural Analysis and Shape Descriptors @} */ +namespace { +void validateFindingContoursMeta(const int depth, const int chan, const int mode) +{ + GAPI_Assert(chan == 1); + switch (mode) + { + case cv::RETR_CCOMP: + GAPI_Assert(depth == CV_8U || depth == CV_32S); + break; + case cv::RETR_FLOODFILL: + GAPI_Assert(depth == CV_32S); + break; + default: + GAPI_Assert(depth == CV_8U); + break; + } +} + +// Checks if the passed mat is a set of n-dimentional points of the given depth +bool isPointsVector(const int chan, const cv::Size &size, const int depth, + const int n, const int ddepth = -1) +{ + return (ddepth == depth || ddepth < 0) && + ((chan == n && (size.height == 1 || size.width == 1)) || + (chan == 1 && size.width == n)); +} +} // anonymous namespace + namespace cv { namespace gapi { namespace imgproc { using GMat2 = std::tuple; using GMat3 = std::tuple; // FIXME: how to avoid this? 
+ using GFindContoursOutput = std::tuple>,GArray>; G_TYPED_KERNEL(GFilter2D, ,"org.opencv.imgproc.filters.filter2D") { static GMatDesc outMeta(GMatDesc in, int ddepth, Mat, Point, Scalar, int, Scalar) { @@ -78,6 +109,14 @@ namespace imgproc { } }; + G_TYPED_KERNEL(GMorphologyEx, , + "org.opencv.imgproc.filters.morphologyEx") { + static GMatDesc outMeta(const GMatDesc &in, MorphTypes, Mat, Point, int, + BorderTypes, Scalar) { + return in; + } + }; + G_TYPED_KERNEL(GSobel, , "org.opencv.imgproc.filters.sobel") { static GMatDesc outMeta(GMatDesc in, int ddepth, int, int, int, double, double, int, Scalar) { return in.withDepth(ddepth); @@ -110,7 +149,7 @@ namespace imgproc { } }; - G_TYPED_KERNEL(GCanny, , "org.opencv.imgproc.canny"){ + G_TYPED_KERNEL(GCanny, , "org.opencv.imgproc.feature.canny"){ static GMatDesc outMeta(GMatDesc in, double, double, int, bool) { return in.withType(CV_8U, 1); } @@ -118,12 +157,153 @@ namespace imgproc { G_TYPED_KERNEL(GGoodFeatures, (GMat,int,double,double,Mat,int,bool,double)>, - "org.opencv.imgproc.goodFeaturesToTrack") { + "org.opencv.imgproc.feature.goodFeaturesToTrack") { static GArrayDesc outMeta(GMatDesc, int, double, double, const Mat&, int, bool, double) { return empty_array_desc(); } }; + using RetrMode = RetrievalModes; + using ContMethod = ContourApproximationModes; + G_TYPED_KERNEL(GFindContours, >(GMat,RetrMode,ContMethod,GOpaque)>, + "org.opencv.imgproc.shape.findContours") + { + static GArrayDesc outMeta(GMatDesc in, RetrMode mode, ContMethod, GOpaqueDesc) + { + validateFindingContoursMeta(in.depth, in.chan, mode); + return empty_array_desc(); + } + }; + + // FIXME oc: make default value offset = Point() + G_TYPED_KERNEL(GFindContoursNoOffset, >(GMat,RetrMode,ContMethod)>, + "org.opencv.imgproc.shape.findContoursNoOffset") + { + static GArrayDesc outMeta(GMatDesc in, RetrMode mode, ContMethod) + { + validateFindingContoursMeta(in.depth, in.chan, mode); + return empty_array_desc(); + } + }; + + 
G_TYPED_KERNEL(GFindContoursH,)>, + "org.opencv.imgproc.shape.findContoursH") + { + static std::tuple + outMeta(GMatDesc in, RetrMode mode, ContMethod, GOpaqueDesc) + { + validateFindingContoursMeta(in.depth, in.chan, mode); + return std::make_tuple(empty_array_desc(), empty_array_desc()); + } + }; + + // FIXME oc: make default value offset = Point() + G_TYPED_KERNEL(GFindContoursHNoOffset,, + "org.opencv.imgproc.shape.findContoursHNoOffset") + { + static std::tuple + outMeta(GMatDesc in, RetrMode mode, ContMethod) + { + validateFindingContoursMeta(in.depth, in.chan, mode); + return std::make_tuple(empty_array_desc(), empty_array_desc()); + } + }; + + G_TYPED_KERNEL(GBoundingRectMat, (GMat)>, + "org.opencv.imgproc.shape.boundingRectMat") { + static GOpaqueDesc outMeta(GMatDesc in) { + GAPI_Assert((in.depth == CV_8U && in.chan == 1) || + (isPointsVector(in.chan, in.size, in.depth, 2, CV_32S) || + isPointsVector(in.chan, in.size, in.depth, 2, CV_32F))); + + return empty_gopaque_desc(); + } + }; + + G_TYPED_KERNEL(GBoundingRectVector32S, (GArray)>, + "org.opencv.imgproc.shape.boundingRectVector32S") { + static GOpaqueDesc outMeta(GArrayDesc) { + return empty_gopaque_desc(); + } + }; + + G_TYPED_KERNEL(GBoundingRectVector32F, (GArray)>, + "org.opencv.imgproc.shape.boundingRectVector32F") { + static GOpaqueDesc outMeta(GArrayDesc) { + return empty_gopaque_desc(); + } + }; + + G_TYPED_KERNEL(GFitLine2DMat, (GMat,DistanceTypes,double,double,double)>, + "org.opencv.imgproc.shape.fitLine2DMat") { + static GOpaqueDesc outMeta(GMatDesc in,DistanceTypes,double,double,double) { + GAPI_Assert(isPointsVector(in.chan, in.size, in.depth, 2, -1)); + return empty_gopaque_desc(); + } + }; + + G_TYPED_KERNEL(GFitLine2DVector32S, + (GArray,DistanceTypes,double,double,double)>, + "org.opencv.imgproc.shape.fitLine2DVector32S") { + static GOpaqueDesc outMeta(GArrayDesc,DistanceTypes,double,double,double) { + return empty_gopaque_desc(); + } + }; + + G_TYPED_KERNEL(GFitLine2DVector32F, + 
(GArray,DistanceTypes,double,double,double)>, + "org.opencv.imgproc.shape.fitLine2DVector32F") { + static GOpaqueDesc outMeta(GArrayDesc,DistanceTypes,double,double,double) { + return empty_gopaque_desc(); + } + }; + + G_TYPED_KERNEL(GFitLine2DVector64F, + (GArray,DistanceTypes,double,double,double)>, + "org.opencv.imgproc.shape.fitLine2DVector64F") { + static GOpaqueDesc outMeta(GArrayDesc,DistanceTypes,double,double,double) { + return empty_gopaque_desc(); + } + }; + + G_TYPED_KERNEL(GFitLine3DMat, (GMat,DistanceTypes,double,double,double)>, + "org.opencv.imgproc.shape.fitLine3DMat") { + static GOpaqueDesc outMeta(GMatDesc in,int,double,double,double) { + GAPI_Assert(isPointsVector(in.chan, in.size, in.depth, 3, -1)); + return empty_gopaque_desc(); + } + }; + + G_TYPED_KERNEL(GFitLine3DVector32S, + (GArray,DistanceTypes,double,double,double)>, + "org.opencv.imgproc.shape.fitLine3DVector32S") { + static GOpaqueDesc outMeta(GArrayDesc,DistanceTypes,double,double,double) { + return empty_gopaque_desc(); + } + }; + + G_TYPED_KERNEL(GFitLine3DVector32F, + (GArray,DistanceTypes,double,double,double)>, + "org.opencv.imgproc.shape.fitLine3DVector32F") { + static GOpaqueDesc outMeta(GArrayDesc,DistanceTypes,double,double,double) { + return empty_gopaque_desc(); + } + }; + + G_TYPED_KERNEL(GFitLine3DVector64F, + (GArray,DistanceTypes,double,double,double)>, + "org.opencv.imgproc.shape.fitLine3DVector64F") { + static GOpaqueDesc outMeta(GArrayDesc,DistanceTypes,double,double,double) { + return empty_gopaque_desc(); + } + }; + + G_TYPED_KERNEL(GBGR2RGB, , "org.opencv.imgproc.colorconvert.bgr2rgb") { + static GMatDesc outMeta(GMatDesc in) { + return in; // type still remains CV_8UC3; + } + }; + G_TYPED_KERNEL(GRGB2YUV, , "org.opencv.imgproc.colorconvert.rgb2yuv") { static GMatDesc outMeta(GMatDesc in) { return in; // type still remains CV_8UC3; @@ -136,6 +316,42 @@ namespace imgproc { } }; + G_TYPED_KERNEL(GBGR2I420, , "org.opencv.imgproc.colorconvert.bgr2i420") { + static 
GMatDesc outMeta(GMatDesc in) { + GAPI_Assert(in.depth == CV_8U); + GAPI_Assert(in.chan == 3); + GAPI_Assert(in.size.height % 2 == 0); + return in.withType(in.depth, 1).withSize(Size(in.size.width, in.size.height * 3 / 2)); + } + }; + + G_TYPED_KERNEL(GRGB2I420, , "org.opencv.imgproc.colorconvert.rgb2i420") { + static GMatDesc outMeta(GMatDesc in) { + GAPI_Assert(in.depth == CV_8U); + GAPI_Assert(in.chan == 3); + GAPI_Assert(in.size.height % 2 == 0); + return in.withType(in.depth, 1).withSize(Size(in.size.width, in.size.height * 3 / 2)); + } + }; + + G_TYPED_KERNEL(GI4202BGR, , "org.opencv.imgproc.colorconvert.i4202bgr") { + static GMatDesc outMeta(GMatDesc in) { + GAPI_Assert(in.depth == CV_8U); + GAPI_Assert(in.chan == 1); + GAPI_Assert(in.size.height % 3 == 0); + return in.withType(in.depth, 3).withSize(Size(in.size.width, in.size.height * 2 / 3)); + } + }; + + G_TYPED_KERNEL(GI4202RGB, , "org.opencv.imgproc.colorconvert.i4202rgb") { + static GMatDesc outMeta(GMatDesc in) { + GAPI_Assert(in.depth == CV_8U); + GAPI_Assert(in.chan == 1); + GAPI_Assert(in.size.height % 3 == 0); + return in.withType(in.depth, 3).withSize(Size(in.size.width, in.size.height * 2 / 3)); + } + }; + G_TYPED_KERNEL(GNV12toRGB, , "org.opencv.imgproc.colorconvert.nv12torgb") { static GMatDesc outMeta(GMatDesc in_y, GMatDesc in_uv) { GAPI_Assert(in_y.chan == 1); @@ -230,7 +446,7 @@ namespace imgproc { } }; - G_TYPED_KERNEL(GNV12toRGBp, , "org.opencv.colorconvert.imgproc.nv12torgbp") { + G_TYPED_KERNEL(GNV12toRGBp, , "org.opencv.imgproc.colorconvert.nv12torgbp") { static GMatDesc outMeta(GMatDesc inY, GMatDesc inUV) { GAPI_Assert(inY.depth == CV_8U); GAPI_Assert(inUV.depth == CV_8U); @@ -244,7 +460,7 @@ namespace imgproc { } }; - G_TYPED_KERNEL(GNV12toGray, , "org.opencv.colorconvert.imgproc.nv12togray") { + G_TYPED_KERNEL(GNV12toGray, , "org.opencv.imgproc.colorconvert.nv12togray") { static GMatDesc outMeta(GMatDesc inY, GMatDesc inUV) { GAPI_Assert(inY.depth == CV_8U); GAPI_Assert(inUV.depth 
== CV_8U); @@ -259,7 +475,7 @@ namespace imgproc { } }; - G_TYPED_KERNEL(GNV12toBGRp, , "org.opencv.colorconvert.imgproc.nv12tobgrp") { + G_TYPED_KERNEL(GNV12toBGRp, , "org.opencv.imgproc.colorconvert.nv12tobgrp") { static GMatDesc outMeta(GMatDesc inY, GMatDesc inUV) { GAPI_Assert(inY.depth == CV_8U); GAPI_Assert(inUV.depth == CV_8U); @@ -455,7 +671,7 @@ The median filter uses cv::BORDER_REPLICATE internally to cope with border pixel @param ksize aperture linear size; it must be odd and greater than 1, for example: 3, 5, 7 ... @sa boxFilter, gaussianBlur */ -GAPI_EXPORTS GMat medianBlur(const GMat& src, int ksize); +GAPI_EXPORTS_W GMat medianBlur(const GMat& src, int ksize); /** @brief Erodes an image by using a specific structuring element. @@ -479,7 +695,7 @@ anchor is at the element center. @param iterations number of times erosion is applied. @param borderType pixel extrapolation method, see cv::BorderTypes @param borderValue border value in case of a constant border -@sa dilate +@sa dilate, morphologyEx */ GAPI_EXPORTS GMat erode(const GMat& src, const Mat& kernel, const Point& anchor = Point(-1,-1), int iterations = 1, int borderType = BORDER_CONSTANT, @@ -554,6 +770,37 @@ GAPI_EXPORTS GMat dilate3x3(const GMat& src, int iterations = 1, int borderType = BORDER_CONSTANT, const Scalar& borderValue = morphologyDefaultBorderValue()); +/** @brief Performs advanced morphological transformations. + +The function can perform advanced morphological transformations using an erosion and dilation as +basic operations. + +Any of the operations can be done in-place. In case of multi-channel images, each channel is +processed independently. + +@note Function textual ID is "org.opencv.imgproc.filters.morphologyEx" + +@param src Input image. +@param op Type of a morphological operation, see #MorphTypes +@param kernel Structuring element. It can be created using #getStructuringElement. +@param anchor Anchor position within the element. 
Both negative values mean that the anchor is at +the kernel center. +@param iterations Number of times erosion and dilation are applied. +@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. +@param borderValue Border value in case of a constant border. The default value has a special +meaning. +@sa dilate, erode, getStructuringElement +@note The number of iterations is the number of times erosion or dilatation operation will be +applied. For instance, an opening operation (#MORPH_OPEN) with two iterations is equivalent to +apply successively: erode -> erode -> dilate -> dilate +(and not erode -> dilate -> erode -> dilate). + */ +GAPI_EXPORTS GMat morphologyEx(const GMat &src, const MorphTypes op, const Mat &kernel, + const Point &anchor = Point(-1,-1), + const int iterations = 1, + const BorderTypes borderType = BORDER_CONSTANT, + const Scalar &borderValue = morphologyDefaultBorderValue()); + /** @brief Calculates the first, second, third, or mixed image derivatives using an extended Sobel operator. In all cases except one, the \f$\texttt{ksize} \times \texttt{ksize}\f$ separable kernel is used to @@ -719,6 +966,10 @@ proportional to sigmaSpace. GAPI_EXPORTS GMat bilateralFilter(const GMat& src, int d, double sigmaColor, double sigmaSpace, int borderType = BORDER_DEFAULT); +//! @} gapi_filters + +//! @addtogroup gapi_feature +//! @{ /** @brief Finds edges in an image using the Canny algorithm. The function finds edges in the input image and marks them in the output map edges using the @@ -726,7 +977,7 @@ Canny algorithm. The smallest value between threshold1 and threshold2 is used fo largest value is used to find initial segments of strong edges. See -@note Function textual ID is "org.opencv.imgproc.filters.canny" +@note Function textual ID is "org.opencv.imgproc.feature.canny" @param image 8-bit input image. @param threshold1 first threshold for the hysteresis procedure. 
@@ -761,7 +1012,7 @@ The function can be used to initialize a point-based tracker of an object. A \> B, the vector of returned corners with qualityLevel=A will be the prefix of the output vector with qualityLevel=B . -@note Function textual ID is "org.opencv.imgproc.goodFeaturesToTrack" +@note Function textual ID is "org.opencv.imgproc.feature.goodFeaturesToTrack" @param image Input 8-bit or floating-point 32-bit, single-channel image. @param maxCorners Maximum number of corners to return. If there are more corners than are found, @@ -784,7 +1035,7 @@ or #cornerMinEigenVal. @return vector of detected corners. */ -GAPI_EXPORTS GArray goodFeaturesToTrack(const GMat &image, +GAPI_EXPORTS_W GArray goodFeaturesToTrack(const GMat &image, int maxCorners, double qualityLevel, double minDistance, @@ -795,6 +1046,8 @@ GAPI_EXPORTS GArray goodFeaturesToTrack(const GMat &image, /** @brief Equalizes the histogram of a grayscale image. +//! @} gapi_feature + The function equalizes the histogram of the input image using the following algorithm: - Calculate the histogram \f$H\f$ for src . @@ -812,10 +1065,281 @@ The algorithm normalizes the brightness and increases the contrast of the image. */ GAPI_EXPORTS GMat equalizeHist(const GMat& src); -//! @} gapi_filters +//! @addtogroup gapi_shape +//! @{ +/** @brief Finds contours in a binary image. + +The function retrieves contours from the binary image using the algorithm @cite Suzuki85 . +The contours are a useful tool for shape analysis and object detection and recognition. +See squares.cpp in the OpenCV sample directory. + +@note Function textual ID is "org.opencv.imgproc.shape.findContours" + +@param src Input gray-scale image @ref CV_8UC1. Non-zero pixels are treated as 1's. Zero +pixels remain 0's, so the image is treated as binary . You can use #compare, #inRange, #threshold , +#adaptiveThreshold, #Canny, and others to create a binary image out of a grayscale or color one. 
+If mode equals to #RETR_CCOMP, the input can also be a 32-bit integer +image of labels ( @ref CV_32SC1 ). If #RETR_FLOODFILL then @ref CV_32SC1 is supported only. +@param mode Contour retrieval mode, see #RetrievalModes +@param method Contour approximation method, see #ContourApproximationModes +@param offset Optional offset by which every contour point is shifted. This is useful if the +contours are extracted from the image ROI and then they should be analyzed in the whole image +context. + +@return GArray of detected contours. Each contour is stored as a GArray of points. + */ +GAPI_EXPORTS GArray> +findContours(const GMat &src, const RetrievalModes mode, const ContourApproximationModes method, + const GOpaque &offset); + +// FIXME oc: make default value offset = Point() +/** @overload +@note Function textual ID is "org.opencv.imgproc.shape.findContoursNoOffset" + */ +GAPI_EXPORTS GArray> +findContours(const GMat &src, const RetrievalModes mode, const ContourApproximationModes method); + +/** @brief Finds contours and their hierarchy in a binary image. + +The function retrieves contours from the binary image using the algorithm @cite Suzuki85 +and calculates their hierarchy. +The contours are a useful tool for shape analysis and object detection and recognition. +See squares.cpp in the OpenCV sample directory. + +@note Function textual ID is "org.opencv.imgproc.shape.findContoursH" + +@param src Input gray-scale image @ref CV_8UC1. Non-zero pixels are treated as 1's. Zero +pixels remain 0's, so the image is treated as binary . You can use #compare, #inRange, #threshold , +#adaptiveThreshold, #Canny, and others to create a binary image out of a grayscale or color one. +If mode equals to #RETR_CCOMP, the input can also be a 32-bit integer +image of labels ( @ref CV_32SC1 ). If #RETR_FLOODFILL -- @ref CV_32SC1 supports only. 
+@param mode Contour retrieval mode, see #RetrievalModes +@param method Contour approximation method, see #ContourApproximationModes +@param offset Optional offset by which every contour point is shifted. This is useful if the +contours are extracted from the image ROI and then they should be analyzed in the whole image +context. + +@return GArray of detected contours. Each contour is stored as a GArray of points. +@return Optional output GArray of cv::Vec4i, containing information about the image topology. +It has as many elements as the number of contours. For each i-th contour contours[i], the elements +hierarchy[i][0] , hierarchy[i][1] , hierarchy[i][2] , and hierarchy[i][3] are set to 0-based +indices in contours of the next and previous contours at the same hierarchical level, the first +child contour and the parent contour, respectively. If for the contour i there are no next, +previous, parent, or nested contours, the corresponding elements of hierarchy[i] will be negative. + */ +GAPI_EXPORTS std::tuple>,GArray> +findContoursH(const GMat &src, const RetrievalModes mode, const ContourApproximationModes method, + const GOpaque &offset); + +// FIXME oc: make default value offset = Point() +/** @overload +@note Function textual ID is "org.opencv.imgproc.shape.findContoursHNoOffset" + */ +GAPI_EXPORTS std::tuple>,GArray> +findContoursH(const GMat &src, const RetrievalModes mode, const ContourApproximationModes method); + +/** @brief Calculates the up-right bounding rectangle of a point set or non-zero pixels +of gray-scale image. + +The function calculates and returns the minimal up-right bounding rectangle for the specified +point set or non-zero pixels of gray-scale image. + +@note Function textual ID is "org.opencv.imgproc.shape.boundingRectMat" + +@param src Input gray-scale image @ref CV_8UC1; or input set of @ref CV_32S or @ref CV_32F +2D points stored in Mat. 
+ +@note In case of a 2D points' set given, Mat should be 2-dimensional, have a single row or column +if there are 2 channels, or have 2 columns if there is a single channel. Mat should have either +@ref CV_32S or @ref CV_32F depth + */ +GAPI_EXPORTS GOpaque boundingRect(const GMat& src); + +/** @overload + +Calculates the up-right bounding rectangle of a point set. + +@note Function textual ID is "org.opencv.imgproc.shape.boundingRectVector32S" + +@param src Input 2D point set, stored in std::vector. + */ +GAPI_EXPORTS GOpaque boundingRect(const GArray& src); + +/** @overload + +Calculates the up-right bounding rectangle of a point set. + +@note Function textual ID is "org.opencv.imgproc.shape.boundingRectVector32F" + +@param src Input 2D point set, stored in std::vector. + */ +GAPI_EXPORTS GOpaque boundingRect(const GArray& src); + +/** @brief Fits a line to a 2D point set. + +The function fits a line to a 2D point set by minimizing \f$\sum_i \rho(r_i)\f$ where +\f$r_i\f$ is a distance between the \f$i^{th}\f$ point, the line and \f$\rho(r)\f$ is a distance +function, one of the following: +- DIST_L2 +\f[\rho (r) = r^2/2 \quad \text{(the simplest and the fastest least-squares method)}\f] +- DIST_L1 +\f[\rho (r) = r\f] +- DIST_L12 +\f[\rho (r) = 2 \cdot ( \sqrt{1 + \frac{r^2}{2}} - 1)\f] +- DIST_FAIR +\f[\rho \left (r \right ) = C^2 \cdot \left ( \frac{r}{C} - \log{\left(1 + \frac{r}{C}\right)} \right ) \quad \text{where} \quad C=1.3998\f] +- DIST_WELSCH +\f[\rho \left (r \right ) = \frac{C^2}{2} \cdot \left ( 1 - \exp{\left(-\left(\frac{r}{C}\right)^2\right)} \right ) \quad \text{where} \quad C=2.9846\f] +- DIST_HUBER +\f[\rho (r) = \fork{r^2/2}{if \(r < C\)}{C \cdot (r-C/2)}{otherwise} \quad \text{where} \quad C=1.345\f] + +The algorithm is based on the M-estimator ( ) technique +that iteratively fits the line using the weighted least-squares algorithm. After each iteration the +weights \f$w_i\f$ are adjusted to be inversely proportional to \f$\rho(r_i)\f$ . 
+ +@note Function textual ID is "org.opencv.imgproc.shape.fitLine2DMat" + +@param src Input set of 2D points stored in one of possible containers: Mat, +std::vector, std::vector, std::vector. + +@note In case of an N-dimentional points' set given, Mat should be 2-dimensional, have a single row +or column if there are N channels, or have N columns if there is a single channel. + +@param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER +and @ref DIST_C are not suppored. +@param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value +is chosen. +@param reps Sufficient accuracy for the radius (distance between the coordinate origin and the +line). 1.0 would be a good default value for reps. If it is 0, a default value is chosen. +@param aeps Sufficient accuracy for the angle. 0.01 would be a good default value for aeps. +If it is 0, a default value is chosen. + +@return Output line parameters: a vector of 4 elements (like Vec4f) - (vx, vy, x0, y0), +where (vx, vy) is a normalized vector collinear to the line and (x0, y0) is a point on the line. 
+ */ +GAPI_EXPORTS GOpaque fitLine2D(const GMat& src, const DistanceTypes distType, + const double param = 0., const double reps = 0., + const double aeps = 0.); + +/** @overload + +@note Function textual ID is "org.opencv.imgproc.shape.fitLine2DVector32S" + + */ +GAPI_EXPORTS GOpaque fitLine2D(const GArray& src, const DistanceTypes distType, + const double param = 0., const double reps = 0., + const double aeps = 0.); + +/** @overload + +@note Function textual ID is "org.opencv.imgproc.shape.fitLine2DVector32F" + + */ +GAPI_EXPORTS GOpaque fitLine2D(const GArray& src, const DistanceTypes distType, + const double param = 0., const double reps = 0., + const double aeps = 0.); + +/** @overload + +@note Function textual ID is "org.opencv.imgproc.shape.fitLine2DVector64F" + + */ +GAPI_EXPORTS GOpaque fitLine2D(const GArray& src, const DistanceTypes distType, + const double param = 0., const double reps = 0., + const double aeps = 0.); + +/** @brief Fits a line to a 3D point set. + +The function fits a line to a 3D point set by minimizing \f$\sum_i \rho(r_i)\f$ where +\f$r_i\f$ is a distance between the \f$i^{th}\f$ point, the line and \f$\rho(r)\f$ is a distance +function, one of the following: +- DIST_L2 +\f[\rho (r) = r^2/2 \quad \text{(the simplest and the fastest least-squares method)}\f] +- DIST_L1 +\f[\rho (r) = r\f] +- DIST_L12 +\f[\rho (r) = 2 \cdot ( \sqrt{1 + \frac{r^2}{2}} - 1)\f] +- DIST_FAIR +\f[\rho \left (r \right ) = C^2 \cdot \left ( \frac{r}{C} - \log{\left(1 + \frac{r}{C}\right)} \right ) \quad \text{where} \quad C=1.3998\f] +- DIST_WELSCH +\f[\rho \left (r \right ) = \frac{C^2}{2} \cdot \left ( 1 - \exp{\left(-\left(\frac{r}{C}\right)^2\right)} \right ) \quad \text{where} \quad C=2.9846\f] +- DIST_HUBER +\f[\rho (r) = \fork{r^2/2}{if \(r < C\)}{C \cdot (r-C/2)}{otherwise} \quad \text{where} \quad C=1.345\f] + +The algorithm is based on the M-estimator ( ) technique +that iteratively fits the line using the weighted least-squares algorithm. 
After each iteration the +weights \f$w_i\f$ are adjusted to be inversely proportional to \f$\rho(r_i)\f$ . + +@note Function textual ID is "org.opencv.imgproc.shape.fitLine3DMat" + +@param src Input set of 3D points stored in one of possible containers: Mat, +std::vector, std::vector, std::vector. + +@note In case of an N-dimentional points' set given, Mat should be 2-dimensional, have a single row +or column if there are N channels, or have N columns if there is a single channel. + +@param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER +and @ref DIST_C are not suppored. +@param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value +is chosen. +@param reps Sufficient accuracy for the radius (distance between the coordinate origin and the +line). 1.0 would be a good default value for reps. If it is 0, a default value is chosen. +@param aeps Sufficient accuracy for the angle. 0.01 would be a good default value for aeps. +If it is 0, a default value is chosen. + +@return Output line parameters: a vector of 6 elements (like Vec6f) - (vx, vy, vz, x0, y0, z0), +where (vx, vy, vz) is a normalized vector collinear to the line and (x0, y0, z0) is a point on +the line. 
+ */ +GAPI_EXPORTS GOpaque fitLine3D(const GMat& src, const DistanceTypes distType, + const double param = 0., const double reps = 0., + const double aeps = 0.); + +/** @overload + +@note Function textual ID is "org.opencv.imgproc.shape.fitLine3DVector32S" + + */ +GAPI_EXPORTS GOpaque fitLine3D(const GArray& src, const DistanceTypes distType, + const double param = 0., const double reps = 0., + const double aeps = 0.); + +/** @overload + +@note Function textual ID is "org.opencv.imgproc.shape.fitLine3DVector32F" + + */ +GAPI_EXPORTS GOpaque fitLine3D(const GArray& src, const DistanceTypes distType, + const double param = 0., const double reps = 0., + const double aeps = 0.); + +/** @overload + +@note Function textual ID is "org.opencv.imgproc.shape.fitLine3DVector64F" + + */ +GAPI_EXPORTS GOpaque fitLine3D(const GArray& src, const DistanceTypes distType, + const double param = 0., const double reps = 0., + const double aeps = 0.); + +//! @} gapi_shape //! @addtogroup gapi_colorconvert //! @{ +/** @brief Converts an image from BGR color space to RGB color space. + +The function converts an input image from BGR color space to RGB. +The conventional ranges for B, G, and R channel values are 0 to 255. + +Output image is 8-bit unsigned 3-channel image @ref CV_8UC3. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.bgr2rgb" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3. +@sa RGB2BGR +*/ +GAPI_EXPORTS GMat BGR2RGB(const GMat& src); + /** @brief Converts an image from RGB color space to gray-scaled. The conventional ranges for R, G, and B channel values are 0 to 255. Resulting gray color value computed as @@ -826,7 +1350,7 @@ Resulting gray color value computed as @param src input image: 8-bit unsigned 3-channel image @ref CV_8UC1. 
@sa RGB2YUV */ -GAPI_EXPORTS GMat RGB2Gray(const GMat& src); +GAPI_EXPORTS_W GMat RGB2Gray(const GMat& src); /** @overload Resulting gray color value computed as @@ -871,6 +1395,70 @@ Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3. */ GAPI_EXPORTS GMat RGB2YUV(const GMat& src); +/** @brief Converts an image from BGR color space to I420 color space. + +The function converts an input image from BGR color space to I420. +The conventional ranges for R, G, and B channel values are 0 to 255. + +Output image must be 8-bit unsigned 1-channel image. @ref CV_8UC1. +Width of I420 output image must be the same as width of input image. +Height of I420 output image must be equal 3/2 from height of input image. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.bgr2i420" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3. +@sa I4202BGR +*/ +GAPI_EXPORTS GMat BGR2I420(const GMat& src); + +/** @brief Converts an image from RGB color space to I420 color space. + +The function converts an input image from RGB color space to I420. +The conventional ranges for R, G, and B channel values are 0 to 255. + +Output image must be 8-bit unsigned 1-channel image. @ref CV_8UC1. +Width of I420 output image must be the same as width of input image. +Height of I420 output image must be equal 3/2 from height of input image. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2i420" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3. +@sa I4202RGB +*/ +GAPI_EXPORTS GMat RGB2I420(const GMat& src); + +/** @brief Converts an image from I420 color space to BGR color space. + +The function converts an input image from I420 color space to BGR. +The conventional ranges for B, G, and R channel values are 0 to 255. + +Output image must be 8-bit unsigned 3-channel image. @ref CV_8UC3. +Width of BGR output image must be the same as width of input image. 
+Height of BGR output image must be equal 2/3 from height of input image. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.i4202bgr" + +@param src input image: 8-bit unsigned 1-channel image @ref CV_8UC1. +@sa BGR2I420 +*/ +GAPI_EXPORTS GMat I4202BGR(const GMat& src); + +/** @brief Converts an image from I420 color space to BGR color space. + +The function converts an input image from I420 color space to BGR. +The conventional ranges for B, G, and R channel values are 0 to 255. + +Output image must be 8-bit unsigned 3-channel image. @ref CV_8UC3. +Width of RGB output image must be the same as width of input image. +Height of RGB output image must be equal 2/3 from height of input image. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.i4202rgb" + +@param src input image: 8-bit unsigned 1-channel image @ref CV_8UC1. +@sa RGB2I420 +*/ +GAPI_EXPORTS GMat I4202RGB(const GMat& src); + /** @brief Converts an image from BGR color space to LUV color space. The function converts an input image from BGR color space to LUV. diff --git a/modules/gapi/include/opencv2/gapi/infer.hpp b/modules/gapi/include/opencv2/gapi/infer.hpp index 50086dd848..b850775a62 100644 --- a/modules/gapi/include/opencv2/gapi/infer.hpp +++ b/modules/gapi/include/opencv2/gapi/infer.hpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2019 Intel Corporation +// Copyright (C) 2019-2020 Intel Corporation #ifndef OPENCV_GAPI_INFER_HPP @@ -77,6 +77,9 @@ public: using ResultL = std::tuple< cv::GArray... >; using APIList = std::function, Args...)>; + + // FIXME: Args... must be limited to a single GMat + using APIRoi = std::function, Args...)>; }; // Single-return-value network definition (specialized base class) @@ -92,6 +95,9 @@ public: using ResultL = cv::GArray; using APIList = std::function, Args...)>; + + // FIXME: Args... 
must be limited to a single GMat + using APIRoi = std::function, Args...)>; }; // APIList2 is also template to allow different calling options @@ -114,22 +120,75 @@ struct InferAPIList2 { // a particular backend, not by a network itself. struct GInferBase { static constexpr const char * id() { - return "org.opencv.dnn.infer"; // Universal stub + return "org.opencv.dnn.infer"; // Universal stub } static GMetaArgs getOutMeta(const GMetaArgs &, const GArgs &) { - return GMetaArgs{}; // One more universal stub + return GMetaArgs{}; // One more universal stub } }; +// Struct stores network input/output names. +// Used by infer +struct InOutInfo +{ + std::vector in_names; + std::vector out_names; +}; + +/** + * @{ + * @brief G-API object used to collect network inputs + */ +class GAPI_EXPORTS_W_SIMPLE GInferInputs +{ +using Map = std::unordered_map; +public: + GAPI_WRAP GInferInputs(); + GAPI_WRAP void setInput(const std::string& name, const cv::GMat& value); + + cv::GMat& operator[](const std::string& name); + const Map& getBlobs() const; + +private: + std::shared_ptr in_blobs; +}; +/** @} */ + +/** + * @{ + * @brief G-API object used to collect network outputs + */ +struct GAPI_EXPORTS_W_SIMPLE GInferOutputs +{ +public: + GAPI_WRAP GInferOutputs() = default; + GInferOutputs(std::shared_ptr call); + GAPI_WRAP cv::GMat at(const std::string& name); + +private: + struct Priv; + std::shared_ptr m_priv; +}; +/** @} */ +// Base "InferROI" kernel. +// All notes from "Infer" kernel apply here as well. +struct GInferROIBase { + static constexpr const char * id() { + return "org.opencv.dnn.infer-roi"; // Universal stub + } + static GMetaArgs getOutMeta(const GMetaArgs &, const GArgs &) { + return GMetaArgs{}; // One more universal stub + } +}; // Base "Infer list" kernel. // All notes from "Infer" kernel apply here as well. 
struct GInferListBase { static constexpr const char * id() { - return "org.opencv.dnn.infer-roi"; // Universal stub + return "org.opencv.dnn.infer-roi-list-1"; // Universal stub } static GMetaArgs getOutMeta(const GMetaArgs &, const GArgs &) { - return GMetaArgs{}; // One more universal stub + return GMetaArgs{}; // One more universal stub } }; @@ -137,10 +196,10 @@ struct GInferListBase { // All notes from "Infer" kernel apply here as well. struct GInferList2Base { static constexpr const char * id() { - return "org.opencv.dnn.infer-roi-list"; // Universal stub + return "org.opencv.dnn.infer-roi-list-2"; // Universal stub } static GMetaArgs getOutMeta(const GMetaArgs &, const GArgs &) { - return GMetaArgs{}; // One more universal stub + return GMetaArgs{}; // One more universal stub } }; @@ -157,6 +216,19 @@ struct GInfer final static constexpr const char* tag() { return Net::tag(); } }; +// A specific roi-inference kernel. API (::on()) is fixed here and +// verified against Net. +template +struct GInferROI final + : public GInferROIBase + , public detail::KernelTypeMedium< GInferROI + , typename Net::APIRoi > { + using GInferROIBase::getOutMeta; // FIXME: name lookup conflict workaround? + + static constexpr const char* tag() { return Net::tag(); } +}; + + // A generic roi-list inference kernel. API (::on()) is derived from // the Net template parameter (see more in infer<> overload). template @@ -195,6 +267,23 @@ struct GInferList2 final namespace cv { namespace gapi { +/** @brief Calculates response for the specified network (template + * parameter) for the specified region in the source image. + * Currently expects a single-input network only. + * + * @tparam A network type defined with G_API_NET() macro. + * @param in input image where to take ROI from. + * @param roi an object describing the region of interest + * in the source image. May be calculated in the same graph dynamically. + * @return an object of return type as defined in G_API_NET(). 
+ * If a network has multiple return values (defined with a tuple), a tuple of + * objects of appropriate type is returned. + * @sa G_API_NET() + */ +template +typename Net::Result infer(cv::GOpaque roi, cv::GMat in) { + return GInferROI::on(roi, in); +} /** @brief Calculates responses for the specified network (template * parameter) for every region in the source image. @@ -254,6 +343,51 @@ typename Net::Result infer(Args&&... args) { return GInfer::on(std::forward(args)...); } +/** + * @brief Special network type + */ +struct Generic { }; + +/** + * @brief Calculates response for generic network + * + * @param tag a network tag + * @param inputs networks's inputs + * @return a GInferOutputs + */ +template GInferOutputs +infer(const std::string& tag, const GInferInputs& inputs) +{ + std::vector input_args; + std::vector input_names; + + const auto& blobs = inputs.getBlobs(); + for (auto&& p : blobs) + { + input_names.push_back(p.first); + input_args.emplace_back(p.second); + } + + GKinds kinds(blobs.size(), cv::detail::OpaqueKind::CV_MAT); + auto call = std::make_shared(GKernel{ + GInferBase::id(), + tag, + GInferBase::getOutMeta, + {}, // outShape will be filled later + std::move(kinds), + {}, // outCtors will be filled later + }); + + call->setArgs(std::move(input_args)); + call->params() = InOutInfo{input_names, {}}; + + return GInferOutputs{std::move(call)}; +} + +GAPI_EXPORTS_W inline GInferOutputs infer(const String& name, const GInferInputs& inputs) +{ + return infer(name, inputs); +} } // namespace gapi } // namespace cv @@ -283,8 +417,8 @@ struct GAPI_EXPORTS GNetParam { * * @sa cv::gapi::networks */ -struct GAPI_EXPORTS GNetPackage { - GNetPackage() : GNetPackage({}) {} +struct GAPI_EXPORTS_W_SIMPLE GNetPackage { + GAPI_WRAP GNetPackage() : GNetPackage({}) {} explicit GNetPackage(std::initializer_list &&ii); std::vector backends() const; std::vector networks; diff --git a/modules/gapi/include/opencv2/gapi/infer/bindings_ie.hpp 
b/modules/gapi/include/opencv2/gapi/infer/bindings_ie.hpp new file mode 100644 index 0000000000..fdd4128b1a --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/infer/bindings_ie.hpp @@ -0,0 +1,56 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + +#ifndef OPENCV_GAPI_INFER_BINDINGS_IE_HPP +#define OPENCV_GAPI_INFER_BINDINGS_IE_HPP + +#include +#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS +#include // GKernelPackage +#include // Params + +#include + +namespace cv { +namespace gapi { +namespace ie { + +// NB: Used by python wrapper +// This class can be marked as SIMPLE, because it's implemented as pimpl +class GAPI_EXPORTS_W_SIMPLE PyParams { +public: + PyParams() = default; + + PyParams(const std::string &tag, + const std::string &model, + const std::string &weights, + const std::string &device); + + PyParams(const std::string &tag, + const std::string &model, + const std::string &device); + + GBackend backend() const; + std::string tag() const; + cv::util::any params() const; + +private: + std::shared_ptr> m_priv; +}; + +GAPI_EXPORTS_W PyParams params(const std::string &tag, + const std::string &model, + const std::string &weights, + const std::string &device); + +GAPI_EXPORTS_W PyParams params(const std::string &tag, + const std::string &model, + const std::string &device); +} // namespace ie +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_INFER_BINDINGS_IE_HPP diff --git a/modules/gapi/include/opencv2/gapi/infer/ie.hpp b/modules/gapi/include/opencv2/gapi/infer/ie.hpp index c6d7f272a8..53e31fbb09 100644 --- a/modules/gapi/include/opencv2/gapi/infer/ie.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/ie.hpp @@ -11,12 +11,14 @@ #include #include #include // tuple, tuple_size +#include #include #include #include // GAPI_EXPORTS #include // 
GKernelPackage +#include // Generic namespace cv { namespace gapi { @@ -41,6 +43,8 @@ enum class TraitAs: int IMAGE //!< G-API traits an associated cv::Mat as an image so creates an "image" blob (NCHW/NHWC, etc) }; +using IEConfig = std::map; + namespace detail { struct ParamDesc { std::string model_path; @@ -58,6 +62,11 @@ namespace detail { // (e.g. topology's partial execution) std::size_t num_in; // How many inputs are defined in the operation std::size_t num_out; // How many outputs are defined in the operation + + enum class Kind { Load, Import }; + Kind kind; + bool is_generic; + IEConfig config; }; } // namespace detail @@ -80,7 +89,19 @@ public: : desc{ model, weights, device, {}, {}, {} , std::tuple_size::value // num_in , std::tuple_size::value // num_out - } { + , detail::ParamDesc::Kind::Load + , false + , {}} { + }; + + Params(const std::string &model, + const std::string &device) + : desc{ model, {}, device, {}, {}, {} + , std::tuple_size::value // num_in + , std::tuple_size::value // num_out + , detail::ParamDesc::Kind::Import + , false + , {}} { }; Params& cfgInputLayers(const typename PortCfg::In &ll) { @@ -106,18 +127,65 @@ public: return *this; } + Params& pluginConfig(IEConfig&& cfg) { + desc.config = std::move(cfg); + return *this; + } + + Params& pluginConfig(const IEConfig& cfg) { + desc.config = cfg; + return *this; + } + // BEGIN(G-API's network parametrization API) - GBackend backend() const { return cv::gapi::ie::backend(); } - std::string tag() const { return Net::tag(); } - cv::util::any params() const { return { desc }; } + GBackend backend() const { return cv::gapi::ie::backend(); } + std::string tag() const { return Net::tag(); } + cv::util::any params() const { return { desc }; } // END(G-API's network parametrization API) protected: detail::ParamDesc desc; }; +template<> +class Params { +public: + Params(const std::string &tag, + const std::string &model, + const std::string &weights, + const std::string &device) + : desc{ model, 
weights, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Load, true, {}}, m_tag(tag) { + }; + + Params(const std::string &tag, + const std::string &model, + const std::string &device) + : desc{ model, {}, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Import, true, {}}, m_tag(tag) { + }; + + Params& pluginConfig(IEConfig&& cfg) { + desc.config = std::move(cfg); + return *this; + } + + Params& pluginConfig(const IEConfig& cfg) { + desc.config = cfg; + return *this; + } + + // BEGIN(G-API's network parametrization API) + GBackend backend() const { return cv::gapi::ie::backend(); } + std::string tag() const { return m_tag; } + cv::util::any params() const { return { desc }; } + // END(G-API's network parametrization API) + +protected: + detail::ParamDesc desc; + std::string m_tag; +}; + } // namespace ie } // namespace gapi } // namespace cv -#endif // OPENCV_GAPI_INFER_HPP +#endif // OPENCV_GAPI_INFER_IE_HPP diff --git a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp new file mode 100644 index 0000000000..d61ceb3dca --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp @@ -0,0 +1,138 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2020 Intel Corporation + +#ifndef OPENCV_GAPI_INFER_ONNX_HPP +#define OPENCV_GAPI_INFER_ONNX_HPP + +#include +#include +#include +#include // tuple, tuple_size + +#include +#include + +#include // GAPI_EXPORTS +#include // GKernelPackage + +namespace cv { +namespace gapi { +namespace onnx { + +GAPI_EXPORTS cv::gapi::GBackend backend(); + +enum class TraitAs: int { + TENSOR, //!< G-API traits an associated cv::Mat as a raw tensor + // and passes dimensions as-is + IMAGE //!< G-API traits an associated cv::Mat as an image so + // creates an "image" blob (NCHW/NHWC, etc) +}; + +using PostProc = std::function &, + std::unordered_map &)>; + + +namespace detail { +struct ParamDesc { + std::string model_path; + + // NB: nun_* may differ from topology's real input/output port numbers + // (e.g. topology's partial execution) + std::size_t num_in; // How many inputs are defined in the operation + std::size_t num_out; // How many outputs are defined in the operation + + // NB: Here order follows the `Net` API + std::vector input_names; + std::vector output_names; + + using ConstInput = std::pair; + std::unordered_map const_inputs; + + std::vector mean; + std::vector stdev; + + std::vector out_metas; + PostProc custom_post_proc; + + std::vector normalize; +}; +} // namespace detail + +template +struct PortCfg { + using In = std::array + < std::string + , std::tuple_size::value >; + using Out = std::array + < std::string + , std::tuple_size::value >; + using NormCoefs = std::array + < cv::Scalar + , std::tuple_size::value >; + using Normalize = std::array + < bool + , std::tuple_size::value >; +}; + +template class Params { +public: + Params(const std::string &model) { + desc.model_path = model; + desc.num_in = std::tuple_size::value; + desc.num_out = std::tuple_size::value; + }; + + // BEGIN(G-API's network parametrization API) + GBackend backend() const { return cv::gapi::onnx::backend(); } + std::string tag() const { return Net::tag(); } + cv::util::any 
params() const { return { desc }; } + // END(G-API's network parametrization API) + + Params& cfgInputLayers(const typename PortCfg::In &ll) { + desc.input_names.assign(ll.begin(), ll.end()); + return *this; + } + + Params& cfgOutputLayers(const typename PortCfg::Out &ll) { + desc.output_names.assign(ll.begin(), ll.end()); + return *this; + } + + Params& constInput(const std::string &layer_name, + const cv::Mat &data, + TraitAs hint = TraitAs::TENSOR) { + desc.const_inputs[layer_name] = {data, hint}; + return *this; + } + + Params& cfgMeanStd(const typename PortCfg::NormCoefs &m, + const typename PortCfg::NormCoefs &s) { + desc.mean.assign(m.begin(), m.end()); + desc.stdev.assign(s.begin(), s.end()); + return *this; + } + + Params& cfgPostProc(const std::vector &outs, + const PostProc &pp) { + desc.out_metas = outs; + desc.custom_post_proc = pp; + return *this; + } + + Params& cfgNormalize(const typename PortCfg::Normalize &n) { + desc.normalize.assign(n.begin(), n.end()); + return *this; + } + +protected: + detail::ParamDesc desc; +}; + +} // namespace onnx +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_INFER_HPP diff --git a/modules/gapi/include/opencv2/gapi/infer/parsers.hpp b/modules/gapi/include/opencv2/gapi/infer/parsers.hpp index c3488f5799..15742c6e55 100644 --- a/modules/gapi/include/opencv2/gapi/infer/parsers.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/parsers.hpp @@ -122,4 +122,16 @@ GAPI_EXPORTS std::tuple, GArray> parseYolo(const GMat& in, } // namespace gapi } // namespace cv +// Reimport parseSSD & parseYolo under their initial namespace +namespace cv { +namespace gapi { +namespace streaming { + +using cv::gapi::parseSSD; +using cv::gapi::parseYolo; + +} // namespace streaming +} // namespace gapi +} // namespace cv + #endif // OPENCV_GAPI_PARSERS_HPP diff --git a/modules/gapi/include/opencv2/gapi/media.hpp b/modules/gapi/include/opencv2/gapi/media.hpp index a7fe258757..f27cb80913 100644 --- 
a/modules/gapi/include/opencv2/gapi/media.hpp +++ b/modules/gapi/include/opencv2/gapi/media.hpp @@ -51,6 +51,7 @@ public: View(Ptrs&& ptrs, Strides&& strs, Callback &&cb = [](){}); View(const View&) = delete; View(View&&) = default; + View& operator = (const View&) = delete; ~View(); Ptrs ptr; diff --git a/modules/gapi/include/opencv2/gapi/opencv_includes.hpp b/modules/gapi/include/opencv2/gapi/opencv_includes.hpp index 5f25fe4af7..08b2d6ed02 100644 --- a/modules/gapi/include/opencv2/gapi/opencv_includes.hpp +++ b/modules/gapi/include/opencv2/gapi/opencv_includes.hpp @@ -21,11 +21,12 @@ # include // replacement of cv's structures: namespace cv { - using Rect = gapi::own::Rect; - using Size = gapi::own::Size; - using Point = gapi::own::Point; - using Scalar = gapi::own::Scalar; - using Mat = gapi::own::Mat; + using Rect = gapi::own::Rect; + using Size = gapi::own::Size; + using Point = gapi::own::Point; + using Point2f = gapi::own::Point2f; + using Scalar = gapi::own::Scalar; + using Mat = gapi::own::Mat; } // namespace cv #endif // !defined(GAPI_STANDALONE) diff --git a/modules/gapi/include/opencv2/gapi/own/types.hpp b/modules/gapi/include/opencv2/gapi/own/types.hpp index 20445ee0fd..c77a62ca53 100644 --- a/modules/gapi/include/opencv2/gapi/own/types.hpp +++ b/modules/gapi/include/opencv2/gapi/own/types.hpp @@ -28,6 +28,16 @@ public: int y = 0; }; +class Point2f +{ +public: + Point2f() = default; + Point2f(float _x, float _y) : x(_x), y(_y) {}; + + float x = 0.f; + float y = 0.f; +}; + class Rect { public: diff --git a/modules/gapi/include/opencv2/gapi/render/render_types.hpp b/modules/gapi/include/opencv2/gapi/render/render_types.hpp index 08b14d1ddd..ca403be361 100644 --- a/modules/gapi/include/opencv2/gapi/render/render_types.hpp +++ b/modules/gapi/include/opencv2/gapi/render/render_types.hpp @@ -252,7 +252,7 @@ struct Mosaic { } - Mosaic() = default; + Mosaic() : cellSz(0), decim(0) {} /*@{*/ cv::Rect mos; //!< Coordinates of the mosaic diff --git 
a/modules/gapi/include/opencv2/gapi/rmat.hpp b/modules/gapi/include/opencv2/gapi/rmat.hpp index 626e67e9ee..f50bd08b65 100644 --- a/modules/gapi/include/opencv2/gapi/rmat.hpp +++ b/modules/gapi/include/opencv2/gapi/rmat.hpp @@ -10,6 +10,16 @@ #include #include +// Forward declaration +namespace cv { +namespace gapi { +namespace s11n { + struct IOStream; + struct IIStream; +} // namespace s11n +} // namespace gapi +} // namespace cv + namespace cv { // "Remote Mat", a general class which provides an abstraction layer over the data @@ -44,11 +54,11 @@ public: { public: using DestroyCallback = std::function; + using stepsT = std::vector; View() = default; - View(const GMatDesc& desc, uchar* data, size_t step = 0u, DestroyCallback&& cb = nullptr) - : m_desc(desc), m_data(data), m_step(step == 0u ? elemSize()*cols() : step), m_cb(std::move(cb)) - {} + View(const GMatDesc& desc, uchar* data, const stepsT& steps = {}, DestroyCallback&& cb = nullptr); + View(const GMatDesc& desc, uchar* data, size_t step, DestroyCallback&& cb = nullptr); View(const View&) = delete; View& operator=(const View&) = delete; @@ -60,23 +70,30 @@ public: const std::vector& dims() const { return m_desc.dims; } int cols() const { return m_desc.size.width; } int rows() const { return m_desc.size.height; } - int type() const { return CV_MAKE_TYPE(depth(), chan()); } + int type() const; int depth() const { return m_desc.depth; } int chan() const { return m_desc.chan; } size_t elemSize() const { return CV_ELEM_SIZE(type()); } - template T* ptr(int y = 0, int x = 0) { - return reinterpret_cast(m_data + m_step*y + x*CV_ELEM_SIZE(type())); + template T* ptr(int y = 0) { + return reinterpret_cast(m_data + step()*y); } - template const T* ptr(int y = 0, int x = 0) const { - return reinterpret_cast(m_data + m_step*y + x*CV_ELEM_SIZE(type())); + template const T* ptr(int y = 0) const { + return reinterpret_cast(m_data + step()*y); } - size_t step() const { return m_step; } + template T* ptr(int y, int x) { + 
return reinterpret_cast(m_data + step()*y + step(1)*x); + } + template const T* ptr(int y, int x) const { + return reinterpret_cast(m_data + step()*y + step(1)*x); + } + size_t step(size_t i = 0) const { GAPI_DbgAssert(i; @@ -113,6 +136,10 @@ public: return dynamic_cast(m_adapter.get()); } + void serialize(cv::gapi::s11n::IOStream& os) const { + m_adapter->serialize(os); + } + private: AdapterP m_adapter = nullptr; }; diff --git a/modules/gapi/include/opencv2/gapi/s11n.hpp b/modules/gapi/include/opencv2/gapi/s11n.hpp index 0b61304c5c..0e2c4c239b 100644 --- a/modules/gapi/include/opencv2/gapi/s11n.hpp +++ b/modules/gapi/include/opencv2/gapi/s11n.hpp @@ -10,21 +10,25 @@ #include #include #include +#include #include +#include namespace cv { namespace gapi { namespace detail { GAPI_EXPORTS cv::GComputation getGraph(const std::vector &p); -} // namespace detail -namespace detail { GAPI_EXPORTS cv::GMetaArgs getMetaArgs(const std::vector &p); -} // namespace detail -namespace detail { GAPI_EXPORTS cv::GRunArgs getRunArgs(const std::vector &p); + + template + cv::GCompileArgs getCompileArgs(const std::vector &p); + + template + cv::GRunArgs getRunArgsWithRMats(const std::vector &p); } // namespace detail GAPI_EXPORTS std::vector serialize(const cv::GComputation &c); @@ -35,6 +39,7 @@ T deserialize(const std::vector &p); //} //ananymous namespace +GAPI_EXPORTS std::vector serialize(const cv::GCompileArgs&); GAPI_EXPORTS std::vector serialize(const cv::GMetaArgs&); GAPI_EXPORTS std::vector serialize(const cv::GRunArgs&); @@ -53,6 +58,17 @@ cv::GRunArgs deserialize(const std::vector &p) { return detail::getRunArgs(p); } +template inline +typename std::enable_if::value, GCompileArgs>:: +type deserialize(const std::vector &p) { + return detail::getCompileArgs(p); +} + +template inline +typename std::enable_if::value, GRunArgs>:: +type deserialize(const std::vector &p) { + return detail::getRunArgsWithRMats(p); +} } // namespace gapi } // namespace cv @@ -91,6 +107,10 @@ struct 
GAPI_EXPORTS IIStream { virtual IIStream& operator>> (std::string &) = 0; }; +namespace detail { +GAPI_EXPORTS std::unique_ptr getInStream(const std::vector &p); +} // namespace detail + //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // S11N operators @@ -101,6 +121,9 @@ struct GAPI_EXPORTS IIStream { GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::Point &pt); GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::Point &pt); +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::Point2f &pt); +GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::Point2f &pt); + GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::Size &sz); GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::Size &sz); @@ -113,6 +136,27 @@ GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::Scalar &s); GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::Mat &m); GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::Mat &m); +// FIXME: for GRunArgs serailization +#if !defined(GAPI_STANDALONE) +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::UMat &); +GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::UMat &); +#endif // !defined(GAPI_STANDALONE) + +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::RMat &r); +GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::RMat &r); + +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::gapi::wip::IStreamSource::Ptr &); +GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::gapi::wip::IStreamSource::Ptr &); + +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::detail::VectorRef &); +GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::detail::VectorRef &); + +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::detail::OpaqueRef &); +GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::detail::OpaqueRef &); + +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::MediaFrame 
&); +GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::MediaFrame &); + // Generic STL types //////////////////////////////////////////////////////////////// template IOStream& operator<< (IOStream& os, const std::map &m) { @@ -175,16 +219,146 @@ IIStream& operator>> (IIStream& is, std::vector &ts) { return is; } +// Generic: variant serialization namespace detail { - // Will be used along with default types if possible in specific cases (compile args, etc) - // Note: actual implementation is defined by user - template - struct GAPI_EXPORTS S11N { - static void serialize(IOStream &, const T &) {} - static T deserialize(IIStream &) { T t; return t; } - }; +template +IOStream& put_v(IOStream&, const V&, std::size_t) { + GAPI_Assert(false && "variant>>: requested index is invalid"); +}; +template +IOStream& put_v(IOStream& os, const V& v, std::size_t x) { + return (x == 0u) + ? os << cv::util::get(v) + : put_v(os, v, x-1); +} +template +IIStream& get_v(IIStream&, V&, std::size_t, std::size_t) { + GAPI_Assert(false && "variant<<: requested index is invalid"); +} +template +IIStream& get_v(IIStream& is, V& v, std::size_t i, std::size_t gi) { + if (i == gi) { + X x{}; + is >> x; + v = V{std::move(x)}; + return is; + } else return get_v(is, v, i+1, gi); +} } // namespace detail + +template +IOStream& operator<< (IOStream& os, const cv::util::variant &v) { + os << static_cast(v.index()); + return detail::put_v, Ts...>(os, v, v.index()); +} +template +IIStream& operator>> (IIStream& is, cv::util::variant &v) { + int idx = -1; + is >> idx; + GAPI_Assert(idx >= 0 && idx < (int)sizeof...(Ts)); + return detail::get_v, Ts...>(is, v, 0u, idx); +} + +// FIXME: consider a better solution +template +void getRunArgByIdx (IIStream& is, cv::util::variant &v, uint32_t idx) { + is = detail::get_v, Ts...>(is, v, 0u, idx); +} } // namespace s11n + +namespace detail +{ +template struct try_deserialize_comparg; + +template<> struct try_deserialize_comparg> { +static cv::util::optional 
exec(const std::string&, cv::gapi::s11n::IIStream&) { + return { }; + } +}; + +template +struct try_deserialize_comparg> { +static cv::util::optional exec(const std::string& tag, cv::gapi::s11n::IIStream& is) { + if (tag == cv::detail::CompileArgTag::tag()) { + static_assert(cv::gapi::s11n::detail::has_S11N_spec::value, + "cv::gapi::deserialize expects Types to have S11N " + "specializations with deserialization callbacks!"); + return cv::util::optional( + GCompileArg { cv::gapi::s11n::detail::S11N::deserialize(is) }); + } + return try_deserialize_comparg>::exec(tag, is); +} +}; + +template struct deserialize_runarg; + +template +struct deserialize_runarg { +static GRunArg exec(cv::gapi::s11n::IIStream& is, uint32_t idx) { + if (idx == GRunArg::index_of()) { + auto ptr = std::make_shared(); + ptr->deserialize(is); + return GRunArg { RMat(std::move(ptr)) }; + } else { // non-RMat arg - use default deserialization + GRunArg arg; + getRunArgByIdx(is, arg, idx); + return arg; + } +} +}; + +template +inline cv::util::optional tryDeserializeCompArg(const std::string& tag, + const std::vector& sArg) { + std::unique_ptr pArgIs = cv::gapi::s11n::detail::getInStream(sArg); + return try_deserialize_comparg>::exec(tag, *pArgIs); +} + +template +cv::GCompileArgs getCompileArgs(const std::vector &sArgs) { + cv::GCompileArgs args; + + std::unique_ptr pIs = cv::gapi::s11n::detail::getInStream(sArgs); + cv::gapi::s11n::IIStream& is = *pIs; + + uint32_t sz = 0; + is >> sz; + for (uint32_t i = 0; i < sz; ++i) { + std::string tag; + is >> tag; + + std::vector sArg; + is >> sArg; + + cv::util::optional dArg = + cv::gapi::detail::tryDeserializeCompArg(tag, sArg); + + if (dArg.has_value()) + { + args.push_back(dArg.value()); + } + } + + return args; +} + +template +cv::GRunArgs getRunArgsWithRMats(const std::vector &p) { + std::unique_ptr pIs = cv::gapi::s11n::detail::getInStream(p); + cv::gapi::s11n::IIStream& is = *pIs; + cv::GRunArgs args; + + uint32_t sz = 0; + is >> sz; + for 
(uint32_t i = 0; i < sz; ++i) { + uint32_t idx = 0; + is >> idx; + args.push_back(cv::gapi::detail::deserialize_runarg::exec(is, idx)); + } + + return args; +} +} // namespace detail } // namespace gapi } // namespace cv diff --git a/modules/gapi/include/opencv2/gapi/s11n/base.hpp b/modules/gapi/include/opencv2/gapi/s11n/base.hpp new file mode 100644 index 0000000000..d9335ee9f7 --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/s11n/base.hpp @@ -0,0 +1,46 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + +#ifndef OPENCV_GAPI_S11N_BASE_HPP +#define OPENCV_GAPI_S11N_BASE_HPP + +#include +#include + +namespace cv { +namespace gapi { +namespace s11n { +struct IOStream; +struct IIStream; + +namespace detail { + +struct NotImplemented { +}; + +// The default S11N for custom types is NotImplemented +// Don't! sublass from NotImplemented if you actually implement S11N. +template +struct S11N: public NotImplemented { + static void serialize(IOStream &, const T &) { + GAPI_Assert(false && "No serialization routine is provided!"); + } + static T deserialize(IIStream &) { + GAPI_Assert(false && "No deserialization routine is provided!"); + } +}; + +template struct has_S11N_spec { + static constexpr bool value = !std::is_base_of::type>>::value; +}; + +} // namespace detail +} // namespace s11n +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_S11N_BASE_HPP diff --git a/modules/gapi/include/opencv2/gapi/streaming/cap.hpp b/modules/gapi/include/opencv2/gapi/streaming/cap.hpp index faa555063a..aad6af618c 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/cap.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/cap.hpp @@ -21,9 +21,11 @@ * Note for developers: please don't put videoio dependency in G-API * because of this file. 
*/ +#include #include #include +#include namespace cv { namespace gapi { @@ -55,6 +57,7 @@ protected: cv::VideoCapture cap; cv::Mat first; bool first_pulled = false; + int64_t counter = 0; void prep() { @@ -80,19 +83,26 @@ protected: GAPI_Assert(!first.empty()); first_pulled = true; data = first; // no need to clone here since it was cloned already - return true; } - - if (!cap.isOpened()) return false; - - cv::Mat frame; - if (!cap.read(frame)) + else { - // end-of-stream happened - return false; + if (!cap.isOpened()) return false; + + cv::Mat frame; + if (!cap.read(frame)) + { + // end-of-stream happened + return false; + } + // Same reason to clone as in prep() + data = frame.clone(); } - // Same reason to clone as in prep() - data = frame.clone(); + // Tag data with seq_id/ts + const auto now = std::chrono::system_clock::now(); + const auto dur = std::chrono::duration_cast + (now.time_since_epoch()); + data.meta[cv::gapi::streaming::meta_tag::timestamp] = int64_t{dur.count()}; + data.meta[cv::gapi::streaming::meta_tag::seq_id] = int64_t{counter++}; return true; } @@ -103,6 +113,12 @@ protected: } }; +// NB: Overload for using from python +GAPI_EXPORTS_W cv::Ptr inline make_capture_src(const std::string& path) +{ + return make_src(path); +} + } // namespace wip } // namespace gapi } // namespace cv diff --git a/modules/gapi/include/opencv2/gapi/streaming/desync.hpp b/modules/gapi/include/opencv2/gapi/streaming/desync.hpp new file mode 100644 index 0000000000..86de279fe9 --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/streaming/desync.hpp @@ -0,0 +1,84 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2020 Intel Corporation + + +#ifndef OPENCV_GAPI_GSTREAMING_DESYNC_HPP +#define OPENCV_GAPI_GSTREAMING_DESYNC_HPP + +#include + +#include +#include +#include +#include +#include + +namespace cv { +namespace gapi { +namespace streaming { + +namespace detail { +struct GDesync { + static const char *id() { + return "org.opencv.streaming.desync"; + } + + // A universal yield for desync. + // Yields output objects according to the input Types... + // Reuses gkernel machinery. + // FIXME: This function can be generic and declared in gkernel.hpp + // (it is there already, but a part of GKernelType[M] + template + static std::tuple yield(cv::GCall &call, cv::detail::Seq) { + return std::make_tuple(cv::detail::Yield::yield(call, IIs)...); + } +}; + +template +G desync(const G &g) { + cv::GKernel k{ + GDesync::id() // kernel id + , "" // kernel tag + , [](const GMetaArgs &a, const GArgs &) {return a;} // outMeta callback + , {cv::detail::GTypeTraits::shape} // output Shape + , {cv::detail::GTypeTraits::op_kind} // input data kinds + , {cv::detail::GObtainCtor::get()} // output template ctors + }; + cv::GCall call(std::move(k)); + call.pass(g); + return std::get<0>(GDesync::yield(call, cv::detail::MkSeq<1>::type())); +} +} // namespace detail + +/** + * @brief Starts a desynchronized branch in the graph. + * + * This operation takes a single G-API data object and returns a + * graph-level "duplicate" of this object. + * + * Operations which use this data object can be desynchronized + * from the rest of the graph. + * + * This operation has no effect when a GComputation is compiled with + * regular cv::GComputation::compile(), since cv::GCompiled objects + * always produce their full output vectors. + * + * This operation only makes sense when a GComputation is compiled in + * streaming mode with cv::GComputation::compileStreaming(). 
If this + * operation is used and there are desynchronized outputs, the user + * should use a special version of cv::GStreamingCompiled::pull() + * which produces an array of cv::util::optional<> objects. + * + * @note This feature is highly experimental now and is currently + * limited to a single GMat argument only. + */ +GAPI_EXPORTS GMat desync(const GMat &g); + +} // namespace streaming +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_GSTREAMING_DESYNC_HPP diff --git a/modules/gapi/include/opencv2/gapi/streaming/meta.hpp b/modules/gapi/include/opencv2/gapi/streaming/meta.hpp new file mode 100644 index 0000000000..cbcfc3aa37 --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/streaming/meta.hpp @@ -0,0 +1,79 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + + +#ifndef OPENCV_GAPI_GSTREAMING_META_HPP +#define OPENCV_GAPI_GSTREAMING_META_HPP + +#include +#include +#include +#include + +namespace cv { +namespace gapi { +namespace streaming { + +// FIXME: the name is debatable +namespace meta_tag { +static constexpr const char * timestamp = "org.opencv.gapi.meta.timestamp"; +static constexpr const char * seq_id = "org.opencv.gapi.meta.seq_id"; +} // namespace meta_tag + +namespace detail { +struct GMeta { + static const char *id() { + return "org.opencv.streaming.meta"; + } + // A universal yield for meta(), same as in GDesync + template + static std::tuple yield(cv::GCall &call, cv::detail::Seq) { + return std::make_tuple(cv::detail::Yield::yield(call, IIs)...); + } + // Also a universal outMeta stub here + static GMetaArgs getOutMeta(const GMetaArgs &args, const GArgs &) { + return args; + } +}; +} // namespace detail + +template +cv::GOpaque meta(G g, const std::string &tag) { + using O = cv::GOpaque; + cv::GKernel k{ + detail::GMeta::id() // kernel 
id + , tag // kernel tag. Use meta tag here + , &detail::GMeta::getOutMeta // outMeta callback + , {cv::detail::GTypeTraits::shape} // output Shape + , {cv::detail::GTypeTraits::op_kind} // input data kinds + , {cv::detail::GObtainCtor::get()} // output template ctors + }; + cv::GCall call(std::move(k)); + call.pass(g); + return std::get<0>(detail::GMeta::yield(call, cv::detail::MkSeq<1>::type())); +} + +template +cv::GOpaque timestamp(G g) { + return meta(g, meta_tag::timestamp); +} + +template +cv::GOpaque seq_id(G g) { + return meta(g, meta_tag::seq_id); +} + +template +cv::GOpaque seqNo(G g) { + // Old name, compatibility only + return seq_id(g); +} + +} // namespace streaming +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_GSTREAMING_META_HPP diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index 702e8c4032..e25328e64f 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -3,7 +3,16 @@ #ifdef HAVE_OPENCV_GAPI +// NB: Python wrapper replaces :: with _ for classes using gapi_GKernelPackage = cv::gapi::GKernelPackage; +using gapi_GNetPackage = cv::gapi::GNetPackage; +using gapi_ie_PyParams = cv::gapi::ie::PyParams; +using gapi_wip_IStreamSource_Ptr = cv::Ptr; + +// FIXME: Python wrapper generate code without namespace std, +// so it cause error: "string wasn't declared" +// WA: Create using +using std::string; template<> bool pyopencv_to(PyObject* obj, std::vector& value, const ArgInfo& info) @@ -38,8 +47,20 @@ static PyObject* from_grunarg(const GRunArg& v) const auto& s = util::get(v); return pyopencv_from(s); } - + case GRunArg::index_of(): + { + const auto& vref = util::get(v); + switch (vref.getKind()) + { + case cv::detail::OpaqueKind::CV_POINT2F: + return pyopencv_from(vref.rref()); + default: + PyErr_SetString(PyExc_TypeError, "Unsupported kind for GArray"); + return NULL; + } + } default: + PyErr_SetString(PyExc_TypeError, "Failed to 
unpack GRunArgs"); return NULL; } GAPI_Assert(false); @@ -56,7 +77,6 @@ PyObject* pyopencv_from(const GRunArgs& value) PyObject* item = from_grunarg(value[0]); if(!item) { - PyErr_SetString(PyExc_TypeError, "Failed to unpack GRunArgs"); return NULL; } return item; @@ -78,6 +98,18 @@ PyObject* pyopencv_from(const GRunArgs& value) return list; } +template<> +bool pyopencv_to(PyObject* obj, GMetaArgs& value, const ArgInfo& info) +{ + return pyopencv_to_generic_vec(obj, value, info); +} + +template<> +PyObject* pyopencv_from(const GMetaArgs& value) +{ + return pyopencv_from_generic_vec(value); +} + template static PyObject* extract_proto_args(PyObject* py_args, PyObject* kw) { @@ -96,9 +128,13 @@ static PyObject* extract_proto_args(PyObject* py_args, PyObject* kw) { args.emplace_back(reinterpret_cast(item)->v); } + else if (PyObject_TypeCheck(item, reinterpret_cast(pyopencv_GArrayP2f_TypePtr))) + { + args.emplace_back(reinterpret_cast(item)->v.strip()); + } else { - PyErr_SetString(PyExc_TypeError, "cv.GIn() supports only cv.GMat and cv.GScalar"); + PyErr_SetString(PyExc_TypeError, "Unsupported type for cv.GIn()/cv.GOut()"); return NULL; } } @@ -151,6 +187,19 @@ static PyObject* pyopencv_cv_gin(PyObject* , PyObject* py_args, PyObject* kw) return NULL; } } + else if (PyObject_TypeCheck(item, + reinterpret_cast(pyopencv_gapi_wip_IStreamSource_TypePtr))) + { + cv::gapi::wip::IStreamSource::Ptr source = + reinterpret_cast(item)->v; + args.emplace_back(source); + } + else + { + PyErr_SetString(PyExc_TypeError, "cv.gin can works only with cv::Mat," + "cv::Scalar, cv::gapi::wip::IStreamSource::Ptr"); + return NULL; + } } return pyopencv_from_generic_vec(args); diff --git a/modules/gapi/misc/python/shadow_gapi.hpp b/modules/gapi/misc/python/shadow_gapi.hpp index dab083def7..792314512c 100644 --- a/modules/gapi/misc/python/shadow_gapi.hpp +++ b/modules/gapi/misc/python/shadow_gapi.hpp @@ -3,13 +3,30 @@ namespace cv { - GAPI_EXPORTS_W GCompileArgs 
compile_args(gapi::GKernelPackage pkg); + struct GAPI_EXPORTS_W_SIMPLE GCompileArg { }; + GAPI_EXPORTS_W GCompileArgs compile_args(gapi::GKernelPackage pkg); + GAPI_EXPORTS_W GCompileArgs compile_args(gapi::GNetPackage pkg); + + // NB: This classes doesn't exist in *.so + // HACK: Mark them as a class to force python wrapper generate code for this entities class GAPI_EXPORTS_W_SIMPLE GProtoArg { }; class GAPI_EXPORTS_W_SIMPLE GProtoInputArgs { }; class GAPI_EXPORTS_W_SIMPLE GProtoOutputArgs { }; - class GAPI_EXPORTS_W_SIMPLE GRunArg { }; + class GAPI_EXPORTS_W_SIMPLE GRunArg { }; + class GAPI_EXPORTS_W_SIMPLE GMetaArg { }; + + class GAPI_EXPORTS_W_SIMPLE GArrayP2f { }; using GProtoInputArgs = GIOProtoArgs; using GProtoOutputArgs = GIOProtoArgs; + + namespace gapi + { + GAPI_EXPORTS_W gapi::GNetPackage networks(const cv::gapi::ie::PyParams& params); + namespace wip + { + class GAPI_EXPORTS_W IStreamSource { }; + } // namespace wip + } // namespace gapi } // namespace cv diff --git a/modules/gapi/misc/python/test/test_gapi_core.py b/modules/gapi/misc/python/test/test_gapi_core.py index cd85d9cadb..267037a78d 100644 --- a/modules/gapi/misc/python/test/test_gapi_core.py +++ b/modules/gapi/misc/python/test/test_gapi_core.py @@ -2,26 +2,27 @@ import numpy as np import cv2 as cv +import os from tests_common import NewOpenCVTests # Plaidml is an optional backend pkgs = [ - cv.gapi.core.ocl.kernels(), - cv.gapi.core.cpu.kernels(), - cv.gapi.core.fluid.kernels() - # cv.gapi.core.plaidml.kernels() - ] + ('ocl' , cv.gapi.core.ocl.kernels()), + ('cpu' , cv.gapi.core.cpu.kernels()), + ('fluid' , cv.gapi.core.fluid.kernels()) + # ('plaidml', cv.gapi.core.plaidml.kernels()) + ] class gapi_core_test(NewOpenCVTests): def test_add(self): # TODO: Extend to use any type and size here - sz = (1280, 720) - in1 = np.random.randint(0, 100, sz) - in2 = np.random.randint(0, 100, sz) + sz = (720, 1280) + in1 = np.full(sz, 100) + in2 = np.full(sz, 50) # OpenCV expected = cv.add(in1, in2) @@ 
-32,17 +33,18 @@ class gapi_core_test(NewOpenCVTests): g_out = cv.gapi.add(g_in1, g_in2) comp = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(g_out)) - for pkg in pkgs: + for pkg_name, pkg in pkgs: actual = comp.apply(cv.gin(in1, in2), args=cv.compile_args(pkg)) # Comparison - self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) - self.assertEqual(expected.dtype, actual.dtype) + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') + self.assertEqual(expected.dtype, actual.dtype, 'Failed on ' + pkg_name + ' backend') def test_add_uint8(self): - sz = (1280, 720) - in1 = np.random.randint(0, 100, sz).astype(np.uint8) - in2 = np.random.randint(0, 100, sz).astype(np.uint8) + sz = (720, 1280) + in1 = np.full(sz, 100, dtype=np.uint8) + in2 = np.full(sz, 50 , dtype=np.uint8) # OpenCV expected = cv.add(in1, in2) @@ -53,16 +55,17 @@ class gapi_core_test(NewOpenCVTests): g_out = cv.gapi.add(g_in1, g_in2) comp = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(g_out)) - for pkg in pkgs: + for pkg_name, pkg in pkgs: actual = comp.apply(cv.gin(in1, in2), args=cv.compile_args(pkg)) # Comparison - self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) - self.assertEqual(expected.dtype, actual.dtype) + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') + self.assertEqual(expected.dtype, actual.dtype, 'Failed on ' + pkg_name + ' backend') def test_mean(self): - sz = (1280, 720, 3) - in_mat = np.random.randint(0, 100, sz) + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + in_mat = cv.imread(img_path) # OpenCV expected = cv.mean(in_mat) @@ -72,15 +75,16 @@ class gapi_core_test(NewOpenCVTests): g_out = cv.gapi.mean(g_in) comp = cv.GComputation(g_in, g_out) - for pkg in pkgs: + for pkg_name, pkg in pkgs: actual = comp.apply(cv.gin(in_mat), args=cv.compile_args(pkg)) # Comparison - self.assertEqual(0.0, cv.norm(expected, actual, 
cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') def test_split3(self): - sz = (1280, 720, 3) - in_mat = np.random.randint(0, 100, sz) + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + in_mat = cv.imread(img_path) # OpenCV expected = cv.split(in_mat) @@ -90,19 +94,19 @@ class gapi_core_test(NewOpenCVTests): b, g, r = cv.gapi.split3(g_in) comp = cv.GComputation(cv.GIn(g_in), cv.GOut(b, g, r)) - for pkg in pkgs: + for pkg_name, pkg in pkgs: actual = comp.apply(cv.gin(in_mat), args=cv.compile_args(pkg)) # Comparison for e, a in zip(expected, actual): - self.assertEqual(0.0, cv.norm(e, a, cv.NORM_INF)) - self.assertEqual(e.dtype, a.dtype) + self.assertEqual(0.0, cv.norm(e, a, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') + self.assertEqual(e.dtype, a.dtype, 'Failed on ' + pkg_name + ' backend') def test_threshold(self): - sz = (1280, 720) - in_mat = np.random.randint(0, 100, sz).astype(np.uint8) - rand_int = np.random.randint(0, 50) - maxv = (rand_int, rand_int) + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + in_mat = cv.cvtColor(cv.imread(img_path), cv.COLOR_RGB2GRAY) + maxv = (30, 30) # OpenCV expected_thresh, expected_mat = cv.threshold(in_mat, maxv[0], maxv[0], cv.THRESH_TRIANGLE) @@ -113,12 +117,15 @@ class gapi_core_test(NewOpenCVTests): mat, threshold = cv.gapi.threshold(g_in, g_sc, cv.THRESH_TRIANGLE) comp = cv.GComputation(cv.GIn(g_in, g_sc), cv.GOut(mat, threshold)) - for pkg in pkgs: + for pkg_name, pkg in pkgs: actual_mat, actual_thresh = comp.apply(cv.gin(in_mat, maxv), args=cv.compile_args(pkg)) # Comparison - self.assertEqual(0.0, cv.norm(expected_mat, actual_mat, cv.NORM_INF)) - self.assertEqual(expected_mat.dtype, actual_mat.dtype) - self.assertEqual(expected_thresh, actual_thresh[0]) + self.assertEqual(0.0, cv.norm(expected_mat, actual_mat, cv.NORM_INF), + 'Failed on ' + pkg_name + ' 
backend') + self.assertEqual(expected_mat.dtype, actual_mat.dtype, + 'Failed on ' + pkg_name + ' backend') + self.assertEqual(expected_thresh, actual_thresh[0], + 'Failed on ' + pkg_name + ' backend') if __name__ == '__main__': diff --git a/modules/gapi/misc/python/test/test_gapi_imgproc.py b/modules/gapi/misc/python/test/test_gapi_imgproc.py new file mode 100644 index 0000000000..dd1e397081 --- /dev/null +++ b/modules/gapi/misc/python/test/test_gapi_imgproc.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python + +import numpy as np +import cv2 as cv +import os + +from tests_common import NewOpenCVTests + + +# Plaidml is an optional backend +pkgs = [ + ('ocl' , cv.gapi.core.ocl.kernels()), + ('cpu' , cv.gapi.core.cpu.kernels()), + ('fluid' , cv.gapi.core.fluid.kernels()) + # ('plaidml', cv.gapi.core.plaidml.kernels()) + ] + + +class gapi_imgproc_test(NewOpenCVTests): + + def test_good_features_to_track(self): + # TODO: Extend to use any type and size here + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + in1 = cv.cvtColor(cv.imread(img_path), cv.COLOR_RGB2GRAY) + + # NB: goodFeaturesToTrack configuration + max_corners = 50 + quality_lvl = 0.01 + min_distance = 10 + block_sz = 3 + use_harris_detector = True + k = 0.04 + mask = None + + # OpenCV + expected = cv.goodFeaturesToTrack(in1, max_corners, quality_lvl, + min_distance, mask=mask, + blockSize=block_sz, useHarrisDetector=use_harris_detector, k=k) + + # G-API + g_in = cv.GMat() + g_out = cv.gapi.goodFeaturesToTrack(g_in, max_corners, quality_lvl, + min_distance, mask, block_sz, use_harris_detector, k) + + comp = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) + + for pkg_name, pkg in pkgs: + actual = comp.apply(cv.gin(in1), args=cv.compile_args(pkg)) + # NB: OpenCV & G-API have different output shapes: + # OpenCV - (num_points, 1, 2) + # G-API - (num_points, 2) + # Comparison + self.assertEqual(0.0, cv.norm(expected.flatten(), actual.flatten(), cv.NORM_INF), + 'Failed on ' + 
pkg_name + ' backend') + + + def test_rgb2gray(self): + # TODO: Extend to use any type and size here + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + in1 = cv.imread(img_path) + + # OpenCV + expected = cv.cvtColor(in1, cv.COLOR_RGB2GRAY) + + # G-API + g_in = cv.GMat() + g_out = cv.gapi.RGB2Gray(g_in) + + comp = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) + + for pkg_name, pkg in pkgs: + actual = comp.apply(cv.gin(in1), args=cv.compile_args(pkg)) + # Comparison + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') + + +if __name__ == '__main__': + NewOpenCVTests.bootstrap() diff --git a/modules/gapi/misc/python/test/test_gapi_infer.py b/modules/gapi/misc/python/test/test_gapi_infer.py new file mode 100644 index 0000000000..a6fabf7253 --- /dev/null +++ b/modules/gapi/misc/python/test/test_gapi_infer.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python + +import numpy as np +import cv2 as cv +import os + +from tests_common import NewOpenCVTests + + +class test_gapi_infer(NewOpenCVTests): + + def test_getAvailableTargets(self): + targets = cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_OPENCV) + self.assertTrue(cv.dnn.DNN_TARGET_CPU in targets) + + + def test_age_gender_infer(self): + + # NB: Check IE + if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): + return + + root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + device_id = 'CPU' + img = cv.resize(cv.imread(img_path), (62,62)) + + # OpenCV DNN + net = cv.dnn.readNetFromModelOptimizer(model_path, weights_path) + 
net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) + net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) + + blob = cv.dnn.blobFromImage(img) + + net.setInput(blob) + dnn_age, dnn_gender = net.forward(net.getUnconnectedOutLayersNames()) + + # OpenCV G-API + g_in = cv.GMat() + inputs = cv.GInferInputs() + inputs.setInput('data', g_in) + + outputs = cv.gapi.infer("net", inputs) + age_g = outputs.at("age_conv3") + gender_g = outputs.at("prob") + + comp = cv.GComputation(cv.GIn(g_in), cv.GOut(age_g, gender_g)) + pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) + + nets = cv.gapi.networks(pp) + args = cv.compile_args(nets) + gapi_age, gapi_gender = comp.apply(cv.gin(img), args=cv.compile_args(cv.gapi.networks(pp))) + + # Check + self.assertEqual(0.0, cv.norm(dnn_gender, gapi_gender, cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(dnn_age, gapi_age, cv.NORM_INF)) + + +if __name__ == '__main__': + NewOpenCVTests.bootstrap() diff --git a/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py b/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py index 8000496f79..53304fcb26 100644 --- a/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py +++ b/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py @@ -2,25 +2,26 @@ import numpy as np import cv2 as cv +import os from tests_common import NewOpenCVTests # Plaidml is an optional backend pkgs = [ - cv.gapi.core.ocl.kernels(), - cv.gapi.core.cpu.kernels(), - cv.gapi.core.fluid.kernels() - # cv.gapi.core.plaidml.kernels() - ] + ('ocl' , cv.gapi.core.ocl.kernels()), + ('cpu' , cv.gapi.core.cpu.kernels()), + ('fluid' , cv.gapi.core.fluid.kernels()) + # ('plaidml', cv.gapi.core.plaidml.kernels()) + ] class gapi_sample_pipelines(NewOpenCVTests): # NB: This test check multiple outputs for operation def test_mean_over_r(self): - sz = (100, 100, 3) - in_mat = np.random.randint(0, 100, sz).astype(np.uint8) + img_path = self.find_file('cv/face/david2.jpg', 
[os.environ.get('OPENCV_TEST_DATA_PATH')]) + in_mat = cv.imread(img_path) # # OpenCV _, _, r_ch = cv.split(in_mat) @@ -32,10 +33,11 @@ class gapi_sample_pipelines(NewOpenCVTests): g_out = cv.gapi.mean(r) comp = cv.GComputation(g_in, g_out) - for pkg in pkgs: + for pkg_name, pkg in pkgs: actual = comp.apply(cv.gin(in_mat), args=cv.compile_args(pkg)) # Comparison - self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') if __name__ == '__main__': diff --git a/modules/gapi/misc/python/test/test_gapi_streaming.py b/modules/gapi/misc/python/test/test_gapi_streaming.py new file mode 100644 index 0000000000..ae7ef5d338 --- /dev/null +++ b/modules/gapi/misc/python/test/test_gapi_streaming.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python + +import numpy as np +import cv2 as cv +import os + +from tests_common import NewOpenCVTests + +class test_gapi_streaming(NewOpenCVTests): + + def test_image_input(self): + sz = (1280, 720) + in_mat = np.random.randint(0, 100, sz).astype(np.uint8) + + # OpenCV + expected = cv.medianBlur(in_mat, 3) + + # G-API + g_in = cv.GMat() + g_out = cv.gapi.medianBlur(g_in, 3) + c = cv.GComputation(g_in, g_out) + ccomp = c.compileStreaming(cv.descr_of(cv.gin(in_mat))) + ccomp.setSource(cv.gin(in_mat)) + ccomp.start() + + _, actual = ccomp.pull() + + # Assert + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) + + + def test_video_input(self): + ksize = 3 + path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) + + # OpenCV + cap = cv.VideoCapture(path) + + # G-API + g_in = cv.GMat() + g_out = cv.gapi.medianBlur(g_in, ksize) + c = cv.GComputation(g_in, g_out) + + ccomp = c.compileStreaming() + source = cv.gapi.wip.make_capture_src(path) + ccomp.setSource(source) + ccomp.start() + + # Assert + max_num_frames = 10 + proc_num_frames = 0 + while cap.isOpened(): + has_expected, expected = cap.read() + has_actual, 
actual = ccomp.pull() + + self.assertEqual(has_expected, has_actual) + + if not has_actual: + break + + self.assertEqual(0.0, cv.norm(cv.medianBlur(expected, ksize), actual, cv.NORM_INF)) + + proc_num_frames += 1 + if proc_num_frames == max_num_frames: + break; + + + def test_video_split3(self): + path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) + + # OpenCV + cap = cv.VideoCapture(path) + + # G-API + g_in = cv.GMat() + b, g, r = cv.gapi.split3(g_in) + c = cv.GComputation(cv.GIn(g_in), cv.GOut(b, g, r)) + + ccomp = c.compileStreaming() + source = cv.gapi.wip.make_capture_src(path) + ccomp.setSource(source) + ccomp.start() + + # Assert + max_num_frames = 10 + proc_num_frames = 0 + while cap.isOpened(): + has_expected, frame = cap.read() + has_actual, actual = ccomp.pull() + + self.assertEqual(has_expected, has_actual) + + if not has_actual: + break + + expected = cv.split(frame) + for e, a in zip(expected, actual): + self.assertEqual(0.0, cv.norm(e, a, cv.NORM_INF)) + + proc_num_frames += 1 + if proc_num_frames == max_num_frames: + break; + + + def test_video_add(self): + sz = (576, 768, 3) + in_mat = np.random.randint(0, 100, sz).astype(np.uint8) + + path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) + + # OpenCV + cap = cv.VideoCapture(path) + + # G-API + g_in1 = cv.GMat() + g_in2 = cv.GMat() + out = cv.gapi.add(g_in1, g_in2) + c = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(out)) + + ccomp = c.compileStreaming() + source = cv.gapi.wip.make_capture_src(path) + ccomp.setSource(cv.gin(source, in_mat)) + ccomp.start() + + # Assert + max_num_frames = 10 + proc_num_frames = 0 + while cap.isOpened(): + has_expected, frame = cap.read() + has_actual, actual = ccomp.pull() + + self.assertEqual(has_expected, has_actual) + + if not has_actual: + break + + expected = cv.add(frame, in_mat) + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) + + proc_num_frames += 1 + if proc_num_frames == 
max_num_frames: + break; + + + def test_video_good_features_to_track(self): + path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) + + # NB: goodFeaturesToTrack configuration + max_corners = 50 + quality_lvl = 0.01 + min_distance = 10 + block_sz = 3 + use_harris_detector = True + k = 0.04 + mask = None + + # OpenCV + cap = cv.VideoCapture(path) + + # G-API + g_in = cv.GMat() + g_gray = cv.gapi.RGB2Gray(g_in) + g_out = cv.gapi.goodFeaturesToTrack(g_gray, max_corners, quality_lvl, + min_distance, mask, block_sz, use_harris_detector, k) + + c = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) + + ccomp = c.compileStreaming() + source = cv.gapi.wip.make_capture_src(path) + ccomp.setSource(source) + ccomp.start() + + # Assert + max_num_frames = 10 + proc_num_frames = 0 + while cap.isOpened(): + has_expected, frame = cap.read() + has_actual, actual = ccomp.pull() + + self.assertEqual(has_expected, has_actual) + + if not has_actual: + break + + # OpenCV + frame = cv.cvtColor(frame, cv.COLOR_RGB2GRAY) + expected = cv.goodFeaturesToTrack(frame, max_corners, quality_lvl, + min_distance, mask=mask, + blockSize=block_sz, useHarrisDetector=use_harris_detector, k=k) + for e, a in zip(expected, actual): + # NB: OpenCV & G-API have different output shapes: + # OpenCV - (num_points, 1, 2) + # G-API - (num_points, 2) + self.assertEqual(0.0, cv.norm(e.flatten(), a.flatten(), cv.NORM_INF)) + + proc_num_frames += 1 + if proc_num_frames == max_num_frames: + break; + + +if __name__ == '__main__': + NewOpenCVTests.bootstrap() diff --git a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp index 91d08bba06..ac90181184 100644 --- a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp +++ b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp @@ -2124,7 +2124,7 @@ PERF_TEST_P_(SizePerfTest, TestPerformance) // G-API code ////////////////////////////////////////////////////////////// cv::GMat in; - auto 
out = cv::gapi::size(in); + auto out = cv::gapi::streaming::size(in); cv::GComputation c(cv::GIn(in), cv::GOut(out)); cv::Size out_sz; @@ -2156,7 +2156,7 @@ PERF_TEST_P_(SizeRPerfTest, TestPerformance) // G-API code ////////////////////////////////////////////////////////////// cv::GOpaque op_rect; - auto out = cv::gapi::size(op_rect); + auto out = cv::gapi::streaming::size(op_rect); cv::GComputation c(cv::GIn(op_rect), cv::GOut(out)); cv::Size out_sz; diff --git a/modules/gapi/samples/infer_ie_onnx_hybrid.cpp b/modules/gapi/samples/infer_ie_onnx_hybrid.cpp new file mode 100644 index 0000000000..b8612a25ca --- /dev/null +++ b/modules/gapi/samples/infer_ie_onnx_hybrid.cpp @@ -0,0 +1,195 @@ +#include +#include + +#include "opencv2/imgproc.hpp" +#include "opencv2/highgui.hpp" + +#include "opencv2/gapi.hpp" +#include "opencv2/gapi/core.hpp" +#include "opencv2/gapi/imgproc.hpp" +#include "opencv2/gapi/infer.hpp" +#include "opencv2/gapi/infer/ie.hpp" +#include "opencv2/gapi/infer/onnx.hpp" +#include "opencv2/gapi/cpu/gcpukernel.hpp" +#include "opencv2/gapi/streaming/cap.hpp" + +namespace { +const std::string keys = + "{ h help | | print this help message }" + "{ input | | Path to an input video file }" + "{ fdm | | IE face detection model IR }" + "{ fdw | | IE face detection model weights }" + "{ fdd | | IE face detection device }" + "{ emom | | ONNX emotions recognition model }" + "{ output | | (Optional) Path to an output video file }" + ; +} // namespace + +namespace custom { +G_API_NET(Faces, , "face-detector"); +G_API_NET(Emotions, , "emotions-recognition"); + +G_API_OP(PostProc, (cv::GMat, cv::GMat)>, "custom.fd_postproc") { + static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GMatDesc &) { + return cv::empty_array_desc(); + } +}; + +GAPI_OCV_KERNEL(OCVPostProc, PostProc) { + static void run(const cv::Mat &in_ssd_result, + const cv::Mat &in_frame, + std::vector &out_faces) { + const int MAX_PROPOSALS = 200; + const int OBJECT_SIZE = 7; + const cv::Size 
upscale = in_frame.size(); + const cv::Rect surface({0,0}, upscale); + + out_faces.clear(); + + const float *data = in_ssd_result.ptr(); + for (int i = 0; i < MAX_PROPOSALS; i++) { + const float image_id = data[i * OBJECT_SIZE + 0]; // batch id + const float confidence = data[i * OBJECT_SIZE + 2]; + const float rc_left = data[i * OBJECT_SIZE + 3]; + const float rc_top = data[i * OBJECT_SIZE + 4]; + const float rc_right = data[i * OBJECT_SIZE + 5]; + const float rc_bottom = data[i * OBJECT_SIZE + 6]; + + if (image_id < 0.f) { // indicates end of detections + break; + } + if (confidence < 0.5f) { + continue; + } + + cv::Rect rc; + rc.x = static_cast(rc_left * upscale.width); + rc.y = static_cast(rc_top * upscale.height); + rc.width = static_cast(rc_right * upscale.width) - rc.x; + rc.height = static_cast(rc_bottom * upscale.height) - rc.y; + out_faces.push_back(rc & surface); + } + } +}; +//! [Postproc] + +} // namespace custom + +namespace labels { +// Labels as defined in +// https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus +// +const std::string emotions[] = { + "neutral", "happiness", "surprise", "sadness", "anger", "disgust", "fear", "contempt" +}; +namespace { +template +std::vector softmax(Iter begin, Iter end) { + std::vector prob(end - begin, 0.f); + std::transform(begin, end, prob.begin(), [](float x) { return std::exp(x); }); + float sum = std::accumulate(prob.begin(), prob.end(), 0.0f); + for (int i = 0; i < static_cast(prob.size()); i++) + prob[i] /= sum; + return prob; +} + +void DrawResults(cv::Mat &frame, + const std::vector &faces, + const std::vector &out_emotions) { + CV_Assert(faces.size() == out_emotions.size()); + + for (auto it = faces.begin(); it != faces.end(); ++it) { + const auto idx = std::distance(faces.begin(), it); + const auto &rc = *it; + + const float *emotions_data = out_emotions[idx].ptr(); + auto sm = softmax(emotions_data, emotions_data + 8); + const auto emo_id = std::max_element(sm.begin(), 
sm.end()) - sm.begin(); + + const int ATTRIB_OFFSET = 15; + cv::rectangle(frame, rc, {0, 255, 0}, 4); + cv::putText(frame, emotions[emo_id], + cv::Point(rc.x, rc.y - ATTRIB_OFFSET), + cv::FONT_HERSHEY_COMPLEX_SMALL, + 1, + cv::Scalar(0, 0, 255)); + + std::cout << emotions[emo_id] << " at " << rc << std::endl; + } +} +} // anonymous namespace +} // namespace labels + +int main(int argc, char *argv[]) +{ + cv::CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) { + cmd.printMessage(); + return 0; + } + const std::string input = cmd.get("input"); + const std::string output = cmd.get("output"); + + // OpenVINO FD parameters here + auto det_net = cv::gapi::ie::Params { + cmd.get("fdm"), // read cmd args: path to topology IR + cmd.get("fdw"), // read cmd args: path to weights + cmd.get("fdd"), // read cmd args: device specifier + }; + + // ONNX Emotions parameters here + auto emo_net = cv::gapi::onnx::Params { + cmd.get("emom"), // read cmd args: path to the ONNX model + }.cfgNormalize({false}); // model accepts 0..255 range in FP32 + + auto kernels = cv::gapi::kernels(); + auto networks = cv::gapi::networks(det_net, emo_net); + + cv::GMat in; + cv::GMat bgr = cv::gapi::copy(in); + cv::GMat frame = cv::gapi::streaming::desync(bgr); + cv::GMat detections = cv::gapi::infer(frame); + cv::GArray faces = custom::PostProc::on(detections, frame); + cv::GArray emotions = cv::gapi::infer(faces, frame); + auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(bgr, faces, emotions)) + .compileStreaming(cv::compile_args(kernels, networks)); + + auto in_src = cv::gapi::wip::make_src(input); + pipeline.setSource(cv::gin(in_src)); + pipeline.start(); + + cv::util::optional out_frame; + cv::util::optional> out_faces; + cv::util::optional> out_emotions; + + cv::Mat last_mat; + std::vector last_faces; + std::vector last_emotions; + + cv::VideoWriter writer; + + while (pipeline.pull(cv::gout(out_frame, out_faces, out_emotions))) { + if (out_faces && out_emotions) { + last_faces 
= *out_faces; + last_emotions = *out_emotions; + } + if (out_frame) { + last_mat = *out_frame; + labels::DrawResults(last_mat, last_faces, last_emotions); + + if (!output.empty()) { + if (!writer.isOpened()) { + const auto sz = cv::Size{last_mat.cols, last_mat.rows}; + writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz); + CV_Assert(writer.isOpened()); + } + writer << last_mat; + } + } + if (!last_mat.empty()) { + cv::imshow("Out", last_mat); + cv::waitKey(1); + } + } + return 0; +} diff --git a/modules/gapi/samples/infer_single_roi.cpp b/modules/gapi/samples/infer_single_roi.cpp new file mode 100644 index 0000000000..6054a3f4a6 --- /dev/null +++ b/modules/gapi/samples/infer_single_roi.cpp @@ -0,0 +1,264 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const std::string keys = + "{ h help | | Print this help message }" + "{ input | | Path to the input video file }" + "{ facem | face-detection-adas-0001.xml | Path to OpenVINO IE face detection model (.xml) }" + "{ faced | CPU | Target device for face detection model (e.g. CPU, GPU, VPU, ...) }" + "{ r roi | -1,-1,-1,-1 | Region of interest (ROI) to use for inference. 
Identified automatically when not set }"; + +namespace { + +std::string weights_path(const std::string &model_path) { + const auto EXT_LEN = 4u; + const auto sz = model_path.size(); + CV_Assert(sz > EXT_LEN); + + auto ext = model_path.substr(sz - EXT_LEN); + std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){ + return static_cast(std::tolower(c)); + }); + CV_Assert(ext == ".xml"); + return model_path.substr(0u, sz - EXT_LEN) + ".bin"; +} + +cv::util::optional parse_roi(const std::string &rc) { + cv::Rect rv; + char delim[3]; + + std::stringstream is(rc); + is >> rv.x >> delim[0] >> rv.y >> delim[1] >> rv.width >> delim[2] >> rv.height; + if (is.bad()) { + return cv::util::optional(); // empty value + } + const auto is_delim = [](char c) { + return c == ','; + }; + if (!std::all_of(std::begin(delim), std::end(delim), is_delim)) { + return cv::util::optional(); // empty value + + } + if (rv.x < 0 || rv.y < 0 || rv.width <= 0 || rv.height <= 0) { + return cv::util::optional(); // empty value + } + return cv::util::make_optional(std::move(rv)); +} + +} // namespace + +namespace custom { + +G_API_NET(FaceDetector, , "face-detector"); + +using GDetections = cv::GArray; +using GRect = cv::GOpaque; +using GSize = cv::GOpaque; +using GPrims = cv::GArray; + +G_API_OP(GetSize, , "sample.custom.get-size") { + static cv::GOpaqueDesc outMeta(const cv::GMatDesc &) { + return cv::empty_gopaque_desc(); + } +}; + +G_API_OP(LocateROI, , "sample.custom.locate-roi") { + static cv::GOpaqueDesc outMeta(const cv::GMatDesc &) { + return cv::empty_gopaque_desc(); + } +}; + +G_API_OP(ParseSSD, , "sample.custom.parse-ssd") { + static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GOpaqueDesc &, const cv::GOpaqueDesc &) { + return cv::empty_array_desc(); + } +}; + +G_API_OP(BBoxes, , "sample.custom.b-boxes") { + static cv::GArrayDesc outMeta(const cv::GArrayDesc &, const cv::GOpaqueDesc &) { + return cv::empty_array_desc(); + } +}; + +GAPI_OCV_KERNEL(OCVGetSize, 
GetSize) { + static void run(const cv::Mat &in, cv::Size &out) { + out = {in.cols, in.rows}; + } +}; + +GAPI_OCV_KERNEL(OCVLocateROI, LocateROI) { + // This is the place where we can run extra analytics + // on the input image frame and select the ROI (region + // of interest) where we want to detect our objects (or + // run any other inference). + // + // Currently it doesn't do anything intelligent, + // but only crops the input image to square (this is + // the most convenient aspect ratio for detectors to use) + + static void run(const cv::Mat &in_mat, cv::Rect &out_rect) { + + // Identify the central point & square size (- some padding) + const auto center = cv::Point{in_mat.cols/2, in_mat.rows/2}; + auto sqside = std::min(in_mat.cols, in_mat.rows); + + // Now build the central square ROI + out_rect = cv::Rect{ center.x - sqside/2 + , center.y - sqside/2 + , sqside + , sqside + }; + } +}; + +GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) { + static void run(const cv::Mat &in_ssd_result, + const cv::Rect &in_roi, + const cv::Size &in_parent_size, + std::vector &out_objects) { + const auto &in_ssd_dims = in_ssd_result.size; + CV_Assert(in_ssd_dims.dims() == 4u); + + const int MAX_PROPOSALS = in_ssd_dims[2]; + const int OBJECT_SIZE = in_ssd_dims[3]; + CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size + + const cv::Size up_roi = in_roi.size(); + const cv::Rect surface({0,0}, in_parent_size); + + out_objects.clear(); + + const float *data = in_ssd_result.ptr(); + for (int i = 0; i < MAX_PROPOSALS; i++) { + const float image_id = data[i * OBJECT_SIZE + 0]; + const float label = data[i * OBJECT_SIZE + 1]; + const float confidence = data[i * OBJECT_SIZE + 2]; + const float rc_left = data[i * OBJECT_SIZE + 3]; + const float rc_top = data[i * OBJECT_SIZE + 4]; + const float rc_right = data[i * OBJECT_SIZE + 5]; + const float rc_bottom = data[i * OBJECT_SIZE + 6]; + (void) label; // unused + + if (image_id < 0.f) { + break; // marks end-of-detections + } + if (confidence < 
0.5f) { + continue; // skip objects with low confidence + } + + // map relative coordinates to the original image scale + // taking the ROI into account + cv::Rect rc; + rc.x = static_cast(rc_left * up_roi.width); + rc.y = static_cast(rc_top * up_roi.height); + rc.width = static_cast(rc_right * up_roi.width) - rc.x; + rc.height = static_cast(rc_bottom * up_roi.height) - rc.y; + rc.x += in_roi.x; + rc.y += in_roi.y; + out_objects.emplace_back(rc & surface); + } + } +}; + +GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) { + // This kernel converts the rectangles into G-API's + // rendering primitives + static void run(const std::vector &in_face_rcs, + const cv::Rect &in_roi, + std::vector &out_prims) { + out_prims.clear(); + const auto cvt = [](const cv::Rect &rc, const cv::Scalar &clr) { + return cv::gapi::wip::draw::Rect(rc, clr, 2); + }; + out_prims.emplace_back(cvt(in_roi, CV_RGB(0,255,255))); // cyan + for (auto &&rc : in_face_rcs) { + out_prims.emplace_back(cvt(rc, CV_RGB(0,255,0))); // green + } + } +}; + +} // namespace custom + +int main(int argc, char *argv[]) +{ + cv::CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) { + cmd.printMessage(); + return 0; + } + + // Prepare parameters first + const std::string input = cmd.get("input"); + const auto opt_roi = parse_roi(cmd.get("roi")); + + const auto face_model_path = cmd.get("facem"); + auto face_net = cv::gapi::ie::Params { + face_model_path, // path to topology IR + weights_path(face_model_path), // path to weights + cmd.get("faced"), // device specifier + }; + auto kernels = cv::gapi::kernels + < custom::OCVGetSize + , custom::OCVLocateROI + , custom::OCVParseSSD + , custom::OCVBBoxes>(); + auto networks = cv::gapi::networks(face_net); + + // Now build the graph. 
The graph structure may vary + // pased on the input parameters + cv::GStreamingCompiled pipeline; + auto inputs = cv::gin(cv::gapi::wip::make_src(input)); + + if (opt_roi.has_value()) { + // Use the value provided by user + std::cout << "Will run inference for static region " + << opt_roi.value() + << " only" + << std::endl; + cv::GMat in; + cv::GOpaque in_roi; + auto blob = cv::gapi::infer(in_roi, in); + auto rcs = custom::ParseSSD::on(blob, in_roi, custom::GetSize::on(in)); + auto out = cv::gapi::wip::draw::render3ch(in, custom::BBoxes::on(rcs, in_roi)); + pipeline = cv::GComputation(cv::GIn(in, in_roi), cv::GOut(out)) + .compileStreaming(cv::compile_args(kernels, networks)); + + // Since the ROI to detect is manual, make it part of the input vector + inputs.push_back(cv::gin(opt_roi.value())[0]); + } else { + // Automatically detect ROI to infer. Make it output parameter + std::cout << "ROI is not set or invalid. Locating it automatically" + << std::endl; + cv::GMat in; + cv::GOpaque roi = custom::LocateROI::on(in); + auto blob = cv::gapi::infer(roi, in); + auto rcs = custom::ParseSSD::on(blob, roi, custom::GetSize::on(in)); + auto out = cv::gapi::wip::draw::render3ch(in, custom::BBoxes::on(rcs, roi)); + pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)) + .compileStreaming(cv::compile_args(kernels, networks)); + } + + // The execution part + pipeline.setSource(std::move(inputs)); + pipeline.start(); + + cv::Mat out; + while (pipeline.pull(cv::gout(out))) { + cv::imshow("Out", out); + cv::waitKey(1); + } + return 0; +} diff --git a/modules/gapi/samples/infer_ssd_onnx.cpp b/modules/gapi/samples/infer_ssd_onnx.cpp new file mode 100644 index 0000000000..fc26ca1e36 --- /dev/null +++ b/modules/gapi/samples/infer_ssd_onnx.cpp @@ -0,0 +1,213 @@ +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace custom { + +G_API_NET(ObjDetector, , "object-detector"); + +using 
GDetections = cv::GArray; +using GSize = cv::GOpaque; +using GPrims = cv::GArray; + +G_API_OP(GetSize, , "sample.custom.get-size") { + static cv::GOpaqueDesc outMeta(const cv::GMatDesc &) { + return cv::empty_gopaque_desc(); + } +}; +G_API_OP(ParseSSD, , "sample.custom.parse-ssd") { + static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GOpaqueDesc &) { + return cv::empty_array_desc(); + } +}; +G_API_OP(BBoxes, , "sample.custom.b-boxes") { + static cv::GArrayDesc outMeta(const cv::GArrayDesc &) { + return cv::empty_array_desc(); + } +}; + +GAPI_OCV_KERNEL(OCVGetSize, GetSize) { + static void run(const cv::Mat &in, cv::Size &out) { + out = {in.cols, in.rows}; + } +}; +GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) { + static void run(const cv::Mat &in_ssd_result, + const cv::Size &in_parent_size, + std::vector &out_objects) { + const auto &in_ssd_dims = in_ssd_result.size; + CV_Assert(in_ssd_dims.dims() == 4u); + + const int MAX_PROPOSALS = in_ssd_dims[2]; + const int OBJECT_SIZE = in_ssd_dims[3]; + + CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size + + const cv::Rect surface({0,0}, in_parent_size); + + out_objects.clear(); + + const float *data = in_ssd_result.ptr(); + for (int i = 0; i < MAX_PROPOSALS; i++) { + const float image_id = data[i * OBJECT_SIZE + 0]; + const float label = data[i * OBJECT_SIZE + 1]; + const float confidence = data[i * OBJECT_SIZE + 2]; + const float rc_left = data[i * OBJECT_SIZE + 3]; + const float rc_top = data[i * OBJECT_SIZE + 4]; + const float rc_right = data[i * OBJECT_SIZE + 5]; + const float rc_bottom = data[i * OBJECT_SIZE + 6]; + (void) label; // unused + + if (image_id < 0.f) { + break; // marks end-of-detections + } + if (confidence < 0.5f) { + continue; // skip objects with low confidence + } + + // map relative coordinates to the original image scale + cv::Rect rc; + rc.x = static_cast(rc_left * in_parent_size.width); + rc.y = static_cast(rc_top * in_parent_size.height); + rc.width = static_cast(rc_right * 
in_parent_size.width) - rc.x; + rc.height = static_cast(rc_bottom * in_parent_size.height) - rc.y; + out_objects.emplace_back(rc & surface); + } + } +}; +GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) { + // This kernel converts the rectangles into G-API's + // rendering primitives + static void run(const std::vector &in_obj_rcs, + std::vector &out_prims) { + out_prims.clear(); + const auto cvt = [](const cv::Rect &rc, const cv::Scalar &clr) { + return cv::gapi::wip::draw::Rect(rc, clr, 2); + }; + for (auto &&rc : in_obj_rcs) { + out_prims.emplace_back(cvt(rc, CV_RGB(0,255,0))); // green + } + + std::cout << "Detections:"; + for (auto &&rc : in_obj_rcs) std::cout << ' ' << rc; + std::cout << std::endl; + } +}; + +} // namespace custom + +namespace { +void remap_ssd_ports(const std::unordered_map &onnx, + std::unordered_map &gapi) { + // Assemble ONNX-processed outputs back to a single 1x1x200x7 blob + // to preserve compatibility with OpenVINO-based SSD pipeline + const cv::Mat &num_detections = onnx.at("num_detections:0"); + const cv::Mat &detection_boxes = onnx.at("detection_boxes:0"); + const cv::Mat &detection_scores = onnx.at("detection_scores:0"); + const cv::Mat &detection_classes = onnx.at("detection_classes:0"); + + GAPI_Assert(num_detections.depth() == CV_32F); + GAPI_Assert(detection_boxes.depth() == CV_32F); + GAPI_Assert(detection_scores.depth() == CV_32F); + GAPI_Assert(detection_classes.depth() == CV_32F); + + cv::Mat &ssd_output = gapi.at("detection_output"); + + const int num_objects = static_cast(num_detections.ptr()[0]); + const float *in_boxes = detection_boxes.ptr(); + const float *in_scores = detection_scores.ptr(); + const float *in_classes = detection_classes.ptr(); + float *ptr = ssd_output.ptr(); + + for (int i = 0; i < num_objects; i++) { + ptr[0] = 0.f; // "image_id" + ptr[1] = in_classes[i]; // "label" + ptr[2] = in_scores[i]; // "confidence" + ptr[3] = in_boxes[4*i + 1]; // left + ptr[4] = in_boxes[4*i + 0]; // top + ptr[5] = in_boxes[4*i + 3]; // 
right + ptr[6] = in_boxes[4*i + 2]; // bottom + + ptr += 7; + in_boxes += 4; + } + if (num_objects < ssd_output.size[2]-1) { + // put a -1 mark at the end of output blob if there is space left + ptr[0] = -1.f; + } +} +} // anonymous namespace + + +const std::string keys = + "{ h help | | Print this help message }" + "{ input | | Path to the input video file }" + "{ output | | (Optional) path to output video file }" + "{ detm | | Path to an ONNX SSD object detection model (.onnx) }" + ; + +int main(int argc, char *argv[]) +{ + cv::CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) { + cmd.printMessage(); + return 0; + } + + // Prepare parameters first + const std::string input = cmd.get("input"); + const std::string output = cmd.get("output"); + const auto obj_model_path = cmd.get("detm"); + + auto obj_net = cv::gapi::onnx::Params{obj_model_path} + .cfgOutputLayers({"detection_output"}) + .cfgPostProc({cv::GMatDesc{CV_32F, {1,1,200,7}}}, remap_ssd_ports); + auto kernels = cv::gapi::kernels< custom::OCVGetSize + , custom::OCVParseSSD + , custom::OCVBBoxes>(); + auto networks = cv::gapi::networks(obj_net); + + // Now build the graph + cv::GMat in; + auto blob = cv::gapi::infer(in); + auto rcs = custom::ParseSSD::on(blob, custom::GetSize::on(in)); + auto out = cv::gapi::wip::draw::render3ch(in, custom::BBoxes::on(rcs)); + cv::GStreamingCompiled pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)) + .compileStreaming(cv::compile_args(kernels, networks)); + + auto inputs = cv::gin(cv::gapi::wip::make_src(input)); + + // The execution part + pipeline.setSource(std::move(inputs)); + pipeline.start(); + + cv::VideoWriter writer; + + cv::Mat outMat; + while (pipeline.pull(cv::gout(outMat))) { + cv::imshow("Out", outMat); + cv::waitKey(1); + if (!output.empty()) { + if (!writer.isOpened()) { + const auto sz = cv::Size{outMat.cols, outMat.rows}; + writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz); + CV_Assert(writer.isOpened()); + } + 
writer << outMat; + } + } + return 0; +} diff --git a/modules/gapi/samples/text_detection.cpp b/modules/gapi/samples/text_detection.cpp new file mode 100644 index 0000000000..da1bab6ca9 --- /dev/null +++ b/modules/gapi/samples/text_detection.cpp @@ -0,0 +1,698 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +const std::string about = + "This is an OpenCV-based version of OMZ Text Detection example"; +const std::string keys = + "{ h help | | Print this help message }" + "{ input | | Path to the input video file }" + "{ tdm | text-detection-0004.xml | Path to OpenVINO text detection model (.xml), versions 0003 and 0004 work }" + "{ tdd | CPU | Target device for the text detector (e.g. CPU, GPU, VPU, ...) }" + "{ trm | text-recognition-0012.xml | Path to OpenVINO text recognition model (.xml) }" + "{ trd | CPU | Target device for the text recognition (e.g. CPU, GPU, VPU, ...) }" + "{ bw | 0 | CTC beam search decoder bandwidth, if 0, a CTC greedy decoder is used}" + "{ sset | 0123456789abcdefghijklmnopqrstuvwxyz | Symbol set to use with text recognition decoder. Shouldn't contain symbol #. 
}" + "{ thr | 0.2 | Text recognition confidence threshold}" + ; + +namespace { +std::string weights_path(const std::string &model_path) { + const auto EXT_LEN = 4u; + const auto sz = model_path.size(); + CV_Assert(sz > EXT_LEN); + + const auto ext = model_path.substr(sz - EXT_LEN); + CV_Assert(cv::toLowerCase(ext) == ".xml"); + return model_path.substr(0u, sz - EXT_LEN) + ".bin"; +} + +////////////////////////////////////////////////////////////////////// +// Taken from OMZ samples as-is +template +void softmax_and_choose(Iter begin, Iter end, int *argmax, float *prob) { + auto max_element = std::max_element(begin, end); + *argmax = static_cast(std::distance(begin, max_element)); + float max_val = *max_element; + double sum = 0; + for (auto i = begin; i != end; i++) { + sum += std::exp((*i) - max_val); + } + if (std::fabs(sum) < std::numeric_limits::epsilon()) { + throw std::logic_error("sum can't be equal to zero"); + } + *prob = 1.0f / static_cast(sum); +} + +template +std::vector softmax(Iter begin, Iter end) { + std::vector prob(end - begin, 0.f); + std::transform(begin, end, prob.begin(), [](float x) { return std::exp(x); }); + float sum = std::accumulate(prob.begin(), prob.end(), 0.0f); + for (int i = 0; i < static_cast(prob.size()); i++) + prob[i] /= sum; + return prob; +} + +struct BeamElement { + std::vector sentence; //!< The sequence of chars that will be a result of the beam element + + float prob_blank; //!< The probability that the last char in CTC sequence + //!< for the beam element is the special blank char + + float prob_not_blank; //!< The probability that the last char in CTC sequence + //!< for the beam element is NOT the special blank char + + float prob() const { //!< The probability of the beam element. 
+ return prob_blank + prob_not_blank; + } +}; + +std::string CTCGreedyDecoder(const float *data, + const std::size_t sz, + const std::string &alphabet, + const char pad_symbol, + double *conf) { + std::string res = ""; + bool prev_pad = false; + *conf = 1; + + const auto num_classes = alphabet.length(); + for (auto it = data; it != (data+sz); it += num_classes) { + int argmax = 0; + float prob = 0.f; + + softmax_and_choose(it, it + num_classes, &argmax, &prob); + (*conf) *= prob; + + auto symbol = alphabet[argmax]; + if (symbol != pad_symbol) { + if (res.empty() || prev_pad || (!res.empty() && symbol != res.back())) { + prev_pad = false; + res += symbol; + } + } else { + prev_pad = true; + } + } + return res; +} + +std::string CTCBeamSearchDecoder(const float *data, + const std::size_t sz, + const std::string &alphabet, + double *conf, + int bandwidth) { + const auto num_classes = alphabet.length(); + + std::vector curr; + std::vector last; + + last.push_back(BeamElement{std::vector(), 1.f, 0.f}); + + for (auto it = data; it != (data+sz); it += num_classes) { + curr.clear(); + + std::vector prob = softmax(it, it + num_classes); + + for(const auto& candidate: last) { + float prob_not_blank = 0.f; + const std::vector& candidate_sentence = candidate.sentence; + if (!candidate_sentence.empty()) { + int n = candidate_sentence.back(); + prob_not_blank = candidate.prob_not_blank * prob[n]; + } + float prob_blank = candidate.prob() * prob[num_classes - 1]; + + auto check_res = std::find_if(curr.begin(), + curr.end(), + [&candidate_sentence](const BeamElement& n) { + return n.sentence == candidate_sentence; + }); + if (check_res == std::end(curr)) { + curr.push_back(BeamElement{candidate.sentence, prob_blank, prob_not_blank}); + } else { + check_res->prob_not_blank += prob_not_blank; + if (check_res->prob_blank != 0.f) { + throw std::logic_error("Probability that the last char in CTC-sequence " + "is the special blank char must be zero here"); + } + check_res->prob_blank = 
prob_blank; + } + + for (int i = 0; i < static_cast(num_classes) - 1; i++) { + auto extend = candidate_sentence; + extend.push_back(i); + + if (candidate_sentence.size() > 0 && candidate.sentence.back() == i) { + prob_not_blank = prob[i] * candidate.prob_blank; + } else { + prob_not_blank = prob[i] * candidate.prob(); + } + + auto check_res2 = std::find_if(curr.begin(), + curr.end(), + [&extend](const BeamElement &n) { + return n.sentence == extend; + }); + if (check_res2 == std::end(curr)) { + curr.push_back(BeamElement{extend, 0.f, prob_not_blank}); + } else { + check_res2->prob_not_blank += prob_not_blank; + } + } + } + + sort(curr.begin(), curr.end(), [](const BeamElement &a, const BeamElement &b) -> bool { + return a.prob() > b.prob(); + }); + + last.clear(); + int num_to_copy = std::min(bandwidth, static_cast(curr.size())); + for (int b = 0; b < num_to_copy; b++) { + last.push_back(curr[b]); + } + } + + *conf = last[0].prob(); + std::string res=""; + for (const auto& idx: last[0].sentence) { + res += alphabet[idx]; + } + + return res; +} + +////////////////////////////////////////////////////////////////////// +} // anonymous namespace + +namespace custom { +namespace { + +////////////////////////////////////////////////////////////////////// +// Define networks for this sample +using GMat2 = std::tuple; +G_API_NET(TextDetection, + , + "sample.custom.text_detect"); + +G_API_NET(TextRecognition, + , + "sample.custom.text_recogn"); + +// Define custom operations +using GSize = cv::GOpaque; +using GRRects = cv::GArray; +G_API_OP(PostProcess, + , + "sample.custom.text.post_proc") { + static cv::GArrayDesc outMeta(const cv::GMatDesc &, + const cv::GMatDesc &, + const cv::GOpaqueDesc &, + float, + float) { + return cv::empty_array_desc(); + } +}; + +using GMats = cv::GArray; +G_API_OP(CropLabels, + , + "sample.custom.text.crop") { + static cv::GArrayDesc outMeta(const cv::GMatDesc &, + const cv::GArrayDesc &, + const cv::GOpaqueDesc &) { + return 
cv::empty_array_desc(); + } +}; + +////////////////////////////////////////////////////////////////////// +// Implement custom operations +GAPI_OCV_KERNEL(OCVPostProcess, PostProcess) { + static void run(const cv::Mat &link, + const cv::Mat &segm, + const cv::Size &img_size, + const float link_threshold, + const float segm_threshold, + std::vector &out) { + // NOTE: Taken from the OMZ text detection sample almost as-is + const int kMinArea = 300; + const int kMinHeight = 10; + + const float *link_data_pointer = link.ptr(); + std::vector link_data(link_data_pointer, link_data_pointer + link.total()); + link_data = transpose4d(link_data, dimsToShape(link.size), {0, 2, 3, 1}); + softmax(link_data); + link_data = sliceAndGetSecondChannel(link_data); + std::vector new_link_data_shape = { + link.size[0], + link.size[2], + link.size[3], + link.size[1]/2, + }; + + const float *cls_data_pointer = segm.ptr(); + std::vector cls_data(cls_data_pointer, cls_data_pointer + segm.total()); + cls_data = transpose4d(cls_data, dimsToShape(segm.size), {0, 2, 3, 1}); + softmax(cls_data); + cls_data = sliceAndGetSecondChannel(cls_data); + std::vector new_cls_data_shape = { + segm.size[0], + segm.size[2], + segm.size[3], + segm.size[1]/2, + }; + + out = maskToBoxes(decodeImageByJoin(cls_data, new_cls_data_shape, + link_data, new_link_data_shape, + segm_threshold, link_threshold), + static_cast(kMinArea), + static_cast(kMinHeight), + img_size); + } + + static std::vector dimsToShape(const cv::MatSize &sz) { + const int n_dims = sz.dims(); + std::vector result; + result.reserve(n_dims); + + // cv::MatSize is not iterable... 
+ for (int i = 0; i < n_dims; i++) { + result.emplace_back(static_cast(sz[i])); + } + return result; + } + + static void softmax(std::vector &rdata) { + // NOTE: Taken from the OMZ text detection sample almost as-is + const size_t last_dim = 2; + for (size_t i = 0 ; i < rdata.size(); i+=last_dim) { + float m = std::max(rdata[i], rdata[i+1]); + rdata[i] = std::exp(rdata[i] - m); + rdata[i + 1] = std::exp(rdata[i + 1] - m); + float s = rdata[i] + rdata[i + 1]; + rdata[i] /= s; + rdata[i + 1] /= s; + } + } + + static std::vector transpose4d(const std::vector &data, + const std::vector &shape, + const std::vector &axes) { + // NOTE: Taken from the OMZ text detection sample almost as-is + if (shape.size() != axes.size()) + throw std::runtime_error("Shape and axes must have the same dimension."); + + for (size_t a : axes) { + if (a >= shape.size()) + throw std::runtime_error("Axis must be less than dimension of shape."); + } + size_t total_size = shape[0]*shape[1]*shape[2]*shape[3]; + std::vector steps { + shape[axes[1]]*shape[axes[2]]*shape[axes[3]], + shape[axes[2]]*shape[axes[3]], + shape[axes[3]], + 1 + }; + + size_t source_data_idx = 0; + std::vector new_data(total_size, 0); + std::vector ids(shape.size()); + for (ids[0] = 0; ids[0] < shape[0]; ids[0]++) { + for (ids[1] = 0; ids[1] < shape[1]; ids[1]++) { + for (ids[2] = 0; ids[2] < shape[2]; ids[2]++) { + for (ids[3]= 0; ids[3] < shape[3]; ids[3]++) { + size_t new_data_idx = ids[axes[0]]*steps[0] + ids[axes[1]]*steps[1] + + ids[axes[2]]*steps[2] + ids[axes[3]]*steps[3]; + new_data[new_data_idx] = data[source_data_idx++]; + } + } + } + } + return new_data; + } + + static std::vector sliceAndGetSecondChannel(const std::vector &data) { + // NOTE: Taken from the OMZ text detection sample almost as-is + std::vector new_data(data.size() / 2, 0); + for (size_t i = 0; i < data.size() / 2; i++) { + new_data[i] = data[2 * i + 1]; + } + return new_data; + } + + static void join(const int p1, + const int p2, + 
std::unordered_map &group_mask) { + // NOTE: Taken from the OMZ text detection sample almost as-is + const int root1 = findRoot(p1, group_mask); + const int root2 = findRoot(p2, group_mask); + if (root1 != root2) { + group_mask[root1] = root2; + } + } + + static cv::Mat decodeImageByJoin(const std::vector &cls_data, + const std::vector &cls_data_shape, + const std::vector &link_data, + const std::vector &link_data_shape, + float cls_conf_threshold, + float link_conf_threshold) { + // NOTE: Taken from the OMZ text detection sample almost as-is + const int h = cls_data_shape[1]; + const int w = cls_data_shape[2]; + + std::vector pixel_mask(h * w, 0); + std::unordered_map group_mask; + std::vector points; + for (int i = 0; i < static_cast(pixel_mask.size()); i++) { + pixel_mask[i] = cls_data[i] >= cls_conf_threshold; + if (pixel_mask[i]) { + points.emplace_back(i % w, i / w); + group_mask[i] = -1; + } + } + std::vector link_mask(link_data.size(), 0); + for (size_t i = 0; i < link_mask.size(); i++) { + link_mask[i] = link_data[i] >= link_conf_threshold; + } + size_t neighbours = size_t(link_data_shape[3]); + for (const auto &point : points) { + size_t neighbour = 0; + for (int ny = point.y - 1; ny <= point.y + 1; ny++) { + for (int nx = point.x - 1; nx <= point.x + 1; nx++) { + if (nx == point.x && ny == point.y) + continue; + if (nx >= 0 && nx < w && ny >= 0 && ny < h) { + uchar pixel_value = pixel_mask[size_t(ny) * size_t(w) + size_t(nx)]; + uchar link_value = link_mask[(size_t(point.y) * size_t(w) + size_t(point.x)) + *neighbours + neighbour]; + if (pixel_value && link_value) { + join(point.x + point.y * w, nx + ny * w, group_mask); + } + } + neighbour++; + } + } + } + return get_all(points, w, h, group_mask); + } + + static cv::Mat get_all(const std::vector &points, + const int w, + const int h, + std::unordered_map &group_mask) { + // NOTE: Taken from the OMZ text detection sample almost as-is + std::unordered_map root_map; + cv::Mat mask(h, w, CV_32S, 
cv::Scalar(0)); + for (const auto &point : points) { + int point_root = findRoot(point.x + point.y * w, group_mask); + if (root_map.find(point_root) == root_map.end()) { + root_map.emplace(point_root, static_cast(root_map.size() + 1)); + } + mask.at(point.x + point.y * w) = root_map[point_root]; + } + return mask; + } + + static int findRoot(const int point, + std::unordered_map &group_mask) { + // NOTE: Taken from the OMZ text detection sample almost as-is + int root = point; + bool update_parent = false; + while (group_mask.at(root) != -1) { + root = group_mask.at(root); + update_parent = true; + } + if (update_parent) { + group_mask[point] = root; + } + return root; + } + + static std::vector maskToBoxes(const cv::Mat &mask, + const float min_area, + const float min_height, + const cv::Size &image_size) { + // NOTE: Taken from the OMZ text detection sample almost as-is + std::vector bboxes; + double min_val = 0.; + double max_val = 0.; + cv::minMaxLoc(mask, &min_val, &max_val); + int max_bbox_idx = static_cast(max_val); + cv::Mat resized_mask; + cv::resize(mask, resized_mask, image_size, 0, 0, cv::INTER_NEAREST); + + for (int i = 1; i <= max_bbox_idx; i++) { + cv::Mat bbox_mask = resized_mask == i; + std::vector> contours; + + cv::findContours(bbox_mask, contours, cv::RETR_CCOMP, cv::CHAIN_APPROX_SIMPLE); + if (contours.empty()) + continue; + cv::RotatedRect r = cv::minAreaRect(contours[0]); + if (std::min(r.size.width, r.size.height) < min_height) + continue; + if (r.size.area() < min_area) + continue; + bboxes.emplace_back(r); + } + return bboxes; + } +}; // GAPI_OCV_KERNEL(PostProcess) + +GAPI_OCV_KERNEL(OCVCropLabels, CropLabels) { + static void run(const cv::Mat &image, + const std::vector &detections, + const cv::Size &outSize, + std::vector &out) { + out.clear(); + out.reserve(detections.size()); + cv::Mat crop(outSize, CV_8UC3, cv::Scalar(0)); + cv::Mat gray(outSize, CV_8UC1, cv::Scalar(0)); + std::vector blob_shape = {1,1,outSize.height,outSize.width}; 
+ + for (auto &&rr : detections) { + std::vector points(4); + rr.points(points.data()); + + const auto top_left_point_idx = topLeftPointIdx(points); + cv::Point2f point0 = points[static_cast(top_left_point_idx)]; + cv::Point2f point1 = points[(top_left_point_idx + 1) % 4]; + cv::Point2f point2 = points[(top_left_point_idx + 2) % 4]; + + std::vector from{point0, point1, point2}; + std::vector to{ + cv::Point2f(0.0f, 0.0f), + cv::Point2f(static_cast(outSize.width-1), 0.0f), + cv::Point2f(static_cast(outSize.width-1), + static_cast(outSize.height-1)) + }; + cv::Mat M = cv::getAffineTransform(from, to); + cv::warpAffine(image, crop, M, outSize); + cv::cvtColor(crop, gray, cv::COLOR_BGR2GRAY); + + cv::Mat blob; + gray.convertTo(blob, CV_32F); + out.push_back(blob.reshape(1, blob_shape)); // pass as 1,1,H,W instead of H,W + } + } + + static int topLeftPointIdx(const std::vector &points) { + // NOTE: Taken from the OMZ text detection sample almost as-is + cv::Point2f most_left(std::numeric_limits::max(), + std::numeric_limits::max()); + cv::Point2f almost_most_left(std::numeric_limits::max(), + std::numeric_limits::max()); + int most_left_idx = -1; + int almost_most_left_idx = -1; + + for (size_t i = 0; i < points.size() ; i++) { + if (most_left.x > points[i].x) { + if (most_left.x < std::numeric_limits::max()) { + almost_most_left = most_left; + almost_most_left_idx = most_left_idx; + } + most_left = points[i]; + most_left_idx = static_cast(i); + } + if (almost_most_left.x > points[i].x && points[i] != most_left) { + almost_most_left = points[i]; + almost_most_left_idx = static_cast(i); + } + } + + if (almost_most_left.y < most_left.y) { + most_left = almost_most_left; + most_left_idx = almost_most_left_idx; + } + return most_left_idx; + } + +}; // GAPI_OCV_KERNEL(CropLabels) + +} // anonymous namespace +} // namespace custom + +namespace vis { +namespace { + +void drawRotatedRect(cv::Mat &m, const cv::RotatedRect &rc) { + std::vector tmp_points(5); + 
rc.points(tmp_points.data()); + tmp_points[4] = tmp_points[0]; + auto prev = tmp_points.begin(), it = prev+1; + for (; it != tmp_points.end(); ++it) { + cv::line(m, *prev, *it, cv::Scalar(50, 205, 50), 2); + prev = it; + } +} + +void drawText(cv::Mat &m, const cv::RotatedRect &rc, const std::string &str) { + const int fface = cv::FONT_HERSHEY_SIMPLEX; + const double scale = 0.7; + const int thick = 1; + int base = 0; + const auto text_size = cv::getTextSize(str, fface, scale, thick, &base); + + std::vector tmp_points(4); + rc.points(tmp_points.data()); + const auto tl_point_idx = custom::OCVCropLabels::topLeftPointIdx(tmp_points); + cv::Point text_pos = tmp_points[tl_point_idx]; + text_pos.x = std::max(0, text_pos.x); + text_pos.y = std::max(text_size.height, text_pos.y); + + cv::rectangle(m, + text_pos + cv::Point{0, base}, + text_pos + cv::Point{text_size.width, -text_size.height}, + CV_RGB(50, 205, 50), + cv::FILLED); + const auto white = CV_RGB(255, 255, 255); + cv::putText(m, str, text_pos, fface, scale, white, thick, 8); +} + +} // anonymous namespace +} // namespace vis + +int main(int argc, char *argv[]) +{ + cv::CommandLineParser cmd(argc, argv, keys); + cmd.about(about); + if (cmd.has("help")) { + cmd.printMessage(); + return 0; + } + const auto input_file_name = cmd.get("input"); + const auto tdet_model_path = cmd.get("tdm"); + const auto trec_model_path = cmd.get("trm"); + const auto tdet_target_dev = cmd.get("tdd"); + const auto trec_target_dev = cmd.get("trd"); + const auto ctc_beam_dec_bw = cmd.get("bw"); + const auto dec_conf_thresh = cmd.get("thr"); + + const auto pad_symbol = '#'; + const auto symbol_set = cmd.get("sset") + pad_symbol; + + cv::GMat in; + cv::GOpaque in_rec_sz; + cv::GMat link, segm; + std::tie(link, segm) = cv::gapi::infer(in); + cv::GOpaque size = cv::gapi::streaming::size(in); + cv::GArray rrs = custom::PostProcess::on(link, segm, size, 0.8f, 0.8f); + cv::GArray labels = custom::CropLabels::on(in, rrs, in_rec_sz); + cv::GArray 
text = cv::gapi::infer2(in, labels); + + cv::GComputation graph(cv::GIn(in, in_rec_sz), + cv::GOut(cv::gapi::copy(in), rrs, text)); + + // Text detection network + auto tdet_net = cv::gapi::ie::Params { + tdet_model_path, // path to topology IR + weights_path(tdet_model_path), // path to weights + tdet_target_dev, // device specifier + }.cfgOutputLayers({"model/link_logits_/add", "model/segm_logits/add"}); + + auto trec_net = cv::gapi::ie::Params { + trec_model_path, // path to topology IR + weights_path(trec_model_path), // path to weights + trec_target_dev, // device specifier + }; + auto networks = cv::gapi::networks(tdet_net, trec_net); + + auto kernels = cv::gapi::kernels< custom::OCVPostProcess + , custom::OCVCropLabels + >(); + auto pipeline = graph.compileStreaming(cv::compile_args(kernels, networks)); + + std::cout << "Reading " << input_file_name << std::endl; + + // Input stream + auto in_src = cv::gapi::wip::make_src(input_file_name); + + // Text recognition input size (also an input parameter to the graph) + auto in_rsz = cv::Size{ 120, 32 }; + + // Set the pipeline source & start the pipeline + pipeline.setSource(cv::gin(in_src, in_rsz)); + pipeline.start(); + + // Declare the output data & run the processing loop + cv::TickMeter tm; + cv::Mat image; + std::vector out_rcs; + std::vector out_text; + + tm.start(); + int frames = 0; + while (pipeline.pull(cv::gout(image, out_rcs, out_text))) { + frames++; + + CV_Assert(out_rcs.size() == out_text.size()); + const auto num_labels = out_rcs.size(); + + std::vector tmp_points(4); + for (std::size_t l = 0; l < num_labels; l++) { + // Decode the recognized text in the rectangle + const auto &blob = out_text[l]; + const float *data = blob.ptr(); + const auto sz = blob.total(); + double conf = 1.0; + const std::string res = ctc_beam_dec_bw == 0 + ? 
CTCGreedyDecoder(data, sz, symbol_set, pad_symbol, &conf) + : CTCBeamSearchDecoder(data, sz, symbol_set, &conf, ctc_beam_dec_bw); + + // Draw a bounding box for this rotated rectangle + const auto &rc = out_rcs[l]; + vis::drawRotatedRect(image, rc); + + // Draw text, if decoded + if (conf >= dec_conf_thresh) { + vis::drawText(image, rc, res); + } + } + tm.stop(); + cv::imshow("Out", image); + cv::waitKey(1); + tm.start(); + } + tm.stop(); + std::cout << "Processed " << frames << " frames" + << " (" << frames / tm.getTimeSec() << " FPS)" << std::endl; + return 0; +} diff --git a/modules/gapi/src/api/gbackend.cpp b/modules/gapi/src/api/gbackend.cpp index 600e5cc84d..fd4a5eb38b 100644 --- a/modules/gapi/src/api/gbackend.cpp +++ b/modules/gapi/src/api/gbackend.cpp @@ -67,6 +67,21 @@ cv::gapi::GKernelPackage cv::gapi::GBackend::Priv::auxiliaryKernels() const return {}; } +bool cv::gapi::GBackend::Priv::controlsMerge() const +{ + return false; +} + +bool cv::gapi::GBackend::Priv::allowsMerge(const cv::gimpl::GIslandModel::Graph &, + const ade::NodeHandle &, + const ade::NodeHandle &, + const ade::NodeHandle &) const +{ + GAPI_Assert(controlsMerge()); + return true; +} + + // GBackend public implementation ////////////////////////////////////////////// cv::gapi::GBackend::GBackend() { @@ -128,6 +143,14 @@ void bindInArg(Mag& mag, const RcDesc &rc, const GRunArg &arg, HandleRMat handle if (handleRMat == HandleRMat::SKIP) return; GAPI_Assert(arg.index() == GRunArg::index_of()); bindRMat(mag, rc, util::get(arg), RMat::Access::R); + + // FIXME: Here meta may^WWILL be copied multiple times! + // Replace it is reference-counted object? 
+ mag.meta()[rc.id] = arg.meta; + mag.meta()[rc.id] = arg.meta; +#if !defined(GAPI_STANDALONE) + mag.meta()[rc.id] = arg.meta; +#endif break; } @@ -139,19 +162,23 @@ void bindInArg(Mag& mag, const RcDesc &rc, const GRunArg &arg, HandleRMat handle case GRunArg::index_of() : mag_scalar = util::get(arg); break; default: util::throw_error(std::logic_error("content type of the runtime argument does not match to resource description ?")); } + mag.meta()[rc.id] = arg.meta; break; } case GShape::GARRAY: - mag.template slot()[rc.id] = util::get(arg); + mag.slot()[rc.id] = util::get(arg); + mag.meta()[rc.id] = arg.meta; break; case GShape::GOPAQUE: - mag.template slot()[rc.id] = util::get(arg); + mag.slot()[rc.id] = util::get(arg); + mag.meta()[rc.id] = arg.meta; break; case GShape::GFRAME: - mag.template slot()[rc.id] = util::get(arg); + mag.slot()[rc.id] = util::get(arg); + mag.meta()[rc.id] = arg.meta; break; default: @@ -235,13 +262,23 @@ cv::GRunArg getArg(const Mag& mag, const RcDesc &ref) // Wrap associated CPU object (either host or an internal one) switch (ref.shape) { - case GShape::GMAT: return GRunArg(mag.template slot().at(ref.id)); - case GShape::GSCALAR: return GRunArg(mag.template slot().at(ref.id)); + case GShape::GMAT: + return GRunArg(mag.slot().at(ref.id), + mag.meta().at(ref.id)); + case GShape::GSCALAR: + return GRunArg(mag.slot().at(ref.id), + mag.meta().at(ref.id)); // Note: .at() is intentional for GArray and GOpaque as objects MUST be already there // (and constructed by either bindIn/Out or resetInternal) - case GShape::GARRAY: return GRunArg(mag.template slot().at(ref.id)); - case GShape::GOPAQUE: return GRunArg(mag.template slot().at(ref.id)); - case GShape::GFRAME: return GRunArg(mag.template slot().at(ref.id)); + case GShape::GARRAY: + return GRunArg(mag.slot().at(ref.id), + mag.meta().at(ref.id)); + case GShape::GOPAQUE: + return GRunArg(mag.slot().at(ref.id), + mag.meta().at(ref.id)); + case GShape::GFRAME: + return 
GRunArg(mag.slot().at(ref.id), + mag.meta().at(ref.id)); default: util::throw_error(std::logic_error("Unsupported GShape type")); break; diff --git a/modules/gapi/src/api/gbackend_priv.hpp b/modules/gapi/src/api/gbackend_priv.hpp index 13f39acc86..45237514a5 100644 --- a/modules/gapi/src/api/gbackend_priv.hpp +++ b/modules/gapi/src/api/gbackend_priv.hpp @@ -19,7 +19,7 @@ #include "opencv2/gapi/gkernel.hpp" #include "compiler/gmodel.hpp" - +#include "compiler/gislandmodel.hpp" namespace cv { @@ -68,6 +68,22 @@ public: virtual cv::gapi::GKernelPackage auxiliaryKernels() const; + // Ask backend if it has a custom control over island fusion process + // This method is quite redundant but there's nothing better fits + // the current fusion process. By default, [existing] backends don't + // control the merge. + // FIXME: Refactor to a single entity? + virtual bool controlsMerge() const; + + // Ask backend if it is ok to merge these two islands connected + // via a data slot. By default, [existing] backends allow to merge everything. + // FIXME: Refactor to a single entity? + // FIXME: Strip down the type details form graph? (make it ade::Graph?) 
+ virtual bool allowsMerge(const cv::gimpl::GIslandModel::Graph &g, + const ade::NodeHandle &a_nh, + const ade::NodeHandle &slot_nh, + const ade::NodeHandle &b_nh) const; + virtual ~Priv() = default; }; diff --git a/modules/gapi/src/api/gcall.cpp b/modules/gapi/src/api/gcall.cpp index 6f5f65bbfd..6a2121bd36 100644 --- a/modules/gapi/src/api/gcall.cpp +++ b/modules/gapi/src/api/gcall.cpp @@ -78,3 +78,13 @@ const cv::GCall::Priv& cv::GCall::priv() const { return *m_priv; } + +cv::GKernel& cv::GCall::kernel() +{ + return m_priv->m_k; +} + +cv::util::any& cv::GCall::params() +{ + return m_priv->m_params; +} diff --git a/modules/gapi/src/api/gcall_priv.hpp b/modules/gapi/src/api/gcall_priv.hpp index edc2c225dc..b142432c78 100644 --- a/modules/gapi/src/api/gcall_priv.hpp +++ b/modules/gapi/src/api/gcall_priv.hpp @@ -42,10 +42,11 @@ class GCall::Priv { public: std::vector m_args; - const GKernel m_k; + GKernel m_k; // TODO: Rename to "constructionNode" or smt to reflect its lifetime GNode m_node; + cv::util::any m_params; explicit Priv(const GKernel &k); }; diff --git a/modules/gapi/src/api/gcomputation.cpp b/modules/gapi/src/api/gcomputation.cpp index 9ff0273b40..5668cddc93 100644 --- a/modules/gapi/src/api/gcomputation.cpp +++ b/modules/gapi/src/api/gcomputation.cpp @@ -9,6 +9,7 @@ #include // remove_if #include // isspace (non-locale version) #include +#include // util::indexed #include "logger.hpp" // GAPI_LOG @@ -21,6 +22,7 @@ #include "compiler/gmodelbuilder.hpp" #include "compiler/gcompiler.hpp" +#include "compiler/gcompiled_priv.hpp" // cv::GComputation private implementation ///////////////////////////////////// // @@ -174,28 +176,42 @@ cv::GRunArgs cv::GComputation::apply(GRunArgs &&ins, GCompileArgs &&args) { recompile(descr_of(ins), std::move(args)); - const auto& out_metas = m_priv->m_lastCompiled.outMetas(); + const auto& out_info = m_priv->m_lastCompiled.priv().outInfo(); + GRunArgs run_args; GRunArgsP outs; - run_args.reserve(out_metas.size()); - 
outs.reserve(out_metas.size()); + run_args.reserve(out_info.size()); + outs.reserve(out_info.size()); - for (auto&& meta : out_metas) + for (auto&& info : out_info) { - switch (meta.index()) + switch (info.shape) { - case cv::GMetaArg::index_of(): + case cv::GShape::GMAT: { run_args.emplace_back(cv::Mat{}); outs.emplace_back(&cv::util::get(run_args.back())); break; } - case cv::GMetaArg::index_of(): + case cv::GShape::GSCALAR: { run_args.emplace_back(cv::Scalar{}); outs.emplace_back(&cv::util::get(run_args.back())); break; } + case cv::GShape::GARRAY: + { + switch (info.kind) + { + case cv::detail::OpaqueKind::CV_POINT2F: + run_args.emplace_back(cv::detail::VectorRef{std::vector{}}); + outs.emplace_back(cv::util::get(run_args.back())); + break; + default: + util::throw_error(std::logic_error("Unsupported kind for GArray")); + } + break; + } default: util::throw_error(std::logic_error("Only cv::GMat and cv::GScalar are supported for python output")); } diff --git a/modules/gapi/src/api/ginfer.cpp b/modules/gapi/src/api/ginfer.cpp index 98eeef5ab6..156f8938c4 100644 --- a/modules/gapi/src/api/ginfer.cpp +++ b/modules/gapi/src/api/ginfer.cpp @@ -25,3 +25,59 @@ std::vector cv::gapi::GNetPackage::backends() const { for (const auto &nn : networks) unique_set.insert(nn.backend); return std::vector(unique_set.begin(), unique_set.end()); } + +// FIXME: Inference API is currently only available in full mode +#if !defined(GAPI_STANDALONE) + +cv::GInferInputs::GInferInputs() + : in_blobs(std::make_shared()) +{ +} + +cv::GMat& cv::GInferInputs::operator[](const std::string& name) { + return (*in_blobs)[name]; +} + +const cv::GInferInputs::Map& cv::GInferInputs::getBlobs() const { + return *in_blobs; +} + +void cv::GInferInputs::setInput(const std::string& name, const cv::GMat& value) { + in_blobs->emplace(name, value); +} + +struct cv::GInferOutputs::Priv +{ + Priv(std::shared_ptr); + + std::shared_ptr call; + InOutInfo* info = nullptr; + std::unordered_map out_blobs; +}; + 
+cv::GInferOutputs::Priv::Priv(std::shared_ptr c) + : call(std::move(c)), info(cv::util::any_cast(&call->params())) +{ +} + +cv::GInferOutputs::GInferOutputs(std::shared_ptr call) + : m_priv(std::make_shared(std::move(call))) +{ +} + +cv::GMat cv::GInferOutputs::at(const std::string& name) +{ + auto it = m_priv->out_blobs.find(name); + if (it == m_priv->out_blobs.end()) { + // FIXME: Avoid modifying GKernel + // Expect output to be always GMat + m_priv->call->kernel().outShapes.push_back(cv::GShape::GMAT); + // ...so _empty_ constructor is passed here. + m_priv->call->kernel().outCtors.emplace_back(cv::util::monostate{}); + int out_idx = static_cast(m_priv->out_blobs.size()); + it = m_priv->out_blobs.emplace(name, m_priv->call->yield(out_idx)).first; + m_priv->info->out_names.push_back(name); + } + return it->second; +} +#endif // GAPI_STANDALONE diff --git a/modules/gapi/src/api/gmat.cpp b/modules/gapi/src/api/gmat.cpp index d9f135222b..08bb170a86 100644 --- a/modules/gapi/src/api/gmat.cpp +++ b/modules/gapi/src/api/gmat.cpp @@ -144,7 +144,7 @@ bool GMatDesc::canDescribe(const cv::Mat& mat) const bool GMatDesc::canDescribe(const cv::RMat& mat) const { - return *this == mat.desc(); + return canDescribeHelper(*this, mat); } }// namespace cv diff --git a/modules/gapi/src/api/grunarg.cpp b/modules/gapi/src/api/grunarg.cpp new file mode 100644 index 0000000000..30ae2adbc0 --- /dev/null +++ b/modules/gapi/src/api/grunarg.cpp @@ -0,0 +1,33 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2020 Intel Corporation + +#include "precomp.hpp" +#include + +cv::GRunArg::GRunArg() { +} + +cv::GRunArg::GRunArg(const cv::GRunArg &arg) + : cv::GRunArgBase(static_cast(arg)) + , meta(arg.meta) { +} + +cv::GRunArg::GRunArg(cv::GRunArg &&arg) + : cv::GRunArgBase(std::move(static_cast(arg))) + , meta(std::move(arg.meta)) { +} + +cv::GRunArg& cv::GRunArg::operator= (const cv::GRunArg &arg) { + cv::GRunArgBase::operator=(static_cast(arg)); + meta = arg.meta; + return *this; +} + +cv::GRunArg& cv::GRunArg::operator= (cv::GRunArg &&arg) { + cv::GRunArgBase::operator=(std::move(static_cast(arg))); + meta = std::move(arg.meta); + return *this; +} diff --git a/modules/gapi/src/api/kernels_core.cpp b/modules/gapi/src/api/kernels_core.cpp index 55c43594af..82aceb1f26 100644 --- a/modules/gapi/src/api/kernels_core.cpp +++ b/modules/gapi/src/api/kernels_core.cpp @@ -388,14 +388,14 @@ GMat warpAffine(const GMat& src, const Mat& M, const Size& dsize, int flags, return core::GWarpAffine::on(src, M, dsize, flags, borderMode, borderValue); } -GOpaque size(const GMat& src) +GOpaque streaming::size(const GMat& src) { - return core::GSize::on(src); + return streaming::GSize::on(src); } -GOpaque size(const GOpaque& r) +GOpaque streaming::size(const GOpaque& r) { - return core::GSizeR::on(r); + return streaming::GSizeR::on(r); } } //namespace gapi diff --git a/modules/gapi/src/api/kernels_imgproc.cpp b/modules/gapi/src/api/kernels_imgproc.cpp index 108eefcb81..41085a7ebf 100644 --- a/modules/gapi/src/api/kernels_imgproc.cpp +++ b/modules/gapi/src/api/kernels_imgproc.cpp @@ -73,6 +73,13 @@ GMat dilate3x3(const GMat& src, int iterations, return dilate(src, cv::Mat(), cv::Point(-1,-1), iterations, borderType, borderValue); } +GMat morphologyEx(const GMat &src, const MorphTypes op, const Mat &kernel, const Point &anchor, + const int iterations, const BorderTypes borderType, const Scalar &borderValue) +{ + return imgproc::GMorphologyEx::on(src, op, kernel, anchor, 
iterations, + borderType, borderValue); +} + GMat Sobel(const GMat& src, int ddepth, int dx, int dy, int ksize, double scale, double delta, int borderType, const Scalar& bordVal) @@ -115,6 +122,101 @@ cv::GArray goodFeaturesToTrack(const GMat& image, int maxCorners, d useHarrisDetector, k); } +GArray> +findContours(const GMat &src, const RetrievalModes mode, const ContourApproximationModes method, + const GOpaque &offset) +{ + return imgproc::GFindContours::on(src, mode, method, offset); +} + +GArray> +findContours(const GMat &src, const RetrievalModes mode, const ContourApproximationModes method) +{ + return imgproc::GFindContoursNoOffset::on(src, mode, method); +} + + +std::tuple>,GArray> +findContoursH(const GMat &src, const RetrievalModes mode, const ContourApproximationModes method, + const GOpaque &offset) +{ + return imgproc::GFindContoursH::on(src, mode, method, offset); +} + +std::tuple>,GArray> +findContoursH(const GMat &src, const RetrievalModes mode, const ContourApproximationModes method) +{ + return imgproc::GFindContoursHNoOffset::on(src, mode, method); +} + +GOpaque boundingRect(const GMat& src) +{ + return imgproc::GBoundingRectMat::on(src); +} + +GOpaque boundingRect(const GArray& src) +{ + return imgproc::GBoundingRectVector32S::on(src); +} + +GOpaque boundingRect(const GArray& src) +{ + return imgproc::GBoundingRectVector32F::on(src); +} + +GOpaque fitLine2D(const GMat& src, const DistanceTypes distType, const double param, + const double reps, const double aeps) +{ + return imgproc::GFitLine2DMat::on(src, distType, param, reps, aeps); +} + +GOpaque fitLine2D(const GArray& src, const DistanceTypes distType, + const double param, const double reps, const double aeps) +{ + return imgproc::GFitLine2DVector32S::on(src, distType, param, reps, aeps); +} + +GOpaque fitLine2D(const GArray& src, const DistanceTypes distType, + const double param, const double reps, const double aeps) +{ + return imgproc::GFitLine2DVector32F::on(src, distType, param, 
reps, aeps); +} + +GOpaque fitLine2D(const GArray& src, const DistanceTypes distType, + const double param, const double reps, const double aeps) +{ + return imgproc::GFitLine2DVector64F::on(src, distType, param, reps, aeps); +} + +GOpaque fitLine3D(const GMat& src, const DistanceTypes distType, const double param, + const double reps, const double aeps) +{ + return imgproc::GFitLine3DMat::on(src, distType, param, reps, aeps); +} + +GOpaque fitLine3D(const GArray& src, const DistanceTypes distType, + const double param, const double reps, const double aeps) +{ + return imgproc::GFitLine3DVector32S::on(src, distType, param, reps, aeps); +} + +GOpaque fitLine3D(const GArray& src, const DistanceTypes distType, + const double param, const double reps, const double aeps) +{ + return imgproc::GFitLine3DVector32F::on(src, distType, param, reps, aeps); +} + +GOpaque fitLine3D(const GArray& src, const DistanceTypes distType, + const double param, const double reps, const double aeps) +{ + return imgproc::GFitLine3DVector64F::on(src, distType, param, reps, aeps); +} + +GMat BGR2RGB(const GMat& src) +{ + return imgproc::GBGR2RGB::on(src); +} + GMat RGB2Gray(const GMat& src) { return imgproc::GRGB2Gray::on(src); @@ -160,6 +262,26 @@ GMat YUV2RGB(const GMat& src) return imgproc::GYUV2RGB::on(src); } +GMat BGR2I420(const GMat& src) +{ + return imgproc::GBGR2I420::on(src); +} + +GMat RGB2I420(const GMat& src) +{ + return imgproc::GRGB2I420::on(src); +} + +GMat I4202BGR(const GMat& src) +{ + return imgproc::GI4202BGR::on(src); +} + +GMat I4202RGB(const GMat& src) +{ + return imgproc::GI4202RGB::on(src); +} + GMat NV12toRGB(const GMat& src_y, const GMat& src_uv) { return imgproc::GNV12toRGB::on(src_y, src_uv); diff --git a/modules/gapi/src/api/kernels_streaming.cpp b/modules/gapi/src/api/kernels_streaming.cpp new file mode 100644 index 0000000000..af7bd19dd1 --- /dev/null +++ b/modules/gapi/src/api/kernels_streaming.cpp @@ -0,0 +1,74 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + +#include "precomp.hpp" + +#include +#include + +cv::GMat cv::gapi::streaming::desync(const cv::GMat &g) { + // FIXME: this is a limited implementation of desync + // The real implementation must be generic (template) and + // reside in desync.hpp (and it is detail::desync<>()) + + // FIXME: Put a copy here to solve the below problem + // FIXME: Because of the copy, the desync functionality is limited + // to GMat only (we don't have generic copy kernel for other + // object types) + return cv::gapi::copy(detail::desync(g)); + + // FIXME + // + // If consumed by multiple different islands (OCV and Fluid by + // example, an object needs to be desynchronized individually + // for every path. + // + // This is a limitation of the current implementation. It works + // this way: every "desync" link from the main path to a new + // desync path gets its "DesyncQueue" object which stores only the + // last value written before of the desync object (DO) it consumes + // (the container of type "last written value" or LWV. + // + // LWV + // [Sync path] -> desync() - - > DO -> [ISL0 @ Desync path #1] + // + // At the same time, generally, every island in the streaming + // graph gets its individual input as a queue (so normally, a + // writer pushes the same output MULTIPLE TIMES if it has mutliple + // readers): + // + // LWV + // [Sync path] -> desync() - - > DO1 -> [ISL0 @ Desync path #1] + // : LWV + // ' - - > DO2 -> [ISL1 @ Desync path #1] + // + // For users, it may seem legit to use desync here only once, and + // it MUST BE legit once the problem is fixed. + // But the problem with the current implementation is that islands + // on the same desync path get different desync queues and in fact + // stay desynchronized between each other. 
One shouldn't consider + // this as a single desync path anymore. + // If these two ISLs are then merged e.g. with add(a,b), the + // results will be inconsistent, given that the latency of ISL0 + // and ISL1 may be different. This is not the same frame anymore + // coming as `a` and `b` to add(a,b) because of it. + // + // To make things clear, we forbid this now and ask to call + // desync one more time to allow that. It is bad since the graph + // structure and island layout depends on kernel packages used, + // not on the sole GComputation structure. This needs to be fixed! + // Here's the working configuration: + // + // LWV + // [Sync path] -> desync() - - > DO1 -> [ISL0 @ Desync path #1] + // : LWV + // '-> desync() - - > DO2 -> [ISL1 @ Desync path #2] <-(!) + // + // Put an operation right after desync() is a quick workaround to + // this synchronization problem. There will be one "last_written_value" + // connected to a desynchronized data object, and this sole last_written_value + // object will feed both branches of the streaming executable. 
+} diff --git a/modules/gapi/src/api/render_ocv.cpp b/modules/gapi/src/api/render_ocv.cpp index a298a958bd..5ab2e1dd07 100644 --- a/modules/gapi/src/api/render_ocv.cpp +++ b/modules/gapi/src/api/render_ocv.cpp @@ -2,7 +2,7 @@ #include // Kernel API's #include "api/render_ocv.hpp" -#include "api/ft_render.hpp" +#include "backends/render/ft_render.hpp" namespace cv { @@ -146,12 +146,8 @@ struct EmptyConverter template void drawPrimitivesOCV(cv::Mat& in, const cv::gapi::wip::draw::Prims& prims, - cv::gapi::wip::draw::FTTextRender* ftpr) + std::shared_ptr& ftpr) { -#ifndef HAVE_FREETYPE - cv::util::suppress_unused_warning(ftpr); -#endif - using namespace cv::gapi::wip::draw; ColorConverter converter; @@ -177,7 +173,6 @@ void drawPrimitivesOCV(cv::Mat& in, case Prim::index_of(): { -#ifdef HAVE_FREETYPE const auto& ftp = cv::util::get(p); const auto color = converter.cvtColor(ftp.color); @@ -196,9 +191,6 @@ void drawPrimitivesOCV(cv::Mat& in, cv::Point tl(ftp.org.x, ftp.org.y - mask.size().height + baseline); blendTextMask(in, mask, tl, color); -#else - cv::util::throw_error(std::runtime_error("FreeType not found !")); -#endif break; } @@ -251,16 +243,16 @@ void drawPrimitivesOCV(cv::Mat& in, } } -void drawPrimitivesOCVBGR(cv::Mat &in, - const cv::gapi::wip::draw::Prims &prims, - cv::gapi::wip::draw::FTTextRender* ftpr) +void drawPrimitivesOCVBGR(cv::Mat &in, + const cv::gapi::wip::draw::Prims &prims, + std::shared_ptr &ftpr) { drawPrimitivesOCV(in, prims, ftpr); } -void drawPrimitivesOCVYUV(cv::Mat &in, - const cv::gapi::wip::draw::Prims &prims, - cv::gapi::wip::draw::FTTextRender* ftpr) +void drawPrimitivesOCVYUV(cv::Mat &in, + const cv::gapi::wip::draw::Prims &prims, + std::shared_ptr &ftpr) { drawPrimitivesOCV(in, prims, ftpr); } diff --git a/modules/gapi/src/api/render_ocv.hpp b/modules/gapi/src/api/render_ocv.hpp index 91194dcdc1..a9a98f93fb 100644 --- a/modules/gapi/src/api/render_ocv.hpp +++ b/modules/gapi/src/api/render_ocv.hpp @@ -1,6 +1,6 @@ #include #include 
"render_priv.hpp" -#include "ft_render.hpp" +#include "backends/render/ft_render.hpp" #ifndef OPENCV_RENDER_OCV_HPP #define OPENCV_RENDER_OCV_HPP @@ -15,8 +15,8 @@ namespace draw { // FIXME only for tests -void GAPI_EXPORTS drawPrimitivesOCVYUV(cv::Mat& yuv, const Prims& prims, cv::gapi::wip::draw::FTTextRender* mc); -void GAPI_EXPORTS drawPrimitivesOCVBGR(cv::Mat& bgr, const Prims& prims, cv::gapi::wip::draw::FTTextRender* mc); +void GAPI_EXPORTS drawPrimitivesOCVYUV(cv::Mat& yuv, const Prims& prims, std::shared_ptr& mc); +void GAPI_EXPORTS drawPrimitivesOCVBGR(cv::Mat& bgr, const Prims& prims, std::shared_ptr& mc); } // namespace draw } // namespace wip diff --git a/modules/gapi/src/api/rmat.cpp b/modules/gapi/src/api/rmat.cpp index 9c2da2ebc7..12ba4e5e0e 100644 --- a/modules/gapi/src/api/rmat.cpp +++ b/modules/gapi/src/api/rmat.cpp @@ -8,16 +8,68 @@ using View = cv::RMat::View; +namespace { +cv::GMatDesc checkDesc(const cv::GMatDesc& desc) { + if (!desc.dims.empty() && desc.chan != -1) { + cv::util::throw_error( + std::logic_error("Multidimesional RMat::Views with chan different from -1 are not supported!")); + } + return desc; +} + +int typeFromDesc(const cv::GMatDesc& desc) { + // In multidimensional case GMatDesc::chan is -1, + // change it to 1 when calling CV_MAKE_TYPE + return CV_MAKE_TYPE(desc.depth, desc.chan == -1 ? 1 : desc.chan); +} + +static View::stepsT defaultSteps(const cv::GMatDesc& desc) { + const auto& dims = desc.dims.empty() + ? 
std::vector{desc.size.height, desc.size.width} + : desc.dims; + View::stepsT steps(dims.size(), 0u); + auto type = typeFromDesc(desc); + steps.back() = CV_ELEM_SIZE(type); + for (int i = static_cast(dims.size())-2; i >= 0; i--) { + steps[i] = steps[i+1]*dims[i]; + } + return steps; +} +} // anonymous namespace + +View::View(const cv::GMatDesc& desc, uchar* data, size_t step, DestroyCallback&& cb) + : m_desc(checkDesc(desc)) + , m_data(data) + , m_steps([this, step](){ + GAPI_Assert(m_desc.dims.empty()); + auto steps = defaultSteps(m_desc); + if (step != 0u) { + steps[0] = step; + } + return steps; + }()) + , m_cb(std::move(cb)) { +} + +View::View(const cv::GMatDesc& desc, uchar* data, const stepsT &steps, DestroyCallback&& cb) + : m_desc(checkDesc(desc)) + , m_data(data) + , m_steps(steps == stepsT{} ? defaultSteps(m_desc): steps) + , m_cb(std::move(cb)) { +} + +int View::type() const { return typeFromDesc(m_desc); } + // There is an issue with default generated operator=(View&&) on Mac: -// it doesn't nullify m_cb of a moved object +// it doesn't nullify m_cb of the moved object View& View::operator=(View&& v) { - m_desc = v.m_desc; - m_data = v.m_data; - m_step = v.m_step; - m_cb = v.m_cb; - v.m_desc = {}; - v.m_data = nullptr; - v.m_step = 0u; - v.m_cb = nullptr; + m_desc = v.m_desc; + m_data = v.m_data; + m_steps = v.m_steps; + m_cb = v.m_cb; + v.m_desc = {}; + v.m_data = nullptr; + v.m_steps = {0u}; + v.m_cb = nullptr; return *this; } diff --git a/modules/gapi/src/api/s11n.cpp b/modules/gapi/src/api/s11n.cpp index 54a0850394..b6acf28ea4 100644 --- a/modules/gapi/src/api/s11n.cpp +++ b/modules/gapi/src/api/s11n.cpp @@ -44,6 +44,13 @@ std::vector cv::gapi::serialize(const cv::GRunArgs& ra) return os.data(); } +std::vector cv::gapi::serialize(const cv::GCompileArgs& ca) +{ + cv::gapi::s11n::ByteMemoryOutStream os; + serialize(os, ca); + return os.data(); +} + // FIXME: This function should move from S11N to GRunArg-related entities. 
// it has nothing to do with the S11N as it is cv::GRunArgsP cv::gapi::bind(cv::GRunArgs &results) @@ -72,6 +79,9 @@ cv::GRunArgsP cv::gapi::bind(cv::GRunArgs &results) case T::index_of() : outputs.emplace_back(cv::util::get(res_obj)); break; + case cv::GRunArg::index_of() : + outputs.emplace_back((cv::RMat*)(&(cv::util::get(res_obj)))); + break; default: GAPI_Assert(false && "This value type is not supported!"); // ...maybe because of STANDALONE mode. break; @@ -105,6 +115,9 @@ cv::GRunArg cv::gapi::bind(cv::GRunArgP &out) case T::index_of() : return cv::GRunArg(*cv::util::get(out)); + case T::index_of() : + return cv::GRunArg(*cv::util::get(out)); + default: // ...maybe our types were extended GAPI_Assert(false && "This value type is UNKNOWN!"); diff --git a/modules/gapi/src/backends/common/gbackend.hpp b/modules/gapi/src/backends/common/gbackend.hpp index f747a0dd1c..576168db53 100644 --- a/modules/gapi/src/backends/common/gbackend.hpp +++ b/modules/gapi/src/backends/common/gbackend.hpp @@ -23,11 +23,26 @@ namespace cv { namespace gimpl { inline cv::Mat asMat(RMat::View& v) { +#if !defined(GAPI_STANDALONE) + return v.dims().empty() ? cv::Mat(v.rows(), v.cols(), v.type(), v.ptr(), v.step()) + : cv::Mat(v.dims(), v.type(), v.ptr(), v.steps().data()); +#else + // FIXME: add a check that steps are default return v.dims().empty() ? 
cv::Mat(v.rows(), v.cols(), v.type(), v.ptr(), v.step()) : cv::Mat(v.dims(), v.type(), v.ptr()); + +#endif } inline RMat::View asView(const Mat& m, RMat::View::DestroyCallback&& cb = nullptr) { +#if !defined(GAPI_STANDALONE) + RMat::View::stepsT steps(m.dims); + for (int i = 0; i < m.dims; i++) { + steps[i] = m.step[i]; + } + return RMat::View(cv::descr_of(m), m.data, steps, std::move(cb)); +#else return RMat::View(cv::descr_of(m), m.data, m.step, std::move(cb)); +#endif } class RMatAdapter : public RMat::Adapter { @@ -47,6 +62,8 @@ namespace magazine { template struct Class { template using MapT = std::unordered_map; + using MapM = std::unordered_map; + template MapT& slot() { return std::get::value>(slots); @@ -55,8 +72,17 @@ namespace magazine { { return std::get::value>(slots); } + template MapM& meta() + { + return metas[ade::util::type_list_index::value]; + } + template const MapM& meta() const + { + return metas[ade::util::type_list_index::value]; + } private: std::tuple...> slots; + std::array metas; }; } // namespace magazine @@ -133,7 +159,7 @@ inline cv::util::optional getCompileArg(const cv::GCompileArgs &args) return cv::gapi::getCompileArg(args); } -void createMat(const cv::GMatDesc& desc, cv::Mat& mat); +void GAPI_EXPORTS createMat(const cv::GMatDesc& desc, cv::Mat& mat); }} // cv::gimpl diff --git a/modules/gapi/src/backends/common/gmetabackend.cpp b/modules/gapi/src/backends/common/gmetabackend.cpp new file mode 100644 index 0000000000..5364152b65 --- /dev/null +++ b/modules/gapi/src/backends/common/gmetabackend.cpp @@ -0,0 +1,105 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2020 Intel Corporation + +#include "precomp.hpp" + +#include // compile args +#include // any +#include // GMeta + +#include "compiler/gobjref.hpp" // RcDesc +#include "compiler/gmodel.hpp" // GModel, Op +#include "backends/common/gbackend.hpp" +#include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK! + +#include "backends/common/gmetabackend.hpp" + +namespace { + +class GraphMetaExecutable final: public cv::gimpl::GIslandExecutable { + std::string m_meta_tag; + +public: + GraphMetaExecutable(const ade::Graph& g, + const std::vector& nodes); + bool canReshape() const override; + void reshape(ade::Graph&, const cv::GCompileArgs&) override; + + void run(std::vector &&input_objs, + std::vector &&output_objs) override; +}; + +bool GraphMetaExecutable::canReshape() const { + return true; +} +void GraphMetaExecutable::reshape(ade::Graph&, const cv::GCompileArgs&) { + // do nothing here +} + +GraphMetaExecutable::GraphMetaExecutable(const ade::Graph& g, + const std::vector& nodes) { + // There may be only one node in the graph + GAPI_Assert(nodes.size() == 1u); + + cv::gimpl::GModel::ConstGraph cg(g); + const auto &op = cg.metadata(nodes[0]).get(); + GAPI_Assert(op.k.name == cv::gapi::streaming::detail::GMeta::id()); + m_meta_tag = op.k.tag; +} + +void GraphMetaExecutable::run(std::vector &&input_objs, + std::vector &&output_objs) { + GAPI_Assert(input_objs.size() == 1u); + GAPI_Assert(output_objs.size() == 1u); + + const cv::GRunArg in_arg = input_objs[0].second; + cv::GRunArgP out_arg = output_objs[0].second; + + auto it = in_arg.meta.find(m_meta_tag); + if (it == in_arg.meta.end()) { + cv::util::throw_error + (std::logic_error("Run-time meta " + + m_meta_tag + + " is not found in object " + + std::to_string(static_cast(input_objs[0].first.shape)) + + "/" + + std::to_string(input_objs[0].first.id))); + } + cv::util::get(out_arg) = it->second; +} + +class GraphMetaBackendImpl final: public cv::gapi::GBackend::Priv { + virtual void 
unpackKernel(ade::Graph &, + const ade::NodeHandle &, + const cv::GKernelImpl &) override { + // Do nothing here + } + + virtual EPtr compile(const ade::Graph& graph, + const cv::GCompileArgs&, + const std::vector& nodes, + const std::vector&, + const std::vector&) const override { + return EPtr{new GraphMetaExecutable(graph, nodes)}; + } +}; + +cv::gapi::GBackend graph_meta_backend() { + static cv::gapi::GBackend this_backend(std::make_shared()); + return this_backend; +} + +struct InGraphMetaKernel final: public cv::detail::KernelTag { + using API = cv::gapi::streaming::detail::GMeta; + static cv::gapi::GBackend backend() { return graph_meta_backend(); } + static int kernel() { return 42; } +}; + +} // anonymous namespace + +cv::gapi::GKernelPackage cv::gimpl::meta::kernels() { + return cv::gapi::kernels(); +} diff --git a/modules/gapi/src/backends/common/gmetabackend.hpp b/modules/gapi/src/backends/common/gmetabackend.hpp new file mode 100644 index 0000000000..56f61d0e3d --- /dev/null +++ b/modules/gapi/src/backends/common/gmetabackend.hpp @@ -0,0 +1,16 @@ +#ifndef OPENCV_GAPI_SRC_COMMON_META_BACKEND_HPP +#define OPENCV_GAPI_SRC_COMMON_META_BACKEND_HPP + +#include + +namespace cv { +namespace gimpl { +namespace meta { + +cv::gapi::GKernelPackage kernels(); + +} // namespace meta +} // namespace gimpl +} // namespace cv + +#endif // OPENCV_GAPI_SRC_COMMON_META_BACKEND_HPP diff --git a/modules/gapi/src/backends/common/serialization.cpp b/modules/gapi/src/backends/common/serialization.cpp index c0b3281449..8c2313b292 100644 --- a/modules/gapi/src/backends/common/serialization.cpp +++ b/modules/gapi/src/backends/common/serialization.cpp @@ -94,13 +94,14 @@ void linkNodes(ade::Graph& g) { } void relinkProto(ade::Graph& g) { + using namespace cv::gimpl; // identify which node handles map to the protocol // input/output object in the reconstructed graph - using S = std::set; // FIXME: use ... - using M = std::map; // FIXME: unordered! 
+ using S = std::set; // FIXME: use ... + using M = std::map; // FIXME: unordered! - cv::gimpl::GModel::Graph gm(g); - auto &proto = gm.metadata().get(); + GModel::Graph gm(g); + auto &proto = gm.metadata().get(); const S set_in(proto.inputs.begin(), proto.inputs.end()); const S set_out(proto.outputs.begin(), proto.outputs.end()); @@ -108,9 +109,9 @@ void relinkProto(ade::Graph& g) { // Associate the protocol node handles with their resource identifiers for (auto &&nh : gm.nodes()) { - if (gm.metadata(nh).get().t == cv::gimpl::NodeType::DATA) { - const auto &d = gm.metadata(nh).get(); - const auto rc = cv::gimpl::RcDesc{d.rc, d.shape, d.ctor}; + if (gm.metadata(nh).get().t == NodeType::DATA) { + const auto &d = gm.metadata(nh).get(); + const auto rc = RcDesc{d.rc, d.shape, d.ctor}; if (set_in.count(rc) > 0) { GAPI_DbgAssert(set_out.count(rc) == 0); map_in[rc] = nh; @@ -128,6 +129,12 @@ void relinkProto(ade::Graph& g) { proto.out_nhs.clear(); for (auto &rc : proto.inputs) { proto.in_nhs .push_back(map_in .at(rc)); } for (auto &rc : proto.outputs) { proto.out_nhs.push_back(map_out.at(rc)); } + + // If a subgraph is being serialized it's possible that + // some of its in/out nodes are INTERNAL in the full graph. 
+ // Set their storage apporpriately + for (auto &nh : proto.in_nhs) { gm.metadata(nh).get().storage = Data::Storage::INPUT; } + for (auto &nh : proto.out_nhs) { gm.metadata(nh).get().storage = Data::Storage::OUTPUT; } } } // anonymous namespace @@ -145,6 +152,13 @@ IIStream& operator>> (IIStream& is, cv::Point& pt) { return is >> pt.x >> pt.y; } +IOStream& operator<< (IOStream& os, const cv::Point2f &pt) { + return os << pt.x << pt.y; +} +IIStream& operator>> (IIStream& is, cv::Point2f& pt) { + return is >> pt.x >> pt.y; +} + IOStream& operator<< (IOStream& os, const cv::Size &sz) { return os << sz.width << sz.height; } @@ -165,12 +179,12 @@ IOStream& operator<< (IOStream& os, const cv::Scalar &s) { IIStream& operator>> (IIStream& is, cv::Scalar& s) { return is >> s.val[0] >> s.val[1] >> s.val[2] >> s.val[3]; } -IOStream& operator<< (IOStream& os, const cv::RMat&) { - util::throw_error(std::logic_error("Serialization of RMat is not supported")); +IOStream& operator<< (IOStream& os, const cv::RMat& mat) { + mat.serialize(os); return os; } IIStream& operator>> (IIStream& is, cv::RMat&) { - util::throw_error(std::logic_error("Serialization of RMat is not supported")); + util::throw_error(std::logic_error("operator>> for RMat should never be called")); return is; } @@ -329,6 +343,18 @@ IIStream& operator>> (IIStream& is, cv::gapi::wip::draw::Line &l) { // G-API types ///////////////////////////////////////////////////////////////// +IOStream& operator<< (IOStream& os, const cv::GCompileArg& arg) +{ + ByteMemoryOutStream tmpS; + arg.serialize(tmpS); + std::vector data = tmpS.data(); + + os << arg.tag; + os << data; + + return os; +} + // Stubs (empty types) IOStream& operator<< (IOStream& os, cv::util::monostate ) {return os;} @@ -497,17 +523,17 @@ IOStream& operator<< (IOStream& os, const cv::GArg &arg) { GAPI_Assert(arg.kind == cv::detail::ArgKind::OPAQUE_VAL); GAPI_Assert(arg.opaque_kind != cv::detail::OpaqueKind::CV_UNKNOWN); switch (arg.opaque_kind) { - case 
cv::detail::OpaqueKind::CV_BOOL: os << arg.get(); break; - case cv::detail::OpaqueKind::CV_INT: os << arg.get(); break; - case cv::detail::OpaqueKind::CV_UINT64: os << arg.get(); break; - case cv::detail::OpaqueKind::CV_DOUBLE: os << arg.get(); break; - case cv::detail::OpaqueKind::CV_FLOAT: os << arg.get(); break; - case cv::detail::OpaqueKind::CV_STRING: os << arg.get(); break; - case cv::detail::OpaqueKind::CV_POINT: os << arg.get(); break; - case cv::detail::OpaqueKind::CV_SIZE: os << arg.get(); break; - case cv::detail::OpaqueKind::CV_RECT: os << arg.get(); break; - case cv::detail::OpaqueKind::CV_SCALAR: os << arg.get(); break; - case cv::detail::OpaqueKind::CV_MAT: os << arg.get(); break; + case cv::detail::OpaqueKind::CV_BOOL: os << arg.get(); break; + case cv::detail::OpaqueKind::CV_INT: os << arg.get(); break; + case cv::detail::OpaqueKind::CV_UINT64: os << arg.get(); break; + case cv::detail::OpaqueKind::CV_DOUBLE: os << arg.get(); break; + case cv::detail::OpaqueKind::CV_FLOAT: os << arg.get(); break; + case cv::detail::OpaqueKind::CV_STRING: os << arg.get(); break; + case cv::detail::OpaqueKind::CV_POINT: os << arg.get(); break; + case cv::detail::OpaqueKind::CV_SIZE: os << arg.get(); break; + case cv::detail::OpaqueKind::CV_RECT: os << arg.get(); break; + case cv::detail::OpaqueKind::CV_SCALAR: os << arg.get(); break; + case cv::detail::OpaqueKind::CV_MAT: os << arg.get(); break; default: GAPI_Assert(false && "GArg: Unsupported (unknown?) 
opaque value type"); } } @@ -531,17 +557,18 @@ IIStream& operator>> (IIStream& is, cv::GArg &arg) { switch (arg.opaque_kind) { #define HANDLE_CASE(E,T) case cv::detail::OpaqueKind::CV_##E: \ { T t{}; is >> t; arg = (cv::GArg(t)); } break - HANDLE_CASE(BOOL , bool); - HANDLE_CASE(INT , int); - HANDLE_CASE(UINT64 , uint64_t); - HANDLE_CASE(DOUBLE , double); - HANDLE_CASE(FLOAT , float); - HANDLE_CASE(STRING , std::string); - HANDLE_CASE(POINT , cv::Point); - HANDLE_CASE(SIZE , cv::Size); - HANDLE_CASE(RECT , cv::Rect); - HANDLE_CASE(SCALAR , cv::Scalar); - HANDLE_CASE(MAT , cv::Mat); + HANDLE_CASE(BOOL , bool); + HANDLE_CASE(INT , int); + HANDLE_CASE(UINT64 , uint64_t); + HANDLE_CASE(DOUBLE , double); + HANDLE_CASE(FLOAT , float); + HANDLE_CASE(STRING , std::string); + HANDLE_CASE(POINT , cv::Point); + HANDLE_CASE(POINT2F , cv::Point2f); + HANDLE_CASE(SIZE , cv::Size); + HANDLE_CASE(RECT , cv::Rect); + HANDLE_CASE(SCALAR , cv::Scalar); + HANDLE_CASE(MAT , cv::Mat); #undef HANDLE_CASE default: GAPI_Assert(false && "GArg: Unsupported (unknown?) 
opaque value type"); } @@ -865,6 +892,14 @@ IIStream& ByteMemoryInStream::operator>> (std::string& str) { return *this; } +GAPI_EXPORTS std::unique_ptr detail::getInStream(const std::vector &p) { + return std::unique_ptr(new ByteMemoryInStream(p)); +} + +GAPI_EXPORTS void serialize(IOStream& os, const cv::GCompileArgs &ca) { + os << ca; +} + GAPI_EXPORTS void serialize(IOStream& os, const cv::GMetaArgs &ma) { os << ma; } @@ -882,7 +917,6 @@ GAPI_EXPORTS GRunArgs run_args_deserialize(IIStream& is) { return s; } - } // namespace s11n } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/common/serialization.hpp b/modules/gapi/src/backends/common/serialization.hpp index 4c60e71d87..a3134d84d2 100644 --- a/modules/gapi/src/backends/common/serialization.hpp +++ b/modules/gapi/src/backends/common/serialization.hpp @@ -40,6 +40,8 @@ struct GSerialized { // G-API types ///////////////////////////////////////////////////////////////// +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::GCompileArg& arg); + GAPI_EXPORTS IOStream& operator<< (IOStream& os, cv::util::monostate ); GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::util::monostate &); @@ -86,26 +88,6 @@ GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::GArrayDesc &); GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::GFrameDesc &); GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::GFrameDesc &); -#if !defined(GAPI_STANDALONE) -GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::UMat &); -GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::UMat &); -#endif // !defined(GAPI_STANDALONE) - -GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::RMat &r); -GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::RMat &r); - -GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::gapi::wip::IStreamSource::Ptr &); -GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::gapi::wip::IStreamSource::Ptr &); - -GAPI_EXPORTS IOStream& operator<< (IOStream& os, const 
cv::detail::VectorRef &); -GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::detail::VectorRef &); - -GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::detail::OpaqueRef &); -GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::detail::OpaqueRef &); - -GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::MediaFrame &); -GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::MediaFrame &); - GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::gimpl::RcDesc &rc); GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::gimpl::RcDesc &rc); @@ -176,46 +158,6 @@ GAPI_EXPORTS void serialize( IOStream& os GAPI_EXPORTS GSerialized deserialize(IIStream& is); GAPI_EXPORTS void reconstruct(const GSerialized &s, ade::Graph &g); -// Generic: variant serialization ////////////////////////////////////////////// -namespace detail { // FIXME: breaks old code -template -IOStream& put_v(IOStream&, const V&, std::size_t) { - GAPI_Assert(false && "variant>>: requested index is invalid"); -}; -template -IOStream& put_v(IOStream& os, const V& v, std::size_t x) { - return (x == 0u) - ? 
os << cv::util::get(v) - : put_v(os, v, x-1); -} -template -IIStream& get_v(IIStream&, V&, std::size_t, std::size_t) { - GAPI_Assert(false && "variant<<: requested index is invalid"); -} -template -IIStream& get_v(IIStream& is, V& v, std::size_t i, std::size_t gi) { - if (i == gi) { - X x{}; - is >> x; - v = std::move(x); - return is; - } else return get_v(is, v, i+1, gi); -} -} // namespace detail FIXME: breaks old code - -template -IOStream& operator<< (IOStream& os, const cv::util::variant &v) { - os << (uint32_t)v.index(); - return detail::put_v, Ts...>(os, v, v.index()); -} -template -IIStream& operator>> (IIStream& is, cv::util::variant &v) { - int idx = -1; - is >> idx; - GAPI_Assert(idx >= 0 && idx < (int)sizeof...(Ts)); - return detail::get_v, Ts...>(is, v, 0u, idx); -} - // FIXME: Basic Stream implementaions ////////////////////////////////////////// // Basic in-memory stream implementations. @@ -268,6 +210,11 @@ public: virtual IIStream& operator>> (std::string &) override; }; +namespace detail { +GAPI_EXPORTS std::unique_ptr getInStream(const std::vector &p); +} // namespace detail + +GAPI_EXPORTS void serialize(IOStream& os, const cv::GCompileArgs &ca); GAPI_EXPORTS void serialize(IOStream& os, const cv::GMetaArgs &ma); GAPI_EXPORTS void serialize(IOStream& os, const cv::GRunArgs &ra); GAPI_EXPORTS GMetaArgs meta_args_deserialize(IIStream& is); diff --git a/modules/gapi/src/backends/cpu/gcpucore.cpp b/modules/gapi/src/backends/cpu/gcpucore.cpp index f2b8f7077d..fc460149c6 100644 --- a/modules/gapi/src/backends/cpu/gcpucore.cpp +++ b/modules/gapi/src/backends/cpu/gcpucore.cpp @@ -625,7 +625,7 @@ GAPI_OCV_KERNEL(GCPUParseYolo, cv::gapi::nn::parsers::GParseYolo) } }; -GAPI_OCV_KERNEL(GCPUSize, cv::gapi::core::GSize) +GAPI_OCV_KERNEL(GCPUSize, cv::gapi::streaming::GSize) { static void run(const cv::Mat& in, cv::Size& out) { @@ -634,7 +634,7 @@ GAPI_OCV_KERNEL(GCPUSize, cv::gapi::core::GSize) } }; -GAPI_OCV_KERNEL(GCPUSizeR, cv::gapi::core::GSizeR) 
+GAPI_OCV_KERNEL(GCPUSizeR, cv::gapi::streaming::GSizeR) { static void run(const cv::Rect& in, cv::Size& out) { diff --git a/modules/gapi/src/backends/cpu/gcpuimgproc.cpp b/modules/gapi/src/backends/cpu/gcpuimgproc.cpp index 8104565f03..6cbf0d32f0 100644 --- a/modules/gapi/src/backends/cpu/gcpuimgproc.cpp +++ b/modules/gapi/src/backends/cpu/gcpuimgproc.cpp @@ -145,6 +145,16 @@ GAPI_OCV_KERNEL(GCPUDilate, cv::gapi::imgproc::GDilate) } }; +GAPI_OCV_KERNEL(GCPUMorphologyEx, cv::gapi::imgproc::GMorphologyEx) +{ + static void run(const cv::Mat &in, const cv::MorphTypes op, const cv::Mat &kernel, + const cv::Point &anchor, const int iterations, + const cv::BorderTypes borderType, const cv::Scalar &borderValue, cv::Mat &out) + { + cv::morphologyEx(in, out, op, kernel, anchor, iterations, borderType, borderValue); + } +}; + GAPI_OCV_KERNEL(GCPUSobel, cv::gapi::imgproc::GSobel) { static void run(const cv::Mat& in, int ddepth, int dx, int dy, int ksize, double scale, double delta, int borderType, @@ -211,6 +221,182 @@ GAPI_OCV_KERNEL(GCPUGoodFeatures, cv::gapi::imgproc::GGoodFeatures) } }; +GAPI_OCV_KERNEL(GCPUFindContours, cv::gapi::imgproc::GFindContours) +{ + static void run(const cv::Mat& image, const cv::RetrievalModes mode, + const cv::ContourApproximationModes method, const cv::Point& offset, + std::vector> &outConts) + { + cv::findContours(image, outConts, mode, method, offset); + } +}; + +GAPI_OCV_KERNEL(GCPUFindContoursNoOffset, cv::gapi::imgproc::GFindContoursNoOffset) +{ + static void run(const cv::Mat& image, const cv::RetrievalModes mode, + const cv::ContourApproximationModes method, + std::vector> &outConts) + { + cv::findContours(image, outConts, mode, method); + } +}; + +GAPI_OCV_KERNEL(GCPUFindContoursH, cv::gapi::imgproc::GFindContoursH) +{ + static void run(const cv::Mat& image, const cv::RetrievalModes mode, + const cv::ContourApproximationModes method, const cv::Point& offset, + std::vector> &outConts, std::vector &outHier) + { + cv::findContours(image, 
outConts, outHier, mode, method, offset); + } +}; + +GAPI_OCV_KERNEL(GCPUFindContoursHNoOffset, cv::gapi::imgproc::GFindContoursHNoOffset) +{ + static void run(const cv::Mat& image, const cv::RetrievalModes mode, + const cv::ContourApproximationModes method, + std::vector> &outConts, std::vector &outHier) + { + cv::findContours(image, outConts, outHier, mode, method); + } +}; + +GAPI_OCV_KERNEL(GCPUBoundingRectMat, cv::gapi::imgproc::GBoundingRectMat) +{ + static void run(const cv::Mat& in, cv::Rect& out) + { + out = cv::boundingRect(in); + } +}; + +GAPI_OCV_KERNEL(GCPUBoundingRectVector32S, cv::gapi::imgproc::GBoundingRectVector32S) +{ + static void run(const std::vector& in, cv::Rect& out) + { + out = cv::boundingRect(in); + } +}; + +GAPI_OCV_KERNEL(GCPUBoundingRectVector32F, cv::gapi::imgproc::GBoundingRectVector32F) +{ + static void run(const std::vector& in, cv::Rect& out) + { + out = cv::boundingRect(in); + } +}; + +GAPI_OCV_KERNEL(GCPUFitLine2DMat, cv::gapi::imgproc::GFitLine2DMat) +{ + static void run(const cv::Mat& in, const cv::DistanceTypes distType, const double param, + const double reps, const double aeps, cv::Vec4f& out) + { + cv::fitLine(in, out, distType, param, reps, aeps); + } +}; + +GAPI_OCV_KERNEL(GCPUFitLine2DVector32S, cv::gapi::imgproc::GFitLine2DVector32S) +{ + static void run(const std::vector& in, const cv::DistanceTypes distType, + const double param, const double reps, const double aeps, cv::Vec4f& out) + { + cv::fitLine(in, out, distType, param, reps, aeps); + } +}; + +GAPI_OCV_KERNEL(GCPUFitLine2DVector32F, cv::gapi::imgproc::GFitLine2DVector32F) +{ + static void run(const std::vector& in, const cv::DistanceTypes distType, + const double param, const double reps, const double aeps, cv::Vec4f& out) + { + cv::fitLine(in, out, distType, param, reps, aeps); + } +}; + +GAPI_OCV_KERNEL(GCPUFitLine2DVector64F, cv::gapi::imgproc::GFitLine2DVector64F) +{ + static void run(const std::vector& in, const cv::DistanceTypes distType, + const double 
param, const double reps, const double aeps, cv::Vec4f& out) + { + cv::fitLine(in, out, distType, param, reps, aeps); + } +}; + +GAPI_OCV_KERNEL(GCPUFitLine3DMat, cv::gapi::imgproc::GFitLine3DMat) +{ + static void run(const cv::Mat& in, const cv::DistanceTypes distType, const double param, + const double reps, const double aeps, cv::Vec6f& out) + { + cv::fitLine(in, out, distType, param, reps, aeps); + } +}; + +GAPI_OCV_KERNEL(GCPUFitLine3DVector32S, cv::gapi::imgproc::GFitLine3DVector32S) +{ + static void run(const std::vector& in, const cv::DistanceTypes distType, + const double param, const double reps, const double aeps, cv::Vec6f& out) + { + cv::fitLine(in, out, distType, param, reps, aeps); + } +}; + +GAPI_OCV_KERNEL(GCPUFitLine3DVector32F, cv::gapi::imgproc::GFitLine3DVector32F) +{ + static void run(const std::vector& in, const cv::DistanceTypes distType, + const double param, const double reps, const double aeps, cv::Vec6f& out) + { + cv::fitLine(in, out, distType, param, reps, aeps); + } +}; + +GAPI_OCV_KERNEL(GCPUFitLine3DVector64F, cv::gapi::imgproc::GFitLine3DVector64F) +{ + static void run(const std::vector& in, const cv::DistanceTypes distType, + const double param, const double reps, const double aeps, cv::Vec6f& out) + { + cv::fitLine(in, out, distType, param, reps, aeps); + } +}; + +GAPI_OCV_KERNEL(GCPUBGR2RGB, cv::gapi::imgproc::GBGR2RGB) +{ + static void run(const cv::Mat& in, cv::Mat &out) + { + cv::cvtColor(in, out, cv::COLOR_BGR2RGB); + } +}; + +GAPI_OCV_KERNEL(GCPUBGR2I420, cv::gapi::imgproc::GBGR2I420) +{ + static void run(const cv::Mat& in, cv::Mat &out) + { + cv::cvtColor(in, out, cv::COLOR_BGR2YUV_I420); + } +}; + +GAPI_OCV_KERNEL(GCPURGB2I420, cv::gapi::imgproc::GRGB2I420) +{ + static void run(const cv::Mat& in, cv::Mat &out) + { + cv::cvtColor(in, out, cv::COLOR_RGB2YUV_I420); + } +}; + +GAPI_OCV_KERNEL(GCPUI4202BGR, cv::gapi::imgproc::GI4202BGR) +{ + static void run(const cv::Mat& in, cv::Mat &out) + { + cv::cvtColor(in, out, 
cv::COLOR_YUV2BGR_I420); + } +}; + +GAPI_OCV_KERNEL(GCPUI4202RGB, cv::gapi::imgproc::GI4202RGB) +{ + static void run(const cv::Mat& in, cv::Mat &out) + { + cv::cvtColor(in, out, cv::COLOR_YUV2RGB_I420); + } +}; + GAPI_OCV_KERNEL(GCPURGB2YUV, cv::gapi::imgproc::GRGB2YUV) { static void run(const cv::Mat& in, cv::Mat &out) @@ -438,6 +624,7 @@ cv::gapi::GKernelPackage cv::gapi::imgproc::cpu::kernels() , GCPUMedianBlur , GCPUErode , GCPUDilate + , GCPUMorphologyEx , GCPUSobel , GCPUSobelXY , GCPULaplacian @@ -445,8 +632,28 @@ cv::gapi::GKernelPackage cv::gapi::imgproc::cpu::kernels() , GCPUCanny , GCPUGoodFeatures , GCPUEqualizeHist + , GCPUFindContours + , GCPUFindContoursNoOffset + , GCPUFindContoursH + , GCPUFindContoursHNoOffset + , GCPUBGR2RGB , GCPURGB2YUV + , GCPUBoundingRectMat + , GCPUBoundingRectVector32S + , GCPUBoundingRectVector32F + , GCPUFitLine2DMat + , GCPUFitLine2DVector32S + , GCPUFitLine2DVector32F + , GCPUFitLine2DVector64F + , GCPUFitLine3DMat + , GCPUFitLine3DVector32S + , GCPUFitLine3DVector32F + , GCPUFitLine3DVector64F , GCPUYUV2RGB + , GCPUBGR2I420 + , GCPURGB2I420 + , GCPUI4202BGR + , GCPUI4202RGB , GCPUNV12toRGB , GCPUNV12toBGR , GCPURGB2Lab diff --git a/modules/gapi/src/backends/cpu/gnnparsers.cpp b/modules/gapi/src/backends/cpu/gnnparsers.cpp index 234382d530..a5e4bf5f85 100644 --- a/modules/gapi/src/backends/cpu/gnnparsers.cpp +++ b/modules/gapi/src/backends/cpu/gnnparsers.cpp @@ -246,6 +246,28 @@ void parseSSD(const cv::Mat& in_ssd_result, } } +static void checkYoloDims(const MatSize& dims) { + const auto d = dims.dims(); + // Accept 1x13x13xN and 13x13xN + GAPI_Assert(d >= 2); + if (d >= 3) { + if (dims[d-2] == 13) { + GAPI_Assert(dims[d-1]%5 == 0); + GAPI_Assert(dims[d-2] == 13); + GAPI_Assert(dims[d-3] == 13); + for (int i = 0; i < d-3; i++) { + GAPI_Assert(dims[i] == 1); + } + return; + } + } + // Accept 1x1x1xN, 1x1xN, 1xN + GAPI_Assert(dims[d-1]%(5*13*13) == 0); + for (int i = 0; i < d-1; i++) { + GAPI_Assert(dims[i] == 1); + } +} 
+ void parseYolo(const cv::Mat& in_yolo_result, const cv::Size& in_size, const float confidence_threshold, @@ -255,12 +277,12 @@ void parseYolo(const cv::Mat& in_yolo_result, std::vector& out_labels) { const auto& dims = in_yolo_result.size; - GAPI_Assert(dims.dims() == 4); - GAPI_Assert(dims[0] == 1); - GAPI_Assert(dims[1] == 13); - GAPI_Assert(dims[2] == 13); - GAPI_Assert(dims[3] % 5 == 0); // 5 boxes - const auto num_classes = dims[3] / 5 - 5; + checkYoloDims(dims); + int acc = 1; + for (int i = 0; i < dims.dims(); i++) { + acc *= dims[i]; + } + const auto num_classes = acc/(5*13*13)-5; GAPI_Assert(num_classes > 0); GAPI_Assert(0 < nms_threshold && nms_threshold <= 1); out_boxes.clear(); diff --git a/modules/gapi/src/backends/fluid/gfluidbackend.cpp b/modules/gapi/src/backends/fluid/gfluidbackend.cpp index 9b95dff036..030bb10198 100644 --- a/modules/gapi/src/backends/fluid/gfluidbackend.cpp +++ b/modules/gapi/src/backends/fluid/gfluidbackend.cpp @@ -952,7 +952,7 @@ namespace using namespace cv::gimpl; GModel::Graph g(graph); GFluidModel fg(graph); - for (const auto node : g.nodes()) + for (const auto& node : g.nodes()) { if (g.metadata(node).get().t == NodeType::DATA) { @@ -1440,7 +1440,7 @@ void GFluidBackendImpl::addMetaSensitiveBackendPasses(ade::ExecutionEngineSetupC { // Add FluidData to all data nodes inside island, // set internal = true if node is not a slot in terms of higher-level GIslandModel - for (const auto node : isl->contents()) + for (const auto& node : isl->contents()) { if (g.metadata(node).get().t == NodeType::DATA && !fg.metadata(node).contains()) diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp b/modules/gapi/src/backends/fluid/gfluidcore.cpp index a6f8d56e4c..edc91f0179 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp @@ -151,6 +151,348 @@ GAPI_FLUID_KERNEL(GFluidAddW, cv::gapi::core::GAddW, false) enum Arithm { ARITHM_ABSDIFF, ARITHM_ADD, ARITHM_SUBTRACT, 
ARITHM_MULTIPLY, ARITHM_DIVIDE }; +#if CV_SIMD +CV_ALWAYS_INLINE void absdiff_store(short out[], const v_int16& a, const v_int16& b, int x) +{ + vx_store(&out[x], v_absdiffs(a, b)); +} + +CV_ALWAYS_INLINE void absdiff_store(ushort out[], const v_uint16& a, const v_uint16& b, int x) +{ + vx_store(&out[x], v_absdiff(a, b)); +} + +CV_ALWAYS_INLINE void absdiff_store(uchar out[], const v_uint8& a, const v_uint8& b, int x) +{ + vx_store(&out[x], v_absdiff(a, b)); +} + +CV_ALWAYS_INLINE void absdiff_store(float out[], const v_float32& a, const v_float32& b, int x) +{ + vx_store(&out[x], v_absdiff(a, b)); +} + +template +CV_ALWAYS_INLINE int absdiff_impl(const T in1[], const T in2[], T out[], int length) +{ + constexpr int nlanes = static_cast(VT::nlanes); + + if (length < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x += nlanes) + { + VT a = vx_load(&in1[x]); + VT b = vx_load(&in2[x]); + absdiff_store(out, a, b, x); + } + + if (x < length && (in1 != out) && (in2 != out)) + { + x = length - nlanes; + continue; // process one more time (unaligned tail) + } + break; + } + + return x; +} + +template +CV_ALWAYS_INLINE int absdiff_simd(const T in1[], const T in2[], T out[], int length) +{ + if (std::is_same::value) + { + return absdiff_impl(reinterpret_cast(in1), + reinterpret_cast(in2), + reinterpret_cast(out), length); + } + else if (std::is_same::value) + { + return absdiff_impl(reinterpret_cast(in1), + reinterpret_cast(in2), + reinterpret_cast(out), length); + } + else if (std::is_same::value) + { + return absdiff_impl(reinterpret_cast(in1), + reinterpret_cast(in2), + reinterpret_cast(out), length); + } + else if (std::is_same::value) + { + return absdiff_impl(reinterpret_cast(in1), + reinterpret_cast(in2), + reinterpret_cast(out), length); + } + + return 0; +} + +template +CV_ALWAYS_INLINE int add_simd_sametype(const T in1[], const T in2[], T out[], int length) +{ + constexpr int nlanes = static_cast(VT::nlanes); + + if (length < 
nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x += nlanes) + { + VT a = vx_load(&in1[x]); + VT b = vx_load(&in2[x]); + vx_store(&out[x], a + b); + } + + if (x < length && (in1 != out) && (in2 != out)) + { + x = length - nlanes; + continue; // process one more time (unaligned tail) + } + break; + } + + return x; +} + +template +CV_ALWAYS_INLINE int add_simd(const SRC in1[], const SRC in2[], DST out[], int length) +{ + if (std::is_same::value && !std::is_same::value) + return 0; + + if (std::is_same::value) + { + if (std::is_same::value) + { + return add_simd_sametype(reinterpret_cast(in1), + reinterpret_cast(in2), + reinterpret_cast(out), length); + } + else if (std::is_same::value) + { + return add_simd_sametype(reinterpret_cast(in1), + reinterpret_cast(in2), + reinterpret_cast(out), length); + } + else if (std::is_same::value) + { + return add_simd_sametype(reinterpret_cast(in1), + reinterpret_cast(in2), + reinterpret_cast(out), length); + } + } + else if (std::is_same::value && std::is_same::value) + { + constexpr int nlanes = static_cast(v_uint8::nlanes); + + if (length < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x += nlanes) + { + v_int16 a1 = vx_load(reinterpret_cast(&in1[x])); + v_int16 a2 = vx_load(reinterpret_cast(&in1[x + nlanes / 2])); + v_int16 b1 = vx_load(reinterpret_cast(&in2[x])); + v_int16 b2 = vx_load(reinterpret_cast(&in2[x + nlanes / 2])); + + vx_store(reinterpret_cast(&out[x]), v_pack_u(a1 + b1, a2 + b2)); + } + + if (x < length) + { + CV_DbgAssert((reinterpret_cast(in1) != reinterpret_cast(out)) && + (reinterpret_cast(in2) != reinterpret_cast(out))); + x = length - nlanes; + continue; // process one more time (unaligned tail) + } + break; + } + + return x; + } + else if (std::is_same::value && std::is_same::value) + { + constexpr int nlanes = static_cast(v_uint8::nlanes); + + if (length < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x 
+= nlanes) + { + v_float32 a1 = vx_load(reinterpret_cast(&in1[x])); + v_float32 a2 = vx_load(reinterpret_cast(&in1[x + nlanes / 4])); + v_float32 a3 = vx_load(reinterpret_cast(&in1[x + 2 * nlanes / 4])); + v_float32 a4 = vx_load(reinterpret_cast(&in1[x + 3 * nlanes / 4])); + + v_float32 b1 = vx_load(reinterpret_cast(&in2[x])); + v_float32 b2 = vx_load(reinterpret_cast(&in2[x + nlanes / 4])); + v_float32 b3 = vx_load(reinterpret_cast(&in2[x + 2 * nlanes / 4])); + v_float32 b4 = vx_load(reinterpret_cast(&in2[x + 3 * nlanes / 4])); + + vx_store(reinterpret_cast(&out[x]), v_pack_u(v_pack(v_round(a1 + b1), v_round(a2 + b2)), + v_pack(v_round(a3 + b3), v_round(a4 + b4)))); + } + + if (x < length) + { + CV_DbgAssert((reinterpret_cast(in1) != reinterpret_cast(out)) && + (reinterpret_cast(in2) != reinterpret_cast(out))); + x = length - nlanes; + continue; // process one more time (unaligned tail) + } + break; + } + + return x; + } + + return 0; +} + +template +CV_ALWAYS_INLINE int sub_simd_sametype(const T in1[], const T in2[], T out[], int length) +{ + constexpr int nlanes = static_cast(VT::nlanes); + + if (length < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x += nlanes) + { + VT a = vx_load(&in1[x]); + VT b = vx_load(&in2[x]); + vx_store(&out[x], a - b); + } + + if (x < length && (in1 != out) && (in2 != out)) + { + x = length - nlanes; + continue; // process one more time (unaligned tail) + } + break; + } + + return x; +} + +template +CV_ALWAYS_INLINE int sub_simd(const SRC in1[], const SRC in2[], DST out[], int length) +{ + if (std::is_same::value && !std::is_same::value) + return 0; + + if (std::is_same::value) + { + if (std::is_same::value) + { + return sub_simd_sametype(reinterpret_cast(in1), + reinterpret_cast(in2), + reinterpret_cast(out), length); + } + else if (std::is_same::value) + { + return sub_simd_sametype(reinterpret_cast(in1), + reinterpret_cast(in2), + reinterpret_cast(out), length); + } + else if 
(std::is_same::value) + { + return sub_simd_sametype(reinterpret_cast(in1), + reinterpret_cast(in2), + reinterpret_cast(out), length); + } + } + else if (std::is_same::value && std::is_same::value) + { + constexpr int nlanes = static_cast(v_uint8::nlanes); + + if (length < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x += nlanes) + { + v_int16 a1 = vx_load(reinterpret_cast(&in1[x])); + v_int16 a2 = vx_load(reinterpret_cast(&in1[x + nlanes / 2])); + v_int16 b1 = vx_load(reinterpret_cast(&in2[x])); + v_int16 b2 = vx_load(reinterpret_cast(&in2[x + nlanes / 2])); + + vx_store(reinterpret_cast(&out[x]), v_pack_u(a1 - b1, a2 - b2)); + } + + if (x < length) + { + CV_DbgAssert((reinterpret_cast(in1) != reinterpret_cast(out)) && + (reinterpret_cast(in2) != reinterpret_cast(out))); + x = length - nlanes; + continue; // process one more time (unaligned tail) + } + break; + } + + return x; + } + else if (std::is_same::value && std::is_same::value) + { + constexpr int nlanes = static_cast(v_uint8::nlanes); + + if (length < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x += nlanes) + { + v_float32 a1 = vx_load(reinterpret_cast(&in1[x])); + v_float32 a2 = vx_load(reinterpret_cast(&in1[x + nlanes / 4])); + v_float32 a3 = vx_load(reinterpret_cast(&in1[x + 2 * nlanes / 4])); + v_float32 a4 = vx_load(reinterpret_cast(&in1[x + 3 * nlanes / 4])); + + v_float32 b1 = vx_load(reinterpret_cast(&in2[x])); + v_float32 b2 = vx_load(reinterpret_cast(&in2[x + nlanes / 4])); + v_float32 b3 = vx_load(reinterpret_cast(&in2[x + 2 * nlanes / 4])); + v_float32 b4 = vx_load(reinterpret_cast(&in2[x + 3 * nlanes / 4])); + + vx_store(reinterpret_cast(&out[x]), v_pack_u(v_pack(v_round(a1 - b1), v_round(a2 - b2)), + v_pack(v_round(a3 - b3), v_round(a4 - b4)))); + } + + if (x < length) + { + CV_DbgAssert((reinterpret_cast(in1) != reinterpret_cast(out)) && + (reinterpret_cast(in2) != reinterpret_cast(out))); + x = length - nlanes; + 
continue; // process one more time (unaligned tail) + } + break; + } + + return x; + } + + return 0; +} +#endif + template static void run_arithm(Buffer &dst, const View &src1, const View &src2, Arithm arithm, double scale=1) @@ -168,29 +510,37 @@ static void run_arithm(Buffer &dst, const View &src1, const View &src2, Arithm a // NB: assume in/out types are not 64-bits float _scale = static_cast( scale ); + int x = 0; + switch (arithm) { - case ARITHM_ABSDIFF: - for (int l=0; l < length; l++) - out[l] = absdiff(in1[l], in2[l]); - break; - case ARITHM_ADD: - for (int l=0; l < length; l++) - out[l] = add(in1[l], in2[l]); - break; - case ARITHM_SUBTRACT: - for (int l=0; l < length; l++) - out[l] = sub(in1[l], in2[l]); - break; - case ARITHM_MULTIPLY: - for (int l=0; l < length; l++) - out[l] = mul(in1[l], in2[l], _scale); - break; - case ARITHM_DIVIDE: - for (int l=0; l < length; l++) - out[l] = div(in1[l], in2[l], _scale); - break; - default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation"); + case ARITHM_ADD: + { +#if CV_SIMD + x = add_simd(in1, in2, out, length); +#endif + for (; x < length; ++x) + out[x] = add(in1[x], in2[x]); + break; + } + case ARITHM_SUBTRACT: + { +#if CV_SIMD + x = sub_simd(in1, in2, out, length); +#endif + for (; x < length; ++x) + out[x] = sub(in1[x], in2[x]); + break; + } + case ARITHM_MULTIPLY: + for (; x < length; ++x) + out[x] = mul(in1[x], in2[x], _scale); + break; + case ARITHM_DIVIDE: + for (; x < length; ++x) + out[x] = div(in1[x], in2[x], _scale); + break; + default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation"); } } @@ -270,6 +620,29 @@ GAPI_FLUID_KERNEL(GFluidDiv, cv::gapi::core::GDiv, false) } }; +template +static void run_absdiff(Buffer &dst, const View &src1, const View &src2) +{ + static_assert(std::is_same::value, "wrong types"); + static_assert(std::is_same::value, "wrong types"); + + const auto *in1 = src1.InLine(0); + const auto *in2 = src2.InLine(0); + auto *out = dst.OutLine(); + + 
int width = dst.length(); + int chan = dst.meta().chan; + int length = width * chan; + + int x = 0; + +#if CV_SIMD + x = absdiff_simd(in1, in2, out, length); +#endif + for (; x < length; ++x) + out[x] = absdiff(in1[x], in2[x]); +} + GAPI_FLUID_KERNEL(GFluidAbsDiff, cv::gapi::core::GAbsDiff, false) { static const int Window = 1; @@ -277,10 +650,10 @@ GAPI_FLUID_KERNEL(GFluidAbsDiff, cv::gapi::core::GAbsDiff, false) static void run(const View &src1, const View &src2, Buffer &dst) { // DST SRC1 SRC2 OP __VA_ARGS__ - BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ABSDIFF); - BINARY_(ushort, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_ABSDIFF); - BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_ABSDIFF); - BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_ABSDIFF); + BINARY_(uchar , uchar , uchar , run_absdiff, dst, src1, src2); + BINARY_(ushort, ushort, ushort, run_absdiff, dst, src1, src2); + BINARY_( short, short, short, run_absdiff, dst, src1, src2); + BINARY_( float, float, float, run_absdiff, dst, src1, src2); CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); } diff --git a/modules/gapi/src/backends/ie/bindings_ie.cpp b/modules/gapi/src/backends/ie/bindings_ie.cpp new file mode 100644 index 0000000000..35191d7bcb --- /dev/null +++ b/modules/gapi/src/backends/ie/bindings_ie.cpp @@ -0,0 +1,39 @@ +#include + +cv::gapi::ie::PyParams::PyParams(const std::string &tag, + const std::string &model, + const std::string &weights, + const std::string &device) + : m_priv(std::make_shared>(tag, model, weights, device)) { +} + +cv::gapi::ie::PyParams::PyParams(const std::string &tag, + const std::string &model, + const std::string &device) + : m_priv(std::make_shared>(tag, model, device)) { +} + +cv::gapi::GBackend cv::gapi::ie::PyParams::backend() const { + return m_priv->backend(); +} + +std::string cv::gapi::ie::PyParams::tag() const { + return m_priv->tag(); +} + +cv::util::any 
cv::gapi::ie::PyParams::params() const { + return m_priv->params(); +} + +cv::gapi::ie::PyParams cv::gapi::ie::params(const std::string &tag, + const std::string &model, + const std::string &weights, + const std::string &device) { + return {tag, model, weights, device}; +} + +cv::gapi::ie::PyParams cv::gapi::ie::params(const std::string &tag, + const std::string &model, + const std::string &device) { + return {tag, model, device}; +} diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 1565d03aec..85c0236ff1 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -175,11 +175,27 @@ struct IEUnit { IE::InputsDataMap inputs; IE::OutputsDataMap outputs; + IE::ExecutableNetwork this_network; + cv::gimpl::ie::wrap::Plugin this_plugin; + explicit IEUnit(const cv::gapi::ie::detail::ParamDesc &pp) : params(pp) { - net = cv::gimpl::ie::wrap::readNetwork(params); - inputs = net.getInputsInfo(); - outputs = net.getOutputsInfo(); + if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { + net = cv::gimpl::ie::wrap::readNetwork(params); + inputs = net.getInputsInfo(); + outputs = net.getOutputsInfo(); + } else if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import) { + this_plugin = cv::gimpl::ie::wrap::getPlugin(params); + this_plugin.SetConfig(params.config); + this_network = cv::gimpl::ie::wrap::importNetwork(this_plugin, params); + // FIXME: ICNNetwork returns InputsDataMap/OutputsDataMap, + // but ExecutableNetwork returns ConstInputsDataMap/ConstOutputsDataMap + inputs = cv::gimpl::ie::wrap::toInputsDataMap(this_network.GetInputsInfo()); + outputs = cv::gimpl::ie::wrap::toOutputsDataMap(this_network.GetOutputsInfo()); + } else { + cv::util::throw_error(std::logic_error("Unsupported ParamDesc::Kind")); + } + // The practice shows that not all inputs and not all outputs // are mandatory to specify in IE model. 
// So what we're concerned here about is: @@ -205,10 +221,16 @@ struct IEUnit { // This method is [supposed to be] called at Island compilation stage cv::gimpl::ie::IECompiled compile() const { - auto plugin = cv::gimpl::ie::wrap::getPlugin(params); - auto this_network = cv::gimpl::ie::wrap::loadNetwork(plugin, net, params); - auto this_request = this_network.CreateInferRequest(); + IEUnit* non_const_this = const_cast(this); + if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { + // FIXME: In case importNetwork for fill inputs/outputs need to obtain ExecutableNetwork, but + // for loadNetwork they can be obtained by using readNetwork + non_const_this->this_plugin = cv::gimpl::ie::wrap::getPlugin(params); + non_const_this->this_plugin.SetConfig(params.config); + non_const_this->this_network = cv::gimpl::ie::wrap::loadNetwork(non_const_this->this_plugin, net, params); + } + auto this_request = non_const_this->this_network.CreateInferRequest(); // Bind const data to infer request for (auto &&p : params.const_inputs) { // FIXME: SetBlob is known to be inefficient, @@ -217,7 +239,16 @@ struct IEUnit { // Still, constant data is to set only once. this_request.SetBlob(p.first, wrapIE(p.second.first, p.second.second)); } - return {plugin, this_network, this_request}; + // Bind const data to infer request + for (auto &&p : params.const_inputs) { + // FIXME: SetBlob is known to be inefficient, + // it is worth to make a customizable "initializer" and pass the + // cv::Mat-wrapped blob there to support IE's optimal "GetBlob idiom" + // Still, constant data is to set only once. 
+ this_request.SetBlob(p.first, wrapIE(p.second.first, p.second.second)); + } + + return {this_plugin, this_network, this_request}; } }; @@ -490,6 +521,65 @@ struct Infer: public cv::detail::KernelTag { } }; +struct InferROI: public cv::detail::KernelTag { + using API = cv::GInferROIBase; + static cv::gapi::GBackend backend() { return cv::gapi::ie::backend(); } + static KImpl kernel() { return KImpl{outMeta, run}; } + + static cv::GMetaArgs outMeta(const ade::Graph &gr, + const ade::NodeHandle &nh, + const cv::GMetaArgs &in_metas, + const cv::GArgs &/*in_args*/) { + cv::GMetaArgs result; + + GConstGIEModel gm(gr); + const auto &uu = gm.metadata(nh).get(); + + // Initialize input information + // FIXME: So far it is pretty limited + GAPI_Assert(1u == uu.params.input_names.size()); + GAPI_Assert(2u == in_metas.size()); + + // 0th is ROI, 1st is in0put image + auto &&ii = uu.inputs.at(uu.params.input_names.at(0)); + const auto &meta = util::get(in_metas.at(1)); + ii->setPrecision(toIE(meta.depth)); + ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); + + // FIXME: It would be nice here to have an exact number of network's + // input/output parameters. Probably GCall should store it here for us. + // It doesn't, as far as I know.. 
+ for (const auto &out_name : uu.params.output_names) { + // NOTE: our output_names vector follows the API order + // of this operation's outputs + const IE::DataPtr& ie_out = uu.outputs.at(out_name); + const IE::SizeVector dims = ie_out->getTensorDesc().getDims(); + + cv::GMatDesc outm(toCV(ie_out->getPrecision()), + toCV(ie_out->getTensorDesc().getDims())); + result.emplace_back(outm); + } + return result; + } + + static void run(IECompiled &iec, const IEUnit &uu, IECallContext &ctx) { + // non-generic version for now, per the InferROI's definition + GAPI_Assert(uu.params.num_in == 1); + const auto& this_roi = ctx.inArg(0).rref(); + const auto this_mat = ctx.inMat(1); + IE::Blob::Ptr this_blob = wrapIE(this_mat, cv::gapi::ie::TraitAs::IMAGE); + IE::Blob::Ptr roi_blob = IE::make_shared_blob(this_blob, toIE(this_roi)); + iec.this_request.SetBlob(*uu.params.input_names.begin(), roi_blob); + iec.this_request.Infer(); + for (auto i : ade::util::iota(uu.params.num_out)) { + cv::Mat& out_mat = ctx.outMatR(i); + IE::Blob::Ptr out_blob = iec.this_request.GetBlob(uu.params.output_names[i]); + copyFromIE(out_blob, out_mat); + } + } +}; + + struct InferList: public cv::detail::KernelTag { using API = cv::GInferListBase; static cv::gapi::GBackend backend() { return cv::gapi::ie::backend(); } @@ -721,9 +811,23 @@ namespace { // FIXME: Introduce a DNNBackend interface which'd specify // the framework for this??? 
GIEModel gm(gr); - const auto &np = gm.metadata(nh).get(); - const auto &pp = cv::util::any_cast(np.opaque); + auto &np = gm.metadata(nh).get(); + auto &pp = cv::util::any_cast(np.opaque); const auto &ki = cv::util::any_cast(ii.opaque); + + GModel::Graph model(gr); + auto& op = model.metadata(nh).get(); + + // NB: In case generic infer, info about in/out names is stored in operation (op.params) + if (pp.is_generic) + { + auto& info = cv::util::any_cast(op.params); + pp.input_names = info.in_names; + pp.output_names = info.out_names; + pp.num_in = info.in_names.size(); + pp.num_out = info.out_names.size(); + } + gm.metadata(nh).set(IEUnit{pp}); gm.metadata(nh).set(IECallable{ki.run}); gm.metadata(nh).set(CustomMetaFunction{ki.customMetaFunc}); @@ -737,6 +841,7 @@ namespace { virtual cv::gapi::GKernelPackage auxiliaryKernels() const override { return cv::gapi::kernels< cv::gimpl::ie::Infer + , cv::gimpl::ie::InferROI , cv::gimpl::ie::InferList , cv::gimpl::ie::InferList2 >(); diff --git a/modules/gapi/src/backends/ie/giebackend/giewrapper.cpp b/modules/gapi/src/backends/ie/giebackend/giewrapper.cpp index 444d9553e7..ba0632d4f0 100644 --- a/modules/gapi/src/backends/ie/giebackend/giewrapper.cpp +++ b/modules/gapi/src/backends/ie/giebackend/giewrapper.cpp @@ -22,6 +22,24 @@ namespace IE = InferenceEngine; namespace giewrap = cv::gimpl::ie::wrap; using GIEParam = cv::gapi::ie::detail::ParamDesc; +IE::InputsDataMap giewrap::toInputsDataMap (const IE::ConstInputsDataMap& inputs) { + IE::InputsDataMap transformed; + auto convert = [](const std::pair& p) { + return std::make_pair(p.first, std::const_pointer_cast(p.second)); + }; + std::transform(inputs.begin(), inputs.end(), std::inserter(transformed, transformed.end()), convert); + return transformed; +} + +IE::OutputsDataMap giewrap::toOutputsDataMap (const IE::ConstOutputsDataMap& outputs) { + IE::OutputsDataMap transformed; + auto convert = [](const std::pair& p) { + return std::make_pair(p.first, 
std::const_pointer_cast(p.second)); + }; + std::transform(outputs.begin(), outputs.end(), std::inserter(transformed, transformed.end()), convert); + return transformed; +} + #if INF_ENGINE_RELEASE < 2020000000 // < 2020.1 // Load extensions (taken from DNN module) std::vector giewrap::getExtensions(const GIEParam& params) { diff --git a/modules/gapi/src/backends/ie/giebackend/giewrapper.hpp b/modules/gapi/src/backends/ie/giebackend/giewrapper.hpp index 7871942d26..3927c802b7 100644 --- a/modules/gapi/src/backends/ie/giebackend/giewrapper.hpp +++ b/modules/gapi/src/backends/ie/giebackend/giewrapper.hpp @@ -28,7 +28,11 @@ namespace wrap { GAPI_EXPORTS std::vector getExtensions(const GIEParam& params); GAPI_EXPORTS IE::CNNNetwork readNetwork(const GIEParam& params); +IE::InputsDataMap toInputsDataMap (const IE::ConstInputsDataMap& inputs); +IE::OutputsDataMap toOutputsDataMap(const IE::ConstOutputsDataMap& outputs); + #if INF_ENGINE_RELEASE < 2019020000 // < 2019.R2 +using Plugin = IE::InferencePlugin; GAPI_EXPORTS IE::InferencePlugin getPlugin(const GIEParam& params); GAPI_EXPORTS inline IE::ExecutableNetwork loadNetwork( IE::InferencePlugin& plugin, const IE::CNNNetwork& net, @@ -36,7 +40,12 @@ GAPI_EXPORTS inline IE::ExecutableNetwork loadNetwork( IE::InferencePlugin& return plugin.LoadNetwork(net, {}); // FIXME: 2nd parameter to be // configurable via the API } +GAPI_EXPORTS inline IE::ExecutableNetwork importNetwork( IE::CNNNetwork& plugin, + const GIEParam& param) { + return plugin.ImportNetwork(param.model_path, param.device_id, {}); +} #else // >= 2019.R2 +using Plugin = IE::Core; GAPI_EXPORTS IE::Core getCore(); GAPI_EXPORTS IE::Core getPlugin(const GIEParam& params); GAPI_EXPORTS inline IE::ExecutableNetwork loadNetwork( IE::Core& core, @@ -44,6 +53,10 @@ GAPI_EXPORTS inline IE::ExecutableNetwork loadNetwork( IE::Core& core const GIEParam& params) { return core.LoadNetwork(net, params.device_id); } +GAPI_EXPORTS inline IE::ExecutableNetwork importNetwork( 
IE::Core& core, + const GIEParam& param) { + return core.ImportNetwork(param.model_path, param.device_id, {}); +} #endif // INF_ENGINE_RELEASE < 2019020000 }}}} diff --git a/modules/gapi/src/backends/ocl/goclbackend.cpp b/modules/gapi/src/backends/ocl/goclbackend.cpp index 34dba01afe..847b802fd2 100644 --- a/modules/gapi/src/backends/ocl/goclbackend.cpp +++ b/modules/gapi/src/backends/ocl/goclbackend.cpp @@ -272,4 +272,8 @@ void cv::gimpl::GOCLExecutable::run(std::vector &&input_objs, GAPI_Assert((out_arg_data == (mag_mat.getMat(ACCESS_RW).data)) && " data for output parameters was reallocated ?"); } } + + // In/Out args clean-up is mandatory now with RMat + for (auto &it : input_objs) magazine::unbind(m_res, it.first); + for (auto &it : output_objs) magazine::unbind(m_res, it.first); } diff --git a/modules/gapi/src/backends/onnx/gonnxbackend.cpp b/modules/gapi/src/backends/onnx/gonnxbackend.cpp new file mode 100644 index 0000000000..7ab386ecab --- /dev/null +++ b/modules/gapi/src/backends/onnx/gonnxbackend.cpp @@ -0,0 +1,963 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + +#include "precomp.hpp" +#include "backends/onnx/gonnxbackend.hpp" + +#ifdef HAVE_ONNX + +#include // any_of +#include +#include +#include + +#include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK! 
+ +namespace cv { +namespace gimpl { +namespace onnx { + +enum TensorPosition : int { + INPUT, + OUTPUT +}; + +struct TensorInfo { + TensorInfo() = default; + explicit TensorInfo(const Ort::TensorTypeAndShapeInfo& info) + : dims(info.GetShape()) + , type(info.GetElementType()) + , is_dynamic(std::find(dims.begin(), dims.end(), -1) != dims.end()) { + if (!is_dynamic) { + size = std::accumulate(dims.begin(), + dims.end(), + static_cast(1), + std::multiplies()); + } + // Heuristic: check if the tensor is grayscale input + if (dims.size() == 4u + && dims[0] == 1 + && dims[1] == 1 + && dims[2] > 1 + && dims[3] > 1) { + is_grayscale = true; + } + } + + std::string name; + std::vector dims; + ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; + int64_t size = -1; + + bool normalize = true; + + bool is_dynamic = false; + bool is_grayscale = false; + + struct MeanStdev { + cv::Scalar mean; + cv::Scalar stdev; + }; + cv::util::optional mstd; +}; + +class ONNXCompiled { + // ONNX Resources + // NOTE: Env must live with the session, otherwise segfaults. 
+ Ort::Env this_env{nullptr}; + Ort::Session this_session{nullptr}; + Ort::MemoryInfo this_memory_info{nullptr}; + + std::vector in_tensor_info; + std::vector out_tensor_info; + bool is_dynamic = false; + + // G-API description + gapi::onnx::detail::ParamDesc params; + + // Input/output tensor information + std::vector getTensorInfo(TensorPosition pos); + + // Run-time data structures + std::vector in_data; + std::vector out_data; + + void Run(const std::vector& ins, + const std::vector& outs); + +public: + explicit ONNXCompiled(const gapi::onnx::detail::ParamDesc &pp); + + // Extract the information about output layer #i + cv::GMatDesc outMeta(int i) const; + + // Assign input/output info + std::size_t numInputs() const { return params.num_in; } + std::size_t numOutputs() const { return params.num_out; } + void setInput(int i, const cv::Mat &m); + void setOutput(int i, cv::Mat &m); + cv::Mat allocOutput(int i) const; + + // Run with the assigned inputs/outputs + void run(); +}; + +} // namespace onnx +} // namespace gimpl +} // namespace cv + +namespace { + +inline std::vector getCharNames(const std::vector& names) { + std::vector out_vec; + for (const auto& el : names) { + out_vec.push_back(el.data()); + } + return out_vec; +} + +inline int getIdxByName(const std::vector& info, const std::string& name) { + // FIXME: Cache the ordering + const auto it = std::find_if(info.begin(), info.end(), [&](const cv::gimpl::onnx::TensorInfo &i) { + return i.name == name; + }); + GAPI_Assert(it != info.end()); + return std::distance(info.begin(), it); +} + +inline int toCV(ONNXTensorElementDataType prec) { + switch (prec) { + case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: return CV_8U; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: return CV_32F; + default: GAPI_Assert(false && "Unsupported data type"); + } + return -1; +} + +inline std::vector toCV(const std::vector &vsz) { + std::vector result; + result.reserve(vsz.size()); + for (auto sz : vsz) { + 
result.push_back(ade::util::checked_cast(sz)); + } + return result; +} + +inline cv::Mat toCV(Ort::Value &v) { + auto info = v.GetTensorTypeAndShapeInfo(); + return cv::Mat(toCV(info.GetShape()), + toCV(info.GetElementType()), + reinterpret_cast(v.GetTensorMutableData())); +} + +inline std::vector toORT(const cv::MatSize &sz) { + return cv::to_own(sz); +} + +inline void preprocess(const cv::Mat& src, + const cv::gimpl::onnx::TensorInfo& ti, + cv::Mat& dst) { + GAPI_Assert(src.depth() == CV_32F || src.depth() == CV_8U); + + if (src.depth() == CV_32F) { + // Just pass the tensor as-is. + // No layout or dimension transformations done here! + // TODO: This needs to be aligned across all NN backends. + GAPI_Assert(toCV(ti.type) == CV_32F && "Only 32F model input is supported for 32F data"); + const auto tensor_dims = toORT(src.size); + if (tensor_dims.size() == ti.dims.size()) { + for (size_t i = 0; i < ti.dims.size(); ++i) { + GAPI_Assert((ti.dims[i] == -1 || ti.dims[i] == tensor_dims[i]) && + "32F tensor dimensions should match with all non-dynamic NN input dimensions"); + } + } else { + GAPI_Assert(false && "32F tensor size should match with NN input"); + } + + dst = src; + } else { + // 8U input: full preprocessing path + GAPI_Assert(src.depth() == CV_8U && "Only 8U data type is supported for preproc"); + GAPI_Assert(ti.dims.size() == 4u && "Only NCHW/NHWC layouts are supported for preproc"); + + const auto ddepth = toCV(ti.type); + GAPI_Assert((ddepth == CV_8U || ddepth == CV_32F) + && "Only 8U and 32F model input is supported for 8U data"); + + // Assess the expected input layout + const bool is_hwc = [&](int ch) { + if (ti.is_grayscale) return false; // 1,1,h,w + else if (ti.dims[3] == ch) return true; // _,_,_,c + else if (ti.dims[1] == ch) return false; // _,c,_,_ + else cv::util::throw_error(std::logic_error("Couldn't identify input tensor layout")); + } (src.channels()); + + int new_c = src.channels(); + cv::Mat csc; + if (ti.is_grayscale && new_c == 3) { + 
cv::cvtColor(src, csc, cv::COLOR_BGR2GRAY); + new_c = 1; + } else { + csc = src; + } + + // NHWC vs NCHW + int new_h = -1, new_w = -1; + if (ti.is_dynamic) { + // reuse h & w from the input image + new_h = src.rows; + new_w = src.cols; + } else { + // take h & w from the ONNX tensor info + new_h = ti.dims[is_hwc ? 1 : 2]; + new_w = ti.dims[is_hwc ? 2 : 3]; + } + GAPI_Assert(new_h != -1 && new_w != -1); + + cv::Mat rsz, pp; + cv::resize(csc, rsz, cv::Size(new_w, new_h)); + if (src.depth() == CV_8U && ddepth == CV_32F) { + rsz.convertTo(pp, ddepth, ti.normalize ? 1.f / 255 : 1.f); + if (ti.mstd.has_value()) { + pp -= ti.mstd->mean; + pp /= ti.mstd->stdev; + } + } else { + pp = rsz; + } + + if (!is_hwc && new_c > 1) { + // Convert to CHW + dst.create(cv::Size(new_w, new_h * new_c), ddepth); + std::vector planes(new_c); + for (int ch = 0; ch < new_c; ++ch) { + planes[ch] = dst.rowRange(ch * new_h, (ch + 1) * new_h); + } + cv::split(pp, planes); + } else { + // Keep HWC + dst = pp; + } + + // Ensure dst is a tensor shape (not a 2D image) + if (ti.is_dynamic) { + // Reshape to input dimensions + const std::vector out_dims = is_hwc + ? std::vector{1, new_h, new_w, new_c} + : std::vector{1, new_c, new_h, new_w}; + dst = dst.reshape(1, out_dims); + } else { + // Reshape to ONNX dimensions (no -1s there!) 
+ dst = dst.reshape(1, toCV(ti.dims)); + } + } +} + +template +inline Ort::Value createTensor(const Ort::MemoryInfo& memory_info, + const cv::gimpl::onnx::TensorInfo& tensor_params, + const cv::Mat& data) { + (void) tensor_params; + auto ort_dims = toORT(data.size); + return Ort::Value::CreateTensor(memory_info, + const_cast(data.ptr()), + data.total(), + ort_dims.data(), + ort_dims.size()); +} + +inline Ort::Value createTensor(const Ort::MemoryInfo& memory_info, + const cv::gimpl::onnx::TensorInfo& tensor_params, + const cv::Mat& data) { + GAPI_Assert(data.isContinuous ()); + switch (tensor_params.type) { + case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: + return createTensor(memory_info, tensor_params, data); + case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: + return createTensor(memory_info, tensor_params, data); + default: + GAPI_Assert(false && "Unsupported data type"); + } + return Ort::Value{nullptr}; +} + +struct ONNXUnit { + static const char *name() { return "ONNXModelConfig"; } + + std::shared_ptr oc; + + explicit ONNXUnit(const cv::gapi::onnx::detail::ParamDesc &pp) + : oc(new cv::gimpl::onnx::ONNXCompiled(pp)) { + } +}; + +struct ONNXCallContext { + // Input parameters passed to an inference operation. + std::vector args; + + //FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call + //to OCV kernel. 
(This can be achieved by a two single time conversions in GCPUExecutable::run, + //once on enter for input and output arguments, and once before return for output arguments only + //FIXME: check if the above applies to this backend (taken from CPU) + std::unordered_map results; + + // Generic accessor API + template + const T& inArg(std::size_t input) { return args.at(input).get(); } + + // Syntax sugar + const cv::Mat& inMat(std::size_t input) { + return inArg(input); + } + cv::Mat& outMatR(std::size_t output) { + return *cv::util::get(results.at(output)); + } + + template std::vector& outVecR(std::size_t output) { // FIXME: the same issue + return outVecRef(output).wref(); + } + cv::detail::VectorRef& outVecRef(std::size_t output) { + return cv::util::get(results.at(output)); + } +}; + +struct ONNXCallable { + static const char *name() { return "ONNXRequestCallable"; } + using Run = std::function; + Run run; +}; + +struct KImpl { + cv::gimpl::CustomMetaFunction::CM customMetaFunc; + ONNXCallable::Run run; +}; + +// FIXME: Is there a way to take a typed graph (our GModel), +// and create a new typed graph _ATOP_ of that (by extending with a couple of +// new types?). +// Alternatively, is there a way to compose types graphs? +// +// If not, we need to introduce that! 
+using GONNXModel = ade::TypedGraph + < cv::gimpl::Protocol + , cv::gimpl::Op + , cv::gimpl::NetworkParams + , cv::gimpl::CustomMetaFunction + , ONNXUnit + , ONNXCallable + >; + +// FIXME: Same issue with Typed and ConstTyped +using GConstGONNXModel = ade::ConstTypedGraph + < cv::gimpl::Protocol + , cv::gimpl::Op + , cv::gimpl::NetworkParams + , cv::gimpl::CustomMetaFunction + , ONNXUnit + , ONNXCallable + >; +} // anonymous namespace + +// GCPUExcecutable implementation ////////////////////////////////////////////// +cv::gimpl::onnx::GONNXExecutable::GONNXExecutable(const ade::Graph &g, + const std::vector &nodes) + : m_g(g), m_gm(m_g) { + // FIXME: Currently this backend is capable to run a single inference node only. + // Need to extend our island fusion with merge/not-to-merge decision making parametrization + GConstGONNXModel iem(g); + + for (auto &nh : nodes) { + switch (m_gm.metadata(nh).get().t) { + case NodeType::OP: + if (this_nh == nullptr) { + this_nh = nh; + } + else { + util::throw_error(std::logic_error("Multi-node inference is not supported!")); + } + break; + + case NodeType::DATA: { + m_dataNodes.push_back(nh); + const auto &desc = m_gm.metadata(nh).get(); + if (desc.storage == Data::Storage::CONST_VAL) { + util::throw_error(std::logic_error("No const data supported in backend!")); + } + if (desc.storage == Data::Storage::INTERNAL) { + util::throw_error(std::logic_error("No internal data supported in backend!")); + } + break; + } + default: util::throw_error(std::logic_error("Unsupported NodeType")); + } + } +} + +// FIXME: Document what it does +cv::GArg cv::gimpl::onnx::GONNXExecutable::packArg(const cv::GArg &arg) { + // No API placeholders allowed at this point + // FIXME: this check has to be done somewhere in compilation stage. 
+ GAPI_Assert( arg.kind != cv::detail::ArgKind::GMAT + && arg.kind != cv::detail::ArgKind::GSCALAR + && arg.kind != cv::detail::ArgKind::GARRAY + && arg.kind != cv::detail::ArgKind::GOPAQUE); + + if (arg.kind != cv::detail::ArgKind::GOBJREF) { + util::throw_error(std::logic_error("Inference supports G-types ONLY!")); + } + GAPI_Assert(arg.kind == cv::detail::ArgKind::GOBJREF); + + // Wrap associated CPU object (either host or an internal one) + // FIXME: object can be moved out!!! GExecutor faced that. + const cv::gimpl::RcDesc &ref = arg.get(); + switch (ref.shape) + { + case GShape::GMAT: return GArg(m_res.slot()[ref.id]); + + // Note: .at() is intentional for GArray as object MUST be already there + // (and constructed by either bindIn/Out or resetInternal) + case GShape::GARRAY: return GArg(m_res.slot().at(ref.id)); + + // Note: .at() is intentional for GOpaque as object MUST be already there + // (and constructed by either bindIn/Out or resetInternal) + case GShape::GOPAQUE: return GArg(m_res.slot().at(ref.id)); + + default: + util::throw_error(std::logic_error("Unsupported GShape type")); + break; + } +} + +void cv::gimpl::onnx::GONNXExecutable::run(std::vector &&input_objs, + std::vector &&output_objs) { + // Update resources with run-time information - what this Island + // has received from user (or from another Island, or mix...) + // FIXME: Check input/output objects against GIsland protocol + + for (auto& it : input_objs) magazine::bindInArg (m_res, it.first, it.second); + for (auto& it : output_objs) magazine::bindOutArg(m_res, it.first, it.second); + + // FIXME: Running just a single node now. + // Not sure if need to support many of them, though + // FIXME: Make this island-unmergeable? 
+ const auto &op = m_gm.metadata(this_nh).get(); + + // Initialize kernel's execution context: + // - Input parameters + ONNXCallContext context; + context.args.reserve(op.args.size()); + using namespace std::placeholders; + ade::util::transform(op.args, + std::back_inserter(context.args), + std::bind(&GONNXExecutable::packArg, this, _1)); + + // - Output parameters. + for (const auto &out_it : ade::util::indexed(op.outs)) { + // FIXME: Can the same GArg type resolution mechanism be reused here? + const auto out_port = ade::util::index(out_it); + const auto out_desc = ade::util::value(out_it); + context.results[out_port] = magazine::getObjPtr(m_res, out_desc); + } + + // And now trigger the execution + GConstGONNXModel giem(m_g); + const auto &uu = giem.metadata(this_nh).get(); + const auto &kk = giem.metadata(this_nh).get(); + kk.run(uu, context); + + for (auto &it : output_objs) magazine::writeBack(m_res, it.first, it.second); +} + +namespace cv { +namespace gimpl { +namespace onnx { + +ONNXCompiled::ONNXCompiled(const gapi::onnx::detail::ParamDesc &pp) + : params(pp) { + + // Validate input parameters before allocating any resources + if (params.num_in > 1u && params.num_in != params.input_names.size()) { + cv::util::throw_error(std::logic_error("Please specify input layer names for " + + params.model_path)); + } + if (params.num_out > 1u && params.num_out != params.output_names.size()) { + cv::util::throw_error(std::logic_error("Please specify output layer names for " + + params.model_path)); + } + + // Create and initialize the ONNX session + Ort::SessionOptions session_options; + this_env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, ""); + this_session = Ort::Session(this_env, params.model_path.data(), session_options); + this_memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + + in_tensor_info = getTensorInfo(INPUT); + out_tensor_info = getTensorInfo(OUTPUT); + + const auto is_dyn = [](const TensorInfo &ti) { + return ti.is_dynamic; + 
}; + is_dynamic = ade::util::any_of(in_tensor_info, is_dyn) + || ade::util::any_of(out_tensor_info, is_dyn); + if (is_dynamic && !params.custom_post_proc) { + util::throw_error(std::logic_error("This network has dynamic shapes. " + "Please provide a custom post-processing function " + "(.cfgPostProc) in network parameters")); + } + + // Update parameters based on session information + if (params.num_in == 1u && params.input_names.empty()) { + params.input_names = { in_tensor_info.front().name }; + } + if (params.num_out == 1u && params.output_names.empty()) { + params.output_names = { out_tensor_info.front().name }; + } + + // Validate what is supported currently + GAPI_Assert(params.const_inputs.empty() + && "Const inputs are not currently supported"); + GAPI_Assert(std::all_of(in_tensor_info.begin(), + in_tensor_info.end(), + [](const cv::gimpl::onnx::TensorInfo &p) { + return p.type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT + || p.type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8; + }) + && "Only FP32 and U8 inputs for NN are supported"); + + // Put mean and std in appropriate tensor params + if (!params.mean.empty() || !params.stdev.empty()) { + GAPI_Assert(params.mean.size() == params.stdev.size() && + params.mean.size() == params.input_names.size()); + for (auto idx : ade::util::iota(params.num_in)) { + const auto ort_idx = getIdxByName(in_tensor_info, params.input_names[idx]); + using M = TensorInfo::MeanStdev; + in_tensor_info[ort_idx].mstd = util::make_optional(M{ params.mean[idx] + , params.stdev[idx] }); + } + } + + // Update normalize flags for input tensors + if (!params.normalize.empty()) { + for (auto idx : ade::util::iota(params.num_in)) { + const auto ort_idx = getIdxByName(in_tensor_info, params.input_names[idx]); + in_tensor_info[ort_idx].normalize = params.normalize[idx]; + } + } + + // Pre-allocate vectors (not buffers) for runtime info + in_data.resize(params.num_in); + out_data.resize(params.num_out); +} + +std::vector 
ONNXCompiled::getTensorInfo(TensorPosition pos) { + GAPI_Assert(pos == INPUT || pos == OUTPUT); + + const auto num_nodes = pos == INPUT + ? this_session.GetInputCount() + : this_session.GetOutputCount(); + + std::vector tensor_info; + tensor_info.reserve(num_nodes); + + Ort::AllocatorWithDefaultOptions allocator; + for (auto i : ade::util::iota(num_nodes)) { + const auto info = pos == INPUT + ? this_session.GetInputTypeInfo(i) + : this_session.GetOutputTypeInfo(i); + tensor_info.emplace_back(info.GetTensorTypeAndShapeInfo()); + + char *name_p = pos == INPUT + ? this_session.GetInputName(i, allocator) + : this_session.GetOutputName(i, allocator); + tensor_info.back().name = name_p; + allocator.Free(name_p); + } + + return tensor_info; +} + +cv::GMatDesc ONNXCompiled::outMeta(int idx) const { + if (is_dynamic) { + GAPI_Assert(!params.out_metas.empty() + && "Metadata must be specified if NN has dynamic inputs!"); + return params.out_metas.at(idx); + } + const auto ort_idx = getIdxByName(out_tensor_info, params.output_names[idx]); + return cv::GMatDesc(toCV(out_tensor_info[ort_idx].type), + toCV(out_tensor_info[ort_idx].dims)); +} + +void ONNXCompiled::setInput(int i, const cv::Mat &m) { + const auto in_idx = i; + const auto in_name = params.input_names[in_idx]; + const auto ort_idx = getIdxByName(in_tensor_info, in_name); + preprocess(m, in_tensor_info[ort_idx], in_data[in_idx]); +} + +void ONNXCompiled::setOutput(int i, cv::Mat &m) { + // FIXME: No need in double-indexing? 
+ out_data[i] = m; +} + +cv::Mat ONNXCompiled::allocOutput(int i) const { + cv::Mat m; + m.create(toCV(out_tensor_info[i].dims), + toCV(out_tensor_info[i].type)); + return m; +} + +void ONNXCompiled::Run(const std::vector& ins, + const std::vector& outs) { + std::vector in_tensors, out_tensors; + + auto in_run_names = getCharNames(params.input_names); + + for (const auto it : ade::util::indexed(params.input_names)) { + auto i = ade::util::index(it); + auto in_name = ade::util::value(it); + const auto idx = getIdxByName(in_tensor_info, in_name); + in_tensors.emplace_back(createTensor(this_memory_info, + in_tensor_info[idx], + ins[i])); + } + + if (!is_dynamic) { + // Easy path - just run the session which is bound to G-API's + // internal data + for (auto i : ade::util::iota(params.output_names.size())) { + out_tensors.emplace_back(createTensor(this_memory_info, + out_tensor_info[i], + outs[i])); + } + auto out_run_names = getCharNames(params.output_names); + this_session.Run(Ort::RunOptions{nullptr}, + in_run_names.data(), + &in_tensors.front(), + params.input_names.size(), + out_run_names.data(), + &out_tensors.front(), + params.output_names.size()); + } else { + // Hard path - run session & user-defined post-processing + // NOTE: use another list of output names here + std::vector out_names; + for (auto &&ti : out_tensor_info) { + out_names.push_back(ti.name.c_str()); + } + + auto outputs = this_session.Run(Ort::RunOptions{nullptr}, + in_run_names.data(), + &in_tensors.front(), + params.input_names.size(), + out_names.data(), + out_names.size()); + std::unordered_map onnx_outputs; + std::unordered_map gapi_outputs; + + GAPI_Assert(outputs.size() == out_names.size()); + // Fill in ONNX tensors + for (auto &&iter : ade::util::zip(ade::util::toRange(out_tensor_info), + ade::util::toRange(outputs))) { + const auto &out_name = std::get<0>(iter).name; + auto &out_tensor = std::get<1>(iter); + onnx_outputs[out_name] = toCV(out_tensor); + } + + // Fill in G-API outputs + 
for (auto &&it: ade::util::indexed(params.output_names)) { + gapi_outputs[ade::util::value(it)] = outs[ade::util::index(it)]; + } + params.custom_post_proc(onnx_outputs, gapi_outputs); + } +} + +void ONNXCompiled::run() { + Run(in_data, out_data); +} + +struct Infer: public cv::detail::KernelTag { + using API = cv::GInferBase; + static cv::gapi::GBackend backend() { return cv::gapi::onnx::backend(); } + static KImpl kernel() { return KImpl{outMeta, run}; } + + static cv::GMetaArgs outMeta(const ade::Graph &gr, + const ade::NodeHandle &nh, + const cv::GMetaArgs &in_metas, + const cv::GArgs &/*in_args*/) { + cv::GMetaArgs result; + + GConstGONNXModel gm(gr); + const auto &uu = gm.metadata(nh).get(); + + GAPI_Assert(uu.oc->numInputs() == in_metas.size() + && "Known input layers count doesn't match input meta count"); + for (auto &&mm : in_metas) { + GAPI_Assert(util::holds_alternative(mm) + && "Non-GMat inputs are not supported"); + } + for (auto &&idx : ade::util::iota(uu.oc->numOutputs())) { + result.emplace_back(uu.oc->outMeta(idx)); + } + return result; + } + + static void run(const ONNXUnit &uu, ONNXCallContext &ctx) { + for (auto &&idx : ade::util::iota(uu.oc->numInputs())) { + uu.oc->setInput(idx, ctx.inMat(idx)); + } + for (auto &&idx : ade::util::iota(uu.oc->numOutputs())) { + uu.oc->setOutput(idx, ctx.outMatR(idx)); + } + uu.oc->run(); + } +}; + +struct InferROI: public cv::detail::KernelTag { + using API = cv::GInferROIBase; + static cv::gapi::GBackend backend() { return cv::gapi::onnx::backend(); } + static KImpl kernel() { return KImpl{outMeta, run}; } + + static cv::GMetaArgs outMeta(const ade::Graph &gr, + const ade::NodeHandle &nh, + const cv::GMetaArgs &in_metas, + const cv::GArgs &/*in_args*/) { + cv::GMetaArgs result; + + GConstGONNXModel gm(gr); + const auto &uu = gm.metadata(nh).get(); + GAPI_Assert(1u == uu.oc->numInputs()); + GAPI_Assert(2u == in_metas.size()); + + for (auto &&idx : ade::util::iota(uu.oc->numOutputs())) { + 
result.emplace_back(uu.oc->outMeta(idx)); + } + return result; + } + + static void run(const ONNXUnit &uu, ONNXCallContext &ctx) { + // non-generic version for now, per the InferROI's definition + GAPI_Assert(uu.oc->numInputs() == 1u); + const auto& this_roi = ctx.inArg(0).rref(); + const auto this_mat = ctx.inMat(1); + + uu.oc->setInput(0, this_mat(this_roi)); + for (auto &&idx : ade::util::iota(uu.oc->numOutputs())) { + uu.oc->setOutput(idx, ctx.outMatR(idx)); + } + uu.oc->run(); + } +}; + +struct InferList: public cv::detail::KernelTag { + using API = cv::GInferListBase; + static cv::gapi::GBackend backend() { return cv::gapi::onnx::backend(); } + static KImpl kernel() { return KImpl{outMeta, run}; } + + static cv::GMetaArgs outMeta(const ade::Graph &gr, + const ade::NodeHandle &nh, + const cv::GMetaArgs &in_metas, + const cv::GArgs &/*in_args*/) { + GConstGONNXModel gm(gr); + const auto &uu = gm.metadata(nh).get(); + + // Note our input layers list order matches the API order and so + // meta order. + GAPI_Assert(uu.oc->numInputs() == (in_metas.size() - 1u) + && "Known input layers count doesn't match input meta count"); + + for (auto i : ade::util::iota(uu.oc->numInputs())) { + const auto & mm = in_metas[i + 1]; + + GAPI_Assert(util::holds_alternative(mm) + && "Non-GMat inputs are not supported"); + } + + // roi-list version is much easier at the moment. + // All our outputs are vectors which don't have + // metadata at the moment - so just create a vector of + // "empty" array metadatas of the required size. 
+ return cv::GMetaArgs(uu.oc->numOutputs(), + cv::GMetaArg{cv::empty_array_desc()}); + } + + static void run(const ONNXUnit &uu, ONNXCallContext &ctx) { + // non-generic version for now: + // - assumes input 0 is always ROI list + // - assumes all inputs/outputs are always Mats + GAPI_Assert(uu.oc->numInputs() == 1); // roi list is not counted in net's inputs + + const auto& in_roi_vec = ctx.inArg(0u).rref(); + const cv::Mat this_mat = ctx.inMat(1u); + + for (auto i : ade::util::iota(uu.oc->numOutputs())) { + ctx.outVecR(i).clear(); + } + for (const auto &rc : in_roi_vec) { + uu.oc->setInput(0, this_mat(rc)); + std::vector out_mats(uu.oc->numOutputs()); + for (auto i : ade::util::iota(uu.oc->numOutputs())) { + out_mats[i] = uu.oc->allocOutput(i); + uu.oc->setOutput(i, out_mats[i]); + } + uu.oc->run(); + for (auto i : ade::util::iota(uu.oc->numOutputs())) { + std::vector &out_vec = ctx.outVecR(i); + out_vec.push_back(std::move(out_mats[i])); + } + } + } +}; + +struct InferList2: public cv::detail::KernelTag { + using API = cv::GInferList2Base; + static cv::gapi::GBackend backend() { return cv::gapi::onnx::backend(); } + static KImpl kernel() { return KImpl{outMeta, run}; } + + static cv::GMetaArgs outMeta(const ade::Graph &gr, + const ade::NodeHandle &nh, + const cv::GMetaArgs &in_metas, + const cv::GArgs &/*in_args*/) { + + GConstGONNXModel gm(gr); + const auto &uu = gm.metadata(nh).get(); + + // Note our input layers list order matches the API order and so + // meta order. + GAPI_Assert(uu.oc->numInputs() == (in_metas.size() - 1u) + && "Known input layers count doesn't match input meta count"); + + // In contrast to InferList, the InferList2 has only one + // "full-frame" image argument, and all the rest are arrays of + // ether ROI or blobs. So here we set the 0th arg image format + // to all inputs which are ROI-based (skipping the + // "blob"-based ones) + // FIXME: this is filtering not done, actually! GArrayDesc has + // no hint for type! 
+ const auto &mm_0 = in_metas[0u]; + const auto &meta_0 = util::get(mm_0); + GAPI_Assert( !meta_0.isND() + && !meta_0.planar + && "Only images are supported as the 0th argument"); + for (auto i : ade::util::iota(uu.oc->numInputs())) { + const auto &mm = in_metas[i + 1]; + GAPI_Assert(util::holds_alternative(mm) + && "Non-array inputs are not supported"); + } + + // roi-list version is much easier at the moment. + // All our outputs are vectors which don't have + // metadata at the moment - so just create a vector of + // "empty" array metadatas of the required size. + return cv::GMetaArgs(uu.oc->numOutputs(), + cv::GMetaArg{cv::empty_array_desc()}); + } + + static void run(const ONNXUnit &uu, ONNXCallContext &ctx) { + GAPI_Assert(ctx.args.size() > 1u + && "This operation must have at least two arguments"); + + // Since we do a ROI list inference, always assume our input buffer is image + const cv::Mat mat_0 = ctx.inMat(0u); + // Take the next argument, which must be vector (of any kind). + // Use this only to obtain the ROI list size (sizes of all + // other vectors must be equal to this one) + const auto list_size = ctx.inArg(1u).size(); + + for (auto i : ade::util::iota(uu.oc->numOutputs())) { + ctx.outVecR(i).clear(); + } + // For every ROI in the list {{{ + for (const auto &list_idx : ade::util::iota(list_size)) { + std::vector in_tensors, out_tensors; + std::vector in_mats(uu.oc->numInputs()); + // For every input of the net {{{ + for (auto in_idx : ade::util::iota(uu.oc->numInputs())) { + const auto &this_vec = ctx.inArg(in_idx+1u); + GAPI_Assert(this_vec.size() == list_size); + // Prepare input {{{ + // FIXME: Terrible run-time logic based on RTTI! + // FIXME: Will never work on non-RTTI systems! + // FIXME: Need to replace with a static type tags + // (like with serialization) instead! 
+ if (this_vec.holds()) { + // ROI case - create an ROI blob + const auto &vec = this_vec.rref(); + uu.oc->setInput(in_idx, mat_0(vec[list_idx])); + } else if (this_vec.holds()) { + // Mat case - create a regular blob + // FIXME: NOW Assume Mats are always BLOBS (not + // images) + const auto &vec = this_vec.rref(); + uu.oc->setInput(in_idx, vec[list_idx]); + } else { + GAPI_Assert(false && "Only Rect and Mat types are supported for infer list 2!"); + } + // }}} (Preapre input) + } // }}} (For every input of the net) + + std::vector out_mats(uu.oc->numOutputs()); + for (auto i : ade::util::iota(uu.oc->numOutputs())) { + out_mats[i] = uu.oc->allocOutput(i); + uu.oc->setOutput(i, out_mats[i]); + } + uu.oc->run(); + + for (auto i : ade::util::iota(uu.oc->numOutputs())) { + std::vector &out_vec = ctx.outVecR(i); + out_vec.push_back(std::move(out_mats[i])); + } + } // }}} (For every ROI in the list) + } +}; + +} // namespace onnx +} // namespace gapi +} // namespace cv + +namespace { + class GONNXBackendImpl final: public cv::gapi::GBackend::Priv { + virtual void unpackKernel(ade::Graph &gr, + const ade::NodeHandle &nh, + const cv::GKernelImpl &ii) override { + using namespace cv::gimpl; + // FIXME: Introduce a DNNBackend interface which'd specify + // the framework for this??? 
+ GONNXModel gm(gr); + const auto &np = gm.metadata(nh).get(); + const auto &pp = cv::util::any_cast(np.opaque); + const auto &ki = cv::util::any_cast(ii.opaque); + gm.metadata(nh).set(ONNXUnit{pp}); + gm.metadata(nh).set(ONNXCallable{ki.run}); + gm.metadata(nh).set(CustomMetaFunction{ki.customMetaFunc}); + } + + virtual EPtr compile(const ade::Graph &graph, + const cv::GCompileArgs &, + const std::vector &nodes) const override { + return EPtr{new cv::gimpl::onnx::GONNXExecutable(graph, nodes)}; + } + + virtual cv::gapi::GKernelPackage auxiliaryKernels() const override { + return cv::gapi::kernels< cv::gimpl::onnx::Infer + , cv::gimpl::onnx::InferROI + , cv::gimpl::onnx::InferList + , cv::gimpl::onnx::InferList2 + >(); + } + }; +} + +cv::gapi::GBackend cv::gapi::onnx::backend() { + static cv::gapi::GBackend this_backend(std::make_shared()); + return this_backend; +} +#else // HAVE_ONNX + +cv::gapi::GBackend cv::gapi::onnx::backend() { + // Still provide this symbol to avoid linking issues + util::throw_error(std::runtime_error("G-API has been compiled without ONNX support")); +} +#endif // HAVE_ONNX diff --git a/modules/gapi/src/backends/onnx/gonnxbackend.hpp b/modules/gapi/src/backends/onnx/gonnxbackend.hpp new file mode 100644 index 0000000000..a3cc897030 --- /dev/null +++ b/modules/gapi/src/backends/onnx/gonnxbackend.hpp @@ -0,0 +1,56 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2020 Intel Corporation + +#ifndef OPENCV_GAPI_GONNXBACKEND_HPP +#define OPENCV_GAPI_GONNXBACKEND_HPP + +#include "opencv2/gapi/infer/onnx.hpp" +#ifdef HAVE_ONNX + +#include +#include // type_list_index + +#include "backends/common/gbackend.hpp" + +namespace cv { +namespace gimpl { +namespace onnx { + +class GONNXExecutable final: public GIslandExecutable +{ + const ade::Graph &m_g; + GModel::ConstGraph m_gm; + + // The only executable stuff in this graph + // (assuming it is always single-op) + ade::NodeHandle this_nh; + + // List of all resources in graph (both internal and external) + std::vector m_dataNodes; + + // Actual data of all resources in graph (both internal and external) + Mag m_res; + + // Execution helpers + GArg packArg(const GArg &arg); + +public: + GONNXExecutable(const ade::Graph &graph, + const std::vector &nodes); + + virtual inline bool canReshape() const override { return false; } + virtual inline void reshape(ade::Graph&, const GCompileArgs&) override { + GAPI_Assert(false); // Not implemented yet + } + + virtual void run(std::vector &&input_objs, + std::vector &&output_objs) override; +}; + +}}} // namespace cv::gimpl::onnx + +#endif // HAVE_ONNX +#endif // OPENCV_GAPI_GONNXBACKEND_HPP diff --git a/modules/gapi/src/api/ft_render.cpp b/modules/gapi/src/backends/render/ft_render.cpp similarity index 92% rename from modules/gapi/src/api/ft_render.cpp rename to modules/gapi/src/backends/render/ft_render.cpp index 7561dff833..fcf84713ff 100644 --- a/modules/gapi/src/api/ft_render.cpp +++ b/modules/gapi/src/backends/render/ft_render.cpp @@ -5,11 +5,11 @@ // Copyright (C) 2019 Intel Corporation #include "precomp.hpp" +#include "ft_render.hpp" #ifdef HAVE_FREETYPE -#include "api/ft_render.hpp" -#include "api/ft_render_priv.hpp" +#include "ft_render_priv.hpp" #include #include @@ -166,6 +166,11 @@ void cv::gapi::wip::draw::FTTextRender::Priv::putText(cv::Mat& mat, "Failed to load char"); FT_Bitmap *bitmap = 
&(m_face->glyph->bitmap); + // FIXME: Skip glyph, if size is 0 + if (bitmap->rows == 0 || bitmap->width == 0) { + continue; + } + cv::Mat glyph(bitmap->rows, bitmap->width, CV_8UC1, bitmap->buffer, bitmap->pitch); int left = m_face->glyph->bitmap_left; @@ -211,4 +216,21 @@ void cv::gapi::wip::draw::FTTextRender::putText(cv::Mat& mat, m_priv->putText(mat, text, org, fh); } +#else + +cv::Size cv::gapi::wip::draw::FTTextRender::getTextSize(const std::wstring&, int, int*) +{ + cv::util::throw_error(std::runtime_error("Freetype not found")); +} + +void cv::gapi::wip::draw::FTTextRender::putText(cv::Mat&, const std::wstring&, const cv::Point&, int) +{ + cv::util::throw_error(std::runtime_error("Freetype not found")); +} + +cv::gapi::wip::draw::FTTextRender::FTTextRender(const std::string&) +{ + cv::util::throw_error(std::runtime_error("Freetype not found")); +} + #endif // HAVE_FREETYPE diff --git a/modules/gapi/src/api/ft_render.hpp b/modules/gapi/src/backends/render/ft_render.hpp similarity index 91% rename from modules/gapi/src/api/ft_render.hpp rename to modules/gapi/src/backends/render/ft_render.hpp index 2556c7269c..068c0d4d3f 100644 --- a/modules/gapi/src/api/ft_render.hpp +++ b/modules/gapi/src/backends/render/ft_render.hpp @@ -23,8 +23,6 @@ namespace wip namespace draw { -#ifdef HAVE_FREETYPE - class GAPI_EXPORTS FTTextRender { public: @@ -38,12 +36,6 @@ private: std::shared_ptr m_priv; }; -#else - -class GAPI_EXPORTS FTTextRender {}; - -#endif // HAVE_FREETYPE - } // namespace draw } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/api/ft_render_priv.hpp b/modules/gapi/src/backends/render/ft_render_priv.hpp similarity index 96% rename from modules/gapi/src/api/ft_render_priv.hpp rename to modules/gapi/src/backends/render/ft_render_priv.hpp index 5a0679dd99..903f439b96 100644 --- a/modules/gapi/src/api/ft_render_priv.hpp +++ b/modules/gapi/src/backends/render/ft_render_priv.hpp @@ -10,7 +10,7 @@ #ifndef OPENCV_FT_RENDER_PRIV_HPP #define 
OPENCV_FT_RENDER_PRIV_HPP -#include "api/ft_render.hpp" +#include "ft_render.hpp" #include #include FT_FREETYPE_H diff --git a/modules/gapi/src/backends/render/grenderocv.cpp b/modules/gapi/src/backends/render/grenderocv.cpp index cb4fd1be3a..71be889d79 100644 --- a/modules/gapi/src/backends/render/grenderocv.cpp +++ b/modules/gapi/src/backends/render/grenderocv.cpp @@ -1,16 +1,21 @@ #include #include "api/render_ocv.hpp" -#include "backends/render/grenderocv.hpp" #include +#include -GAPI_RENDER_OCV_KERNEL(RenderBGROCVImpl, cv::gapi::wip::draw::GRenderBGR) +struct RenderOCVState +{ + std::shared_ptr ftpr; +}; + +GAPI_OCV_KERNEL_ST(RenderBGROCVImpl, cv::gapi::wip::draw::GRenderBGR, RenderOCVState) { static void run(const cv::Mat& in, const cv::gapi::wip::draw::Prims& prims, - cv::gapi::wip::draw::FTTextRender* ftpr, - cv::Mat& out) + cv::Mat& out, + RenderOCVState& state) { // NB: If in and out cv::Mats are the same object // we can avoid copy and render on out cv::Mat @@ -19,18 +24,33 @@ GAPI_RENDER_OCV_KERNEL(RenderBGROCVImpl, cv::gapi::wip::draw::GRenderBGR) in.copyTo(out); } - cv::gapi::wip::draw::drawPrimitivesOCVBGR(out, prims, ftpr); + cv::gapi::wip::draw::drawPrimitivesOCVBGR(out, prims, state.ftpr); + } + + static void setup(const cv::GMatDesc& /* in */, + const cv::GArrayDesc& /* prims */, + std::shared_ptr& state, + const cv::GCompileArgs& args) + { + using namespace cv::gapi::wip::draw; + auto opt_freetype_font = cv::gapi::getCompileArg(args); + state = std::make_shared(); + + if (opt_freetype_font.has_value()) + { + state->ftpr = std::make_shared(opt_freetype_font->path); + } } }; -GAPI_RENDER_OCV_KERNEL(RenderNV12OCVImpl, cv::gapi::wip::draw::GRenderNV12) +GAPI_OCV_KERNEL_ST(RenderNV12OCVImpl, cv::gapi::wip::draw::GRenderNV12, RenderOCVState) { static void run(const cv::Mat& in_y, const cv::Mat& in_uv, const cv::gapi::wip::draw::Prims& prims, - cv::gapi::wip::draw::FTTextRender* ftpr, cv::Mat& out_y, - cv::Mat& out_uv) + cv::Mat& out_uv, + 
RenderOCVState& state) { // NB: If in and out cv::Mats are the same object // we can avoid copy and render on out cv::Mat @@ -67,7 +87,7 @@ GAPI_RENDER_OCV_KERNEL(RenderNV12OCVImpl, cv::gapi::wip::draw::GRenderNV12) cv::resize(in_uv, upsample_uv, in_uv.size() * 2, cv::INTER_LINEAR); cv::merge(std::vector{in_y, upsample_uv}, yuv); - cv::gapi::wip::draw::drawPrimitivesOCVYUV(yuv, prims, ftpr); + cv::gapi::wip::draw::drawPrimitivesOCVYUV(yuv, prims, state.ftpr); // YUV -> NV12 cv::Mat out_u, out_v, uv_plane; @@ -76,6 +96,22 @@ GAPI_RENDER_OCV_KERNEL(RenderNV12OCVImpl, cv::gapi::wip::draw::GRenderNV12) cv::merge(std::vector{chs[1], chs[2]}, uv_plane); cv::resize(uv_plane, out_uv, uv_plane.size() / 2, cv::INTER_LINEAR); } + + static void setup(const cv::GMatDesc& /* in_y */, + const cv::GMatDesc& /* in_uv */, + const cv::GArrayDesc& /* prims */, + std::shared_ptr& state, + const cv::GCompileArgs& args) + { + using namespace cv::gapi::wip::draw; + auto has_freetype_font = cv::gapi::getCompileArg(args); + state = std::make_shared(); + + if (has_freetype_font) + { + state->ftpr = std::make_shared(has_freetype_font->path); + } + } }; cv::gapi::GKernelPackage cv::gapi::render::ocv::kernels() diff --git a/modules/gapi/src/backends/render/grenderocv.hpp b/modules/gapi/src/backends/render/grenderocv.hpp deleted file mode 100644 index e5091042b2..0000000000 --- a/modules/gapi/src/backends/render/grenderocv.hpp +++ /dev/null @@ -1,55 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. 
-// -// Copyright (C) 2019 Intel Corporation - -#ifndef OPENCV_GAPI_GRENDEROCV_HPP -#define OPENCV_GAPI_GRENDEROCV_HPP - -#include -#include "api/render_priv.hpp" -#include "api/ft_render.hpp" - -namespace cv -{ -namespace gapi -{ -namespace render -{ -namespace ocv -{ - -GAPI_EXPORTS cv::gapi::GBackend backend(); - -template -struct add_type_to_tuple; - -template -struct add_type_to_tuple> -{ - using type = std::tuple; -}; - -template -class GRenderKernelImpl: public cv::detail::OCVCallHelper, - public cv::detail::KernelTag -{ - using InArgs = typename add_type_to_tuple::type; - using P = detail::OCVCallHelper; - -public: - using API = K; - - static cv::gapi::GBackend backend() { return cv::gapi::render::ocv::backend(); } - static cv::GCPUKernel kernel() { return GCPUKernel(&P::call); } -}; - -#define GAPI_RENDER_OCV_KERNEL(Name, API) struct Name: public cv::gapi::render::ocv::GRenderKernelImpl - -} // namespace ocv -} // namespace render -} // namespace gapi -} // namespace cv - -#endif // OPENCV_GAPI_GRENDEROCV_HPP diff --git a/modules/gapi/src/backends/render/grenderocvbackend.cpp b/modules/gapi/src/backends/render/grenderocvbackend.cpp deleted file mode 100644 index 413d0c3f9c..0000000000 --- a/modules/gapi/src/backends/render/grenderocvbackend.cpp +++ /dev/null @@ -1,161 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -// -// Copyright (C) 2018-2020 Intel Corporation - -#include "precomp.hpp" - -#include -#include - -#include - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "compiler/gobjref.hpp" -#include "compiler/gmodel.hpp" - -#include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK! 
-#include "api/render_ocv.hpp" - -#include "backends/render/grenderocvbackend.hpp" - -#include -#include "api/ocv_mask_creator.hpp" -#include "api/ft_render.hpp" - - -using GRenderModel = ade::TypedGraph - < cv::gimpl::render::ocv::RenderUnit - >; - -// FIXME: Same issue with Typed and ConstTyped -using GConstRenderModel = ade::ConstTypedGraph - < cv::gimpl::render::ocv::RenderUnit - >; - -cv::gimpl::render::ocv::GRenderExecutable::GRenderExecutable(const ade::Graph &g, - const std::vector &nodes, - std::unique_ptr&& ftpr) - : m_g(g), m_gm(m_g), m_ftpr(std::move(ftpr)) { - GConstRenderModel gcm(m_g); - - auto is_op = [&](ade::NodeHandle nh) { - return m_gm.metadata(nh).get().t == NodeType::OP; - }; - - auto it = ade::util::find_if(nodes, is_op); - - GAPI_Assert(it != nodes.end()); - this_nh = *it; - - if (!std::none_of(std::next(it), nodes.end(), is_op)) { - util::throw_error(std::logic_error("Multi-node rendering is not supported!")); - } -} - -void cv::gimpl::render::ocv::GRenderExecutable::run(std::vector &&input_objs, - std::vector &&output_objs) { - GConstRenderModel gcm(m_g); - - for (auto& it : input_objs) magazine::bindInArg (m_res, it.first, it.second); - for (auto& it : output_objs) magazine::bindOutArg(m_res, it.first, it.second); - - const auto &op = m_gm.metadata(this_nh).get(); - - // Initialize kernel's execution context: - // - Input parameters - GCPUContext context; - context.m_args.reserve(op.args.size()); - using namespace std::placeholders; - ade::util::transform(op.args, - std::back_inserter(context.m_args), - std::bind(&GRenderExecutable::packArg, this, _1)); - - // - Output parameters. - for (const auto &out_it : ade::util::indexed(op.outs)) { - // FIXME: Can the same GArg type resolution mechanism be reused here? 
- const auto out_port = ade::util::index(out_it); - const auto out_desc = ade::util::value(out_it); - context.m_results[out_port] = magazine::getObjPtr(m_res, out_desc); - } - - auto k = gcm.metadata(this_nh).get().k; - - context.m_args.emplace_back(m_ftpr.get()); - - k.m_runF(context); - - for (auto &it : output_objs) magazine::writeBack(m_res, it.first, it.second); - - // In/Out args clean-up is mandatory now with RMat - for (auto &it : input_objs) magazine::unbind(m_res, it.first); - for (auto &it : output_objs) magazine::unbind(m_res, it.first); -} - -cv::GArg cv::gimpl::render::ocv::GRenderExecutable::packArg(const cv::GArg &arg) { - // No API placeholders allowed at this point - // FIXME: this check has to be done somewhere in compilation stage. - GAPI_Assert( arg.kind != cv::detail::ArgKind::GMAT - && arg.kind != cv::detail::ArgKind::GSCALAR - && arg.kind != cv::detail::ArgKind::GARRAY); - - if (arg.kind != cv::detail::ArgKind::GOBJREF) { - util::throw_error(std::logic_error("Render supports G-types ONLY!")); - } - GAPI_Assert(arg.kind == cv::detail::ArgKind::GOBJREF); - - const cv::gimpl::RcDesc &ref = arg.get(); - switch (ref.shape) - { - case GShape::GMAT: return GArg(m_res.slot()[ref.id]); - case GShape::GARRAY: return GArg(m_res.slot().at(ref.id)); - default: - util::throw_error(std::logic_error("Unsupported GShape type")); - break; - } -} - -namespace { - class GRenderBackendImpl final: public cv::gapi::GBackend::Priv { - virtual void unpackKernel(ade::Graph &gr, - const ade::NodeHandle &op_node, - const cv::GKernelImpl &impl) override { - GRenderModel rm(gr); - auto render_impl = cv::util::any_cast(impl.opaque); - rm.metadata(op_node).set(cv::gimpl::render::ocv::RenderUnit{render_impl}); - } - - virtual EPtr compile(const ade::Graph &graph, - const cv::GCompileArgs& args, - const std::vector &nodes) const override { - - using namespace cv::gapi::wip::draw; - auto has_freetype_font = cv::gapi::getCompileArg(args); - std::unique_ptr ftpr; - if 
(has_freetype_font) - { -#ifndef HAVE_FREETYPE - throw std::runtime_error("Freetype not found"); -#else - ftpr.reset(new FTTextRender(has_freetype_font.value().path)); -#endif - } - return EPtr{new cv::gimpl::render::ocv::GRenderExecutable(graph, nodes, std::move(ftpr))}; - } - }; -} - -cv::gapi::GBackend cv::gapi::render::ocv::backend() { - static cv::gapi::GBackend this_backend(std::make_shared()); - return this_backend; -} diff --git a/modules/gapi/src/backends/render/grenderocvbackend.hpp b/modules/gapi/src/backends/render/grenderocvbackend.hpp deleted file mode 100644 index 69d388ffe6..0000000000 --- a/modules/gapi/src/backends/render/grenderocvbackend.hpp +++ /dev/null @@ -1,73 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -// -// Copyright (C) 2019 Intel Corporation - -#ifndef OPENCV_GAPI_GRENDEROCVBACKEND_HPP -#define OPENCV_GAPI_GRENDEROCVBACKEND_HPP - -#include -#include -#include - -#include "api/gorigin.hpp" -#include "backends/common/gbackend.hpp" -#include "compiler/gislandmodel.hpp" - -#include "backends/render/grenderocv.hpp" - -#include - -namespace cv -{ -namespace gimpl -{ -namespace render -{ -namespace ocv -{ - -struct RenderUnit -{ - static const char *name() { return "RenderUnit"; } - GCPUKernel k; -}; - -class GRenderExecutable final: public GIslandExecutable -{ - const ade::Graph &m_g; - GModel::ConstGraph m_gm; - std::unique_ptr m_ftpr; - - // The only executable stuff in this graph - // (assuming it is always single-op) - ade::NodeHandle this_nh; - - //// Actual data of all resources in graph (both internal and external) - Mag m_res; - - //// Execution helpers - GArg packArg(const GArg &arg); - -public: - GRenderExecutable(const ade::Graph &graph, - const std::vector &nodes, - std::unique_ptr&& ftpr); - - virtual inline bool canReshape() const override { return false; } - - virtual 
inline void reshape(ade::Graph&, const GCompileArgs&) override { - GAPI_Assert(false); // Not implemented yet - } - - virtual void run(std::vector &&input_objs, - std::vector &&output_objs) override; -}; - -} // namespace ocv -} // namespace render -} // namespace gimpl -} // namespace cv - -#endif // OPENCV_GAPI_GRENDEROCVBACKEND_HPP diff --git a/modules/gapi/src/compiler/gcompiled_priv.hpp b/modules/gapi/src/compiler/gcompiled_priv.hpp index f21bfc80bc..b08b1f9c59 100644 --- a/modules/gapi/src/compiler/gcompiled_priv.hpp +++ b/modules/gapi/src/compiler/gcompiled_priv.hpp @@ -38,6 +38,10 @@ class GAPI_EXPORTS GCompiled::Priv GMetaArgs m_outMetas; // inferred by compiler std::unique_ptr m_exec; + // NB: Used by python wrapper to clarify input/output types + GTypesInfo m_out_info; + GTypesInfo m_in_info; + void checkArgs(const cv::gimpl::GRuntimeArgs &args) const; public: @@ -55,6 +59,12 @@ public: const GMetaArgs& outMetas() const; const cv::gimpl::GModel::Graph& model() const; + + void setOutInfo(const GTypesInfo& info) { m_out_info = std::move(info); } + const GTypesInfo& outInfo() const { return m_out_info; } + + void setInInfo(const GTypesInfo& info) { m_in_info = std::move(info); } + const GTypesInfo& inInfo() const { return m_in_info; } }; } diff --git a/modules/gapi/src/compiler/gcompiler.cpp b/modules/gapi/src/compiler/gcompiler.cpp index 2f46ea873b..4d050dbabd 100644 --- a/modules/gapi/src/compiler/gcompiler.cpp +++ b/modules/gapi/src/compiler/gcompiler.cpp @@ -35,6 +35,7 @@ #include "executor/gexecutor.hpp" #include "executor/gstreamingexecutor.hpp" #include "backends/common/gbackend.hpp" +#include "backends/common/gmetabackend.hpp" // #if !defined(GAPI_STANDALONE) @@ -58,7 +59,8 @@ namespace for (const auto &b : pkg.backends()) { aux_pkg = combine(aux_pkg, b.priv().auxiliaryKernels()); } - return combine(pkg, aux_pkg); + // Always include built-in meta<> implementation + return combine(pkg, aux_pkg, cv::gimpl::meta::kernels()); }; auto has_use_only = 
cv::gapi::getCompileArg(args); @@ -238,6 +240,11 @@ cv::gimpl::GCompiler::GCompiler(const cv::GComputation &c, // (no compound backend present here) m_e.addPass("kernels", "check_islands_content", passes::checkIslandsContent); + // Special stage for intrinsics handling + m_e.addPassStage("intrin"); + m_e.addPass("intrin", "desync", passes::intrinDesync); + m_e.addPass("intrin", "finalizeIntrin", passes::intrinFinalize); + //Input metas may be empty when a graph is compiled for streaming m_e.addPassStage("meta"); if (!m_metas.empty()) @@ -384,6 +391,9 @@ cv::gimpl::GCompiler::GPtr cv::gimpl::GCompiler::generateGraph() { GModel::Graph(*g).metadata().set(OriginalInputMeta{m_metas}); } + // FIXME: remove m_args, remove GCompileArgs from backends' method signatures, + // rework backends to access GCompileArgs from graph metadata + GModel::Graph(*g).metadata().set(CompileArgs{m_args}); return g; } @@ -407,6 +417,19 @@ void cv::gimpl::GCompiler::compileIslands(ade::Graph &g, const cv::GCompileArgs GIslandModel::compileIslands(gim, g, args); } +static cv::GTypesInfo collectInfo(const cv::gimpl::GModel::ConstGraph& g, + const std::vector& nhs) { + cv::GTypesInfo info; + info.reserve(nhs.size()); + + ade::util::transform(nhs, std::back_inserter(info), [&g](const ade::NodeHandle& nh) { + const auto& data = g.metadata(nh).get(); + return cv::GTypeInfo{data.shape, data.kind}; + }); + + return info; +} + cv::GCompiled cv::gimpl::GCompiler::produceCompiled(GPtr &&pg) { // This is the final compilation step. 
Here: @@ -425,6 +448,8 @@ cv::GCompiled cv::gimpl::GCompiler::produceCompiled(GPtr &&pg) // an execution plan for it (backend-specific execution) // ...before call to produceCompiled(); + GModel::ConstGraph cgr(*pg); + const auto &outMetas = GModel::ConstGraph(*pg).metadata() .get().outMeta; std::unique_ptr pE(new GExecutor(std::move(pg))); @@ -433,6 +458,14 @@ cv::GCompiled cv::gimpl::GCompiler::produceCompiled(GPtr &&pg) GCompiled compiled; compiled.priv().setup(m_metas, outMetas, std::move(pE)); + + // NB: Need to store input/output GTypeInfo to allocate output arrays for python bindings + auto out_meta = collectInfo(cgr, cgr.metadata().get().out_nhs); + auto in_meta = collectInfo(cgr, cgr.metadata().get().in_nhs); + + compiled.priv().setOutInfo(std::move(out_meta)); + compiled.priv().setInInfo(std::move(in_meta)); + return compiled; } @@ -448,6 +481,16 @@ cv::GStreamingCompiled cv::gimpl::GCompiler::produceStreamingCompiled(GPtr &&pg) outMetas = GModel::ConstGraph(*pg).metadata().get().outMeta; } + + GModel::ConstGraph cgr(*pg); + + // NB: Need to store input/output GTypeInfo to allocate output arrays for python bindings + auto out_meta = collectInfo(cgr, cgr.metadata().get().out_nhs); + auto in_meta = collectInfo(cgr, cgr.metadata().get().in_nhs); + + compiled.priv().setOutInfo(std::move(out_meta)); + compiled.priv().setInInfo(std::move(in_meta)); + std::unique_ptr pE(new GStreamingExecutor(std::move(pg), m_args)); if (!m_metas.empty() && !outMetas.empty()) diff --git a/modules/gapi/src/compiler/gislandmodel.cpp b/modules/gapi/src/compiler/gislandmodel.cpp index aee0477e08..4d0feaea71 100644 --- a/modules/gapi/src/compiler/gislandmodel.cpp +++ b/modules/gapi/src/compiler/gislandmodel.cpp @@ -175,13 +175,26 @@ void GIslandModel::generateInitial(GIslandModel::Graph &g, { auto src_data_nh = in_edge->srcNode(); auto isl_slot_nh = data_to_slot.at(src_data_nh); - g.link(isl_slot_nh, nh); // no other data stored yet + auto isl_new_eh = g.link(isl_slot_nh, nh); // no 
other data stored yet + // Propagate some special metadata from the GModel to GIslandModel + // TODO: Make it a single place (a function) for both inputs/outputs? + // (since it is duplicated in the below code block) + if (src_g.metadata(in_edge).contains()) + { + const auto idx = src_g.metadata(in_edge).get().index; + g.metadata(isl_new_eh).set(DesyncIslEdge{idx}); + } } for (auto out_edge : src_op_nh->outEdges()) { auto dst_data_nh = out_edge->dstNode(); auto isl_slot_nh = data_to_slot.at(dst_data_nh); - g.link(nh, isl_slot_nh); + auto isl_new_eh = g.link(nh, isl_slot_nh); + if (src_g.metadata(out_edge).contains()) + { + const auto idx = src_g.metadata(out_edge).get().index; + g.metadata(isl_new_eh).set(DesyncIslEdge{idx}); + } } } // for(all_operations) } @@ -254,6 +267,9 @@ void GIslandModel::syncIslandTags(Graph &g, ade::Graph &orig_g) void GIslandModel::compileIslands(Graph &g, const ade::Graph &orig_g, const GCompileArgs &args) { GModel::ConstGraph gm(orig_g); + if (gm.metadata().contains()) { + util::throw_error(std::logic_error("FATAL: The graph has unresolved intrinsics")); + } auto original_sorted = gm.metadata().get(); for (auto nh : g.nodes()) @@ -341,26 +357,21 @@ void GIslandExecutable::run(GIslandExecutable::IInput &in, GIslandExecutable::IO for (auto &&it: ade::util::zip(ade::util::toRange(in_desc), ade::util::toRange(in_vector))) { - // FIXME: Not every Island expects a cv::Mat instead of own::Mat on input - // This kludge should go as a result of de-ownification const cv::GRunArg& in_data_orig = std::get<1>(it); cv::GRunArg in_data; -#if !defined(GAPI_STANDALONE) switch (in_data_orig.index()) { case cv::GRunArg::index_of(): - in_data = cv::GRunArg{cv::make_rmat(cv::util::get(in_data_orig))}; - break; - case cv::GRunArg::index_of(): - in_data = cv::GRunArg{(cv::util::get(in_data_orig))}; + // FIXME: This whole construct is ugly, from + // its writing to a need in this in general + in_data = cv::GRunArg{ cv::make_rmat(cv::util::get(in_data_orig)) + 
, in_data_orig.meta + }; break; default: in_data = in_data_orig; break; } -#else - in_data = in_data_orig; -#endif // GAPI_STANDALONE in_objs.emplace_back(std::get<0>(it), std::move(in_data)); } for (auto &&it: ade::util::indexed(ade::util::toRange(out_desc))) @@ -369,9 +380,27 @@ void GIslandExecutable::run(GIslandExecutable::IInput &in, GIslandExecutable::IO out.get(ade::util::checked_cast(ade::util::index(it)))); } run(std::move(in_objs), std::move(out_objs)); + + // Propagate in-graph meta down to the graph + // Note: this is not a complete implementation! Mainly this is a stub + // and the proper implementation should come later. + // + // Propagating the meta information here has its pros and cons. + // Pros: it works here uniformly for both regular and streaming cases, + // also for the majority of old-fashioned (synchronous) backends + // Cons: backends implementing the asynchronous run(IInput,IOutput) + // won't get it out of the box + cv::GRunArg::Meta stub_meta; + for (auto &&in_arg : in_vector) + { + stub_meta.insert(in_arg.meta.begin(), in_arg.meta.end()); + } + // Report output objects as "ready" to the executor, also post + // calculated in-graph meta for the objects for (auto &&it: out_objs) { - out.post(std::move(it.second)); // report output objects as "ready" to the executor + out.meta(it.second, stub_meta); + out.post(std::move(it.second)); } } diff --git a/modules/gapi/src/compiler/gislandmodel.hpp b/modules/gapi/src/compiler/gislandmodel.hpp index 6cf8f98667..e8eb73692b 100644 --- a/modules/gapi/src/compiler/gislandmodel.hpp +++ b/modules/gapi/src/compiler/gislandmodel.hpp @@ -142,6 +142,14 @@ public: // at that stage. virtual void handleNewStream() {}; // do nothing here by default + // This method is called for every IslandExecutable when + // the stream-based execution is stopped. + // All processing is guaranteed to be stopped by this moment, + // with no pending or running 'run()' processes ran in background. 
+ // FIXME: This method is tightly bound to the GStreamingExecutor + // now. + virtual void handleStopStream() {} // do nothing here by default + virtual ~GIslandExecutable() = default; }; @@ -164,6 +172,10 @@ struct GIslandExecutable::IOutput: public GIslandExecutable::IODesc { virtual GRunArgP get(int idx) = 0; // Allocate (wrap) a new data object for output idx virtual void post(GRunArgP&&) = 0; // Release the object back to the framework (mark available) virtual void post(EndOfStream&&) = 0; // Post end-of-stream marker back to the framework + + // Assign accumulated metadata to the given output object. + // This method can only be called after get() and before post(). + virtual void meta(const GRunArgP&, const GRunArg::Meta &) = 0; }; // GIslandEmitter - a backend-specific thing which feeds data into @@ -222,8 +234,19 @@ struct IslandsCompiled static const char *name() { return "IslandsCompiled"; } }; +// This flag marks an edge in an GIslandModel as "desynchronized" +// i.e. it starts a new desynchronized subgraph +struct DesyncIslEdge +{ + static const char *name() { return "DesynchronizedIslandEdge"; } + + // Projection from GModel/DesyncEdge.index + int index; +}; + namespace GIslandModel { + using Graph = ade::TypedGraph < NodeKind , FusedIsland @@ -232,6 +255,7 @@ namespace GIslandModel , Emitter , Sink , IslandsCompiled + , DesyncIslEdge , ade::passes::TopologicalSortData >; @@ -244,6 +268,7 @@ namespace GIslandModel , Emitter , Sink , IslandsCompiled + , DesyncIslEdge , ade::passes::TopologicalSortData >; diff --git a/modules/gapi/src/compiler/gmodel.cpp b/modules/gapi/src/compiler/gmodel.cpp index 39dc1da33b..ea4eb880a4 100644 --- a/modules/gapi/src/compiler/gmodel.cpp +++ b/modules/gapi/src/compiler/gmodel.cpp @@ -23,12 +23,16 @@ namespace cv { namespace gimpl { -ade::NodeHandle GModel::mkOpNode(GModel::Graph &g, const GKernel &k, const std::vector &args, const std::string &island) +ade::NodeHandle GModel::mkOpNode(GModel::Graph &g, + const GKernel 
&k, + const std::vector &args, + const cv::util::any ¶ms, + const std::string &island) { ade::NodeHandle op_h = g.createNode(); g.metadata(op_h).set(NodeType{NodeType::OP}); //These extra empty {} are to please GCC (-Wmissing-field-initializers) - g.metadata(op_h).set(Op{k, args, {}, {}}); + g.metadata(op_h).set(Op{k, args, {}, {}, params}); if (!island.empty()) g.metadata(op_h).set(Island{island}); return op_h; @@ -73,7 +77,7 @@ ade::NodeHandle GModel::mkDataNode(GModel::Graph &g, const GShape shape) return data_h; } -void GModel::linkIn(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::size_t in_port) +ade::EdgeHandle GModel::linkIn(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::size_t in_port) { // Check if input is already connected for (const auto& in_e : opH->inEdges()) @@ -92,9 +96,11 @@ void GModel::linkIn(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::si // Replace an API object with a REF (G* -> GOBJREF) op.args[in_port] = cv::GArg(RcDesc{gm.rc, gm.shape, {}}); + + return eh; } -void GModel::linkOut(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::size_t out_port) +ade::EdgeHandle GModel::linkOut(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::size_t out_port) { // FIXME: check validity using kernel prototype @@ -117,6 +123,8 @@ void GModel::linkOut(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::s const auto min_out_size = std::max(op.outs.size(), storage_with_port); op.outs.resize(min_out_size, RcDesc{-1,GShape::GMAT,{}}); // FIXME: Invalid shape instead? 
op.outs[out_port] = RcDesc{gm.rc, gm.shape, {}}; + + return eh; } std::vector GModel::orderedInputs(const ConstGraph &g, ade::NodeHandle nh) @@ -206,26 +214,29 @@ ade::NodeHandle GModel::detail::dataNodeOf(const ConstLayoutGraph &g, const GOri return g.metadata().get().object_nodes.at(origin); } -void GModel::redirectReaders(Graph &g, ade::NodeHandle from, ade::NodeHandle to) +std::vector GModel::redirectReaders(Graph &g, ade::NodeHandle from, ade::NodeHandle to) { std::vector ehh(from->outEdges().begin(), from->outEdges().end()); + std::vector ohh; + ohh.reserve(ehh.size()); for (auto e : ehh) { auto dst = e->dstNode(); auto input = g.metadata(e).get(); g.erase(e); - linkIn(g, dst, to, input.port); + ohh.push_back(linkIn(g, dst, to, input.port)); } + return ohh; } -void GModel::redirectWriter(Graph &g, ade::NodeHandle from, ade::NodeHandle to) +ade::EdgeHandle GModel::redirectWriter(Graph &g, ade::NodeHandle from, ade::NodeHandle to) { GAPI_Assert(from->inEdges().size() == 1); auto e = from->inEdges().front(); auto op = e->srcNode(); auto output = g.metadata(e).get(); g.erase(e); - linkOut(g, op, to, output.port); + return linkOut(g, op, to, output.port); } GMetaArgs GModel::collectInputMeta(const GModel::ConstGraph &cg, ade::NodeHandle node) diff --git a/modules/gapi/src/compiler/gmodel.hpp b/modules/gapi/src/compiler/gmodel.hpp index 8f78ba49b7..d016766fb5 100644 --- a/modules/gapi/src/compiler/gmodel.hpp +++ b/modules/gapi/src/compiler/gmodel.hpp @@ -61,6 +61,7 @@ struct Op std::vector outs; // TODO: Introduce a new type for resource references cv::gapi::GBackend backend; + cv::util::any params; // Operation specific information }; struct Data @@ -210,6 +211,58 @@ struct CustomMetaFunction CM customOutMeta; }; +// This is a general flag indicating that this GModel has intrinsics. +// In the beginning of the compilation, it is a quick check to +// indicate there are intrinsics. 
+// +// In the end of the compilation, having this flag is fatal -- all +// intrinsics must be resolved. +struct HasIntrinsics +{ + static const char *name() { return "HasIntrinsicsFlag"; } +}; + +// This is a special tag for both DATA and OP nodes indicating +// which desynchronized path this node belongs to. +// This tag is set by a special complex pass intrinDesync/accept. +struct DesyncPath +{ + static const char *name() { return "DesynchronizedPath"; } + + // A zero-based index of the desynchronized path in the graph. + // Set by intrinDesync() compiler pass + int index; +}; + +// This is a special tag for graph Edges indicating that this +// particular edge starts a desynchronized path in the graph. +// At the execution stage, the data coming "through" these edges +// (virtually, of course, since our GModel edges never transfer the +// actual data, they just represent these transfers) is desynchronized +// from the rest of the pipeline, i.e. may be "lost" (stay unconsumed +// and then overwritten with some new data when streaming). +struct DesyncEdge +{ + static const char *name() { return "DesynchronizedEdge"; } + + // A zero-based index of the desynchronized path in the graph. + // Set by intrinDesync/apply() compiler pass + int index; +}; + +// This flag marks the island graph as "desynchronized" +struct Desynchronized +{ + static const char *name() { return "Desynchronized"; } +}; + +// Reference to compile args of the computation +struct CompileArgs +{ + static const char *name() { return "CompileArgs"; } + GCompileArgs args; +}; + namespace GModel { using Graph = ade::TypedGraph @@ -231,6 +284,11 @@ namespace GModel , CustomMetaFunction , Streaming , Deserialized + , HasIntrinsics + , DesyncPath + , DesyncEdge + , Desynchronized + , CompileArgs >; // FIXME: How to define it based on GModel??? 
@@ -253,6 +311,11 @@ namespace GModel , CustomMetaFunction , Streaming , Deserialized + , HasIntrinsics + , DesyncPath + , DesyncEdge + , Desynchronized + , CompileArgs >; // FIXME: @@ -262,7 +325,11 @@ namespace GModel // GAPI_EXPORTS for tests GAPI_EXPORTS void init (Graph& g); - GAPI_EXPORTS ade::NodeHandle mkOpNode(Graph &g, const GKernel &k, const std::vector& args, const std::string &island); + GAPI_EXPORTS ade::NodeHandle mkOpNode(Graph &g, + const GKernel &k, + const std::vector& args, + const cv::util::any& params, + const std::string &island); // Isn't used by the framework or default backends, required for external backend development GAPI_EXPORTS ade::NodeHandle mkDataNode(Graph &g, const GShape shape); @@ -273,11 +340,11 @@ namespace GModel // Clears logged messages of a node. GAPI_EXPORTS void log_clear(Graph &g, ade::NodeHandle node); - GAPI_EXPORTS void linkIn (Graph &g, ade::NodeHandle op, ade::NodeHandle obj, std::size_t in_port); - GAPI_EXPORTS void linkOut (Graph &g, ade::NodeHandle op, ade::NodeHandle obj, std::size_t out_port); + GAPI_EXPORTS ade::EdgeHandle linkIn (Graph &g, ade::NodeHandle op, ade::NodeHandle obj, std::size_t in_port); + GAPI_EXPORTS ade::EdgeHandle linkOut (Graph &g, ade::NodeHandle op, ade::NodeHandle obj, std::size_t out_port); - GAPI_EXPORTS void redirectReaders(Graph &g, ade::NodeHandle from, ade::NodeHandle to); - GAPI_EXPORTS void redirectWriter (Graph &g, ade::NodeHandle from, ade::NodeHandle to); + GAPI_EXPORTS std::vector redirectReaders(Graph &g, ade::NodeHandle from, ade::NodeHandle to); + GAPI_EXPORTS ade::EdgeHandle redirectWriter (Graph &g, ade::NodeHandle from, ade::NodeHandle to); GAPI_EXPORTS std::vector orderedInputs (const ConstGraph &g, ade::NodeHandle nh); GAPI_EXPORTS std::vector orderedOutputs(const ConstGraph &g, ade::NodeHandle nh); diff --git a/modules/gapi/src/compiler/gmodelbuilder.cpp b/modules/gapi/src/compiler/gmodelbuilder.cpp index 87e9ab55b8..5f8f3518fc 100644 --- 
a/modules/gapi/src/compiler/gmodelbuilder.cpp +++ b/modules/gapi/src/compiler/gmodelbuilder.cpp @@ -134,12 +134,19 @@ cv::gimpl::Unrolled cv::gimpl::unrollExpr(const GProtoArgs &ins, // Put the outputs object description of the node // so that they are not lost if they are not consumed by other operations + GAPI_Assert(call_p.m_k.outCtors.size() == call_p.m_k.outShapes.size()); for (const auto &it : ade::util::indexed(call_p.m_k.outShapes)) { std::size_t port = ade::util::index(it); GShape shape = ade::util::value(it); - GOrigin org { shape, node, port, {}, origin.kind }; + // FIXME: then use ZIP + HostCtor ctor = call_p.m_k.outCtors[port]; + + // NB: Probably this fixes all other "missing host ctor" + // problems. + // TODO: Clean-up the old workarounds if it really is. + GOrigin org {shape, node, port, std::move(ctor), origin.kind}; origins.insert(org); } @@ -286,7 +293,7 @@ ade::NodeHandle cv::gimpl::GModelBuilder::put_OpNode(const cv::GNode &node) { GAPI_Assert(node.shape() == GNode::NodeShape::CALL); const auto &call_p = node.call().priv(); - auto nh = cv::gimpl::GModel::mkOpNode(m_gm, call_p.m_k, call_p.m_args, node_p.m_island); + auto nh = cv::gimpl::GModel::mkOpNode(m_gm, call_p.m_k, call_p.m_args, call_p.m_params, node_p.m_island); m_graph_ops[&node_p] = nh; return nh; } diff --git a/modules/gapi/src/compiler/gobjref.hpp b/modules/gapi/src/compiler/gobjref.hpp index dd0939c439..bca6fa525e 100644 --- a/modules/gapi/src/compiler/gobjref.hpp +++ b/modules/gapi/src/compiler/gobjref.hpp @@ -16,15 +16,9 @@ namespace cv namespace gimpl { - // Union type for various user-defined type constructors (GArray, GOpaque, etc) - // FIXME: Replace construct-only API with a more generic one - // (probably with bits of introspection) - // Not required for non-user-defined types (GMat, GScalar, etc) - using HostCtor = util::variant - < util::monostate - , detail::ConstructVec - , detail::ConstructOpaque - >; + // HostCtor was there, but then moved to public + // Redeclare 
here to avoid changing tons of code + using HostCtor = cv::detail::HostCtor; using ConstVal = util::variant < util::monostate diff --git a/modules/gapi/src/compiler/gstreaming.cpp b/modules/gapi/src/compiler/gstreaming.cpp index 2e9c016ceb..fa736d592e 100644 --- a/modules/gapi/src/compiler/gstreaming.cpp +++ b/modules/gapi/src/compiler/gstreaming.cpp @@ -8,6 +8,7 @@ #include "precomp.hpp" #include +#include // util::indexed #include // can_describe #include @@ -69,6 +70,11 @@ bool cv::GStreamingCompiled::Priv::pull(cv::GRunArgsP &&outs) return m_exec->pull(std::move(outs)); } +bool cv::GStreamingCompiled::Priv::pull(cv::GOptRunArgsP &&outs) +{ + return m_exec->pull(std::move(outs)); +} + bool cv::GStreamingCompiled::Priv::try_pull(cv::GRunArgsP &&outs) { return m_exec->try_pull(std::move(outs)); @@ -111,6 +117,58 @@ bool cv::GStreamingCompiled::pull(cv::GRunArgsP &&outs) return m_priv->pull(std::move(outs)); } +std::tuple cv::GStreamingCompiled::pull() +{ + // FIXME: Why it is not @ priv?? 
+ GRunArgs run_args; + GRunArgsP outs; + const auto& out_info = m_priv->outInfo(); + run_args.reserve(out_info.size()); + outs.reserve(out_info.size()); + + for (auto&& info : out_info) + { + switch (info.shape) + { + case cv::GShape::GMAT: + { + run_args.emplace_back(cv::Mat{}); + outs.emplace_back(&cv::util::get(run_args.back())); + break; + } + case cv::GShape::GSCALAR: + { + run_args.emplace_back(cv::Scalar{}); + outs.emplace_back(&cv::util::get(run_args.back())); + break; + } + case cv::GShape::GARRAY: + { + switch (info.kind) + { + case cv::detail::OpaqueKind::CV_POINT2F: + run_args.emplace_back(cv::detail::VectorRef{std::vector{}}); + outs.emplace_back(cv::util::get(run_args.back())); + break; + default: + util::throw_error(std::logic_error("Unsupported kind for GArray")); + } + break; + } + default: + util::throw_error(std::logic_error("Only cv::GMat and cv::GScalar are supported for python output")); + } + } + + bool is_over = m_priv->pull(std::move(outs)); + return std::make_tuple(is_over, run_args); +} + +bool cv::GStreamingCompiled::pull(cv::GOptRunArgsP &&outs) +{ + return m_priv->pull(std::move(outs)); +} + bool cv::GStreamingCompiled::try_pull(cv::GRunArgsP &&outs) { return m_priv->try_pull(std::move(outs)); diff --git a/modules/gapi/src/compiler/gstreaming_priv.hpp b/modules/gapi/src/compiler/gstreaming_priv.hpp index 447bcda76e..be0869e663 100644 --- a/modules/gapi/src/compiler/gstreaming_priv.hpp +++ b/modules/gapi/src/compiler/gstreaming_priv.hpp @@ -28,6 +28,10 @@ class GAPI_EXPORTS GStreamingCompiled::Priv GMetaArgs m_outMetas; // inferred by compiler std::unique_ptr m_exec; + // NB: Used by python wrapper to clarify input/output types + GTypesInfo m_out_info; + GTypesInfo m_in_info; + public: void setup(const GMetaArgs &metaArgs, const GMetaArgs &outMetas, @@ -41,10 +45,17 @@ public: void setSource(GRunArgs &&args); void start(); bool pull(cv::GRunArgsP &&outs); + bool pull(cv::GOptRunArgsP &&outs); bool try_pull(cv::GRunArgsP &&outs); void 
stop(); bool running() const; + + void setOutInfo(const GTypesInfo& info) { m_out_info = std::move(info); } + const GTypesInfo& outInfo() const { return m_out_info; } + + void setInInfo(const GTypesInfo& info) { m_in_info = std::move(info); } + const GTypesInfo& inInfo() const { return m_in_info; } }; } // namespace cv diff --git a/modules/gapi/src/compiler/passes/exec.cpp b/modules/gapi/src/compiler/passes/exec.cpp index 755538bb46..f6a73489eb 100644 --- a/modules/gapi/src/compiler/passes/exec.cpp +++ b/modules/gapi/src/compiler/passes/exec.cpp @@ -20,6 +20,7 @@ #include // util::optional #include "logger.hpp" // GAPI_LOG +#include "api/gbackend_priv.hpp" // for canMerge() #include "compiler/gmodel.hpp" #include "compiler/gislandmodel.hpp" #include "compiler/passes/passes.hpp" @@ -54,11 +55,28 @@ namespace // Also check the cases backend can't handle // (e.x. GScalar connecting two fluid ops should split the graph) const GModel::ConstGraph g(src_graph); + if (g.metadata().contains()) { + // Fusion of a graph having a desynchronized path is + // definitely non-trivial + return false; + } const auto& active_backends = g.metadata().get().backends; - return active_backends.size() == 1 && - ade::util::all_of(g.nodes(), [&](ade::NodeHandle nh) { - return !g.metadata(nh).contains(); - }); + if (active_backends.size() != 1u) { + // More than 1 backend involved - non-trivial + return false; + } + const auto& has_island_tags = [&](ade::NodeHandle nh) { + return g.metadata(nh).contains(); + }; + if (ade::util::any_of(g.nodes(), has_island_tags)) { + // There are user-defined islands - non-trivial + return false; + } + if (active_backends.begin()->priv().controlsMerge()) { + // If the only backend controls Island Fusion on its own - non-trivial + return false; + } + return true; } void fuseTrivial(GIslandModel::Graph &g, const ade::Graph &src_graph) @@ -71,12 +89,12 @@ namespace all.insert(src_g.nodes().begin(), src_g.nodes().end()); - for (const auto nh : proto.in_nhs) + for 
(const auto& nh : proto.in_nhs) { all.erase(nh); in_ops.insert(nh->outNodes().begin(), nh->outNodes().end()); } - for (const auto nh : proto.out_nhs) + for (const auto& nh : proto.out_nhs) { all.erase(nh); out_ops.insert(nh->inNodes().begin(), nh->inNodes().end()); @@ -90,12 +108,12 @@ namespace auto ih = GIslandModel::mkIslandNode(g, std::move(isl)); - for (const auto nh : proto.in_nhs) + for (const auto& nh : proto.in_nhs) { auto slot = GIslandModel::mkSlotNode(g, nh); g.link(slot, ih); } - for (const auto nh : proto.out_nhs) + for (const auto& nh : proto.out_nhs) { auto slot = GIslandModel::mkSlotNode(g, nh); g.link(ih, slot); @@ -125,9 +143,9 @@ namespace }; bool canMerge(const GIslandModel::Graph &g, - const ade::NodeHandle a_nh, - const ade::NodeHandle /*slot_nh*/, - const ade::NodeHandle b_nh, + const ade::NodeHandle &a_nh, + const ade::NodeHandle &slot_nh, + const ade::NodeHandle &b_nh, const MergeContext &ctx = MergeContext()) { auto a_ptr = g.metadata(a_nh).get().object; @@ -142,8 +160,8 @@ namespace // Islands which cause a cycle can't be merged as well // (since the flag is set, the procedure already tried to // merge these islands in the past) - if (ade::util::contains(ctx.cycle_causers, std::make_pair(a_ptr, b_ptr))|| - ade::util::contains(ctx.cycle_causers, std::make_pair(b_ptr, a_ptr))) + if ( ade::util::contains(ctx.cycle_causers, std::make_pair(a_ptr, b_ptr)) + || ade::util::contains(ctx.cycle_causers, std::make_pair(b_ptr, a_ptr))) return false; // There may be user-defined islands. 
Initially user-defined @@ -163,7 +181,13 @@ namespace return false; } - // FIXME: add a backend-specified merge checker + // If available, run the backend-specified merge checker + const auto &this_backend_p = a_ptr->backend().priv(); + if ( this_backend_p.controlsMerge() + && !this_backend_p.allowsMerge(g, a_nh, slot_nh, b_nh)) + { + return false; + } return true; } @@ -205,10 +229,31 @@ namespace { using namespace std::placeholders; + // Before checking for candidates, find and ban neighbor nodes + // (input or outputs) which are connected via desynchronized + // edges. + GIsland::node_set nodes_with_desync_edges; + for (const auto& in_eh : nh->inEdges()) { + if (g.metadata(in_eh).contains()) { + nodes_with_desync_edges.insert(in_eh->srcNode()); + } + } + for (const auto& output_data_nh : nh->outNodes()) { + for (const auto &out_reader_eh : output_data_nh->outEdges()) { + if (g.metadata(out_reader_eh).contains()) { + nodes_with_desync_edges.insert(out_reader_eh->dstNode()); + } + } + } + // Find a first matching candidate GIsland for merge // among inputs - for (const auto& input_data_nh : nh->inNodes()) + for (const auto& in_eh : nh->inEdges()) { + if (ade::util::contains(nodes_with_desync_edges, in_eh->srcNode())) { + continue; // desync edges can never be fused + } + const auto& input_data_nh = in_eh->srcNode(); if (input_data_nh->inNodes().size() != 0) { // Data node must have a single producer only @@ -224,14 +269,17 @@ namespace // Ok, now try to find it among the outputs for (const auto& output_data_nh : nh->outNodes()) { - auto mergeTest = [&](ade::NodeHandle cons_nh) -> bool { - return canMerge(g, nh, output_data_nh, cons_nh, ctx); + auto mergeTest = [&](ade::EdgeHandle cons_eh) -> bool { + if (ade::util::contains(nodes_with_desync_edges, cons_eh->dstNode())) { + return false; // desync edges can never be fused + } + return canMerge(g, nh, output_data_nh, cons_eh->dstNode(), ctx); }; - auto cand_it = std::find_if(output_data_nh->outNodes().begin(), - 
output_data_nh->outNodes().end(), + auto cand_it = std::find_if(output_data_nh->outEdges().begin(), + output_data_nh->outEdges().end(), mergeTest); - if (cand_it != output_data_nh->outNodes().end()) - return std::make_tuple(*cand_it, + if (cand_it != output_data_nh->outEdges().end()) + return std::make_tuple((*cand_it)->dstNode(), output_data_nh, Direction::Out); } // for(outNodes) @@ -251,6 +299,7 @@ namespace ade::NodeHandle m_slot; ade::NodeHandle m_cons; + using Change = ChangeT; Change::List m_changes; struct MergeObjects @@ -423,10 +472,10 @@ namespace auto backend = m_gim.metadata(m_prod).get() .object->backend(); auto merged = std::make_shared(backend, - std::move(mo.all), - std::move(mo.in_ops), - std::move(mo.out_ops), - std::move(maybe_user_tag)); + std::move(mo.all), + std::move(mo.in_ops), + std::move(mo.out_ops), + std::move(maybe_user_tag)); // FIXME: move this debugging to some user-controllable log-level #ifdef DEBUG_MERGE merged->debug(); @@ -440,7 +489,9 @@ namespace m_prod->inEdges().end()); for (auto in_edge : input_edges) { - m_changes.enqueue(m_g, in_edge->srcNode(), new_nh); + // FIXME: Introduce a Relink primitive instead? + // (combining the both actions into one?) + m_changes.enqueue(m_g, in_edge->srcNode(), new_nh, in_edge); m_changes.enqueue(m_g, m_prod, in_edge); } @@ -450,7 +501,7 @@ namespace m_cons->outEdges().end()); for (auto out_edge : output_edges) { - m_changes.enqueue(m_g, new_nh, out_edge->dstNode()); + m_changes.enqueue(m_g, new_nh, out_edge->dstNode(), out_edge); m_changes.enqueue(m_g, m_cons, out_edge); } @@ -491,6 +542,10 @@ namespace m_changes.enqueue(m_g, non_opt_slot_nh, eh); } } + // FIXME: No metadata copied here (from where??) + // For DesyncIslEdges it still works, as these tags are + // placed to Data->Op edges and this one is an Op->Data + // edge. 
m_changes.enqueue(m_g, new_nh, non_opt_slot_nh); } @@ -502,7 +557,7 @@ namespace m_prod->outEdges().end()); for (auto extra_out : prod_extra_out_edges) { - m_changes.enqueue(m_g, new_nh, extra_out->dstNode()); + m_changes.enqueue(m_g, new_nh, extra_out->dstNode(), extra_out); m_changes.enqueue(m_g, m_prod, extra_out); } @@ -514,7 +569,7 @@ namespace m_cons->inEdges().end()); for (auto extra_in : cons_extra_in_edges) { - m_changes.enqueue(m_g, extra_in->srcNode(), new_nh); + m_changes.enqueue(m_g, extra_in->srcNode(), new_nh, extra_in); m_changes.enqueue(m_g, m_cons, extra_in); } @@ -557,10 +612,10 @@ namespace there_was_a_merge = false; // FIXME: move this debugging to some user-controllable log level - #ifdef DEBUG_MERGE +#ifdef DEBUG_MERGE GAPI_LOG_INFO(NULL, "Before next merge attempt " << iteration << "..."); merge_debug(g, iteration); - #endif +#endif iteration++; auto sorted = pass_helpers::topoSort(im); for (auto nh : sorted) @@ -600,9 +655,9 @@ namespace "merge(" << l_obj->name() << "," << r_obj->name() << ") was successful!"); action.commit(); - #ifdef DEBUG_MERGE +#ifdef DEBUG_MERGE GIslandModel::syncIslandTags(gim, g); - #endif +#endif there_was_a_merge = true; break; // start do{}while from the beginning } diff --git a/modules/gapi/src/compiler/passes/intrin.cpp b/modules/gapi/src/compiler/passes/intrin.cpp new file mode 100644 index 0000000000..5d2707570a --- /dev/null +++ b/modules/gapi/src/compiler/passes/intrin.cpp @@ -0,0 +1,305 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2020 Intel Corporation + + +#include "precomp.hpp" + +#include +#include +#include // GDesync intrinsic + +#include "compiler/gmodel.hpp" +#include "compiler/passes/passes.hpp" + +namespace desync { +namespace { + +// Drop the desynchronized node `nh` from the graph, reconnect the +// graph structure properly. This is a helper function which is used +// in both drop(g) and apply(g) passes. +// +// @return a vector of new edge handles connecting the "main" graph +// with its desynchronized part. +std::vector drop(cv::gimpl::GModel::Graph &g, + ade::NodeHandle nh) { + using namespace cv::gimpl; + + // What we need to do here: + // 1. Connect the readers of its produced data objects + // to the input data objects of desync; + // 2. Drop the data object it produces. + // 3. Drop the desync operation itself; + std::vector in_data_objs = GModel::orderedInputs(g, nh); + std::vector out_data_objs = GModel::orderedOutputs(g, nh); + std::vector new_links; + GAPI_Assert(in_data_objs.size() == out_data_objs.size()); + GAPI_DbgAssert(ade::util::all_of + (out_data_objs, + [&](const ade::NodeHandle &oh) { + return g.metadata(oh).contains(); + })); + // (1) + for (auto &&it: ade::util::zip(ade::util::toRange(in_data_objs), + ade::util::toRange(out_data_objs))) { + auto these_new_links = GModel::redirectReaders(g, + std::get<1>(it), + std::get<0>(it)); + new_links.insert(new_links.end(), + these_new_links.begin(), + these_new_links.end()); + } + // (2) + for (auto &&old_out_nh : out_data_objs) { + g.erase(old_out_nh); + } + // (3) + g.erase(nh); + + return new_links; +} + +// Tracing a desynchronizing subgraph is somewhat tricky and happens +// in both directions: downwards and upwards. +// +// The downward process is the basic one: we start with a "desync" +// OP node and go down to the graph using the "output" edges. We check +// if all nodes on this path [can] belong to this desynchronized path +// and don't overlap with others. 
+// +// An important contract to maintain is that the desynchronized part +// can't have any input references from the "main" graph part or any +// other desynchronized part in the graph. This contract is validated +// by checking every node's input which must belong to the same +// desynchronized part. +// +// Here is the pitfall of this check: +// +// v +// GMat_0 +// v +// +----------+ +// | desync() | <- This point originates the traceDown process +// +----------+ +// v +// GMat_0' <- This node will be tagged for this desync at +// :--------. step 0/1 +// v : <- The order how output nodes are visited is not +// +----------+ : specified, we can visit Op2() first (as there +// | Op1() | : is a direct link) bypassing visiting and tagging +// +----------+ : Op1() and GMat_1 +// v : +// GMat_1 : +// : .---' +// v v <- When we visit Op2() via the 2nd edge on this +// +----------+ graph, we check if all inputs belong to the same +// | Op2() | desynchronized graph and GMat_1 fails this check +// +----------+ (since the traceDown() process haven't visited +// it yet). +// +// Cases like this originate the traceUp() process: if we find an +// input node in our desynchronized path which doesn't belong to this +// path YET, it is not 100% a problem, and we need to trace it back +// (upwards) to see if it is really a case. + +// This recursive function checks the desync_id in the graph upwards. +// The process doesn't continue for nodes which have a valid +// desync_id already. +// The process only continues for nodes which have no desync_id +// assigned. If there's no such nodes anymore, the procedure is +// considered complete and a list of nodes to tag is returned to the +// caller. +// +// If NO inputs of this node have a valid desync_id, the desync +// invariant is broken and the function throws. 
+void traceUp(cv::gimpl::GModel::Graph &g, + const ade::NodeHandle &nh, + int desync_id, + std::vector &path) { + using namespace cv::gimpl; + + GAPI_Assert(!nh->inNodes().empty() + && "traceUp: a desynchronized part of the graph is not isolated?"); + + if (g.metadata(nh).contains()) { + // We may face nodes which have DesyncPath already visited during + // this recursive process (e.g. via some other output or branch in the + // subgraph) + if (g.metadata(nh).get().index != desync_id) { + GAPI_Assert(false && "Desynchronization can't be nested!"); + } + return; // This object belongs to the desync path - exit early. + } + + // Regardless of the result, put this nh to the path + path.push_back(nh); + + // Check if the input nodes are OK + std::vector nodes_to_trace; + nodes_to_trace.reserve(nh->inNodes().size()); + for (auto &&in_nh : nh->inNodes()) { + if (g.metadata(in_nh).contains()) { + // We may face nodes which have DesyncPath already visited during + // this recursive process (e.g. via some other output or branch in the + // subgraph) + GAPI_Assert(g.metadata(in_nh).get().index == desync_id + && "Desynchronization can't be nested!"); + } else { + nodes_to_trace.push_back(in_nh); + } + } + + // If there are nodes to trace, continue the recursion + for (auto &&up_nh : nodes_to_trace) { + traceUp(g, up_nh, desync_id, path); + } +} + +// This recursive function propagates the desync_id down to the graph +// starting at nh, and also checks: +// - if this desync path is isolated; +// - if this desync path is not overlapped. +// It also originates the traceUp() process at the points of +// uncertainty (as described in the comment above). +void traceDown(cv::gimpl::GModel::Graph &g, + const ade::NodeHandle &nh, + int desync_id) { + using namespace cv::gimpl; + + if (g.metadata(nh).contains()) { + // We may face nodes which have DesyncPath already visited during + // this recursive process (e.g. 
via some other output or branch in the + // subgraph) + GAPI_Assert(g.metadata(nh).get().index == desync_id + && "Desynchronization can't be nested!"); + } else { + g.metadata(nh).set(DesyncPath{desync_id}); + } + + // All inputs of this data object must belong to the same + // desync path. + for (auto &&in_nh : nh->inNodes()) { + // If an input object is not assigned to this desync path, + // it does not means that the object doesn't belong to + // this path. Check it. + std::vector path_up; + traceUp(g, in_nh, desync_id, path_up); + // We get here on success. Just set the proper tags for + // the identified input path. + for (auto &&up_nh : path_up) { + g.metadata(up_nh).set(DesyncPath{desync_id}); + } + } + + // Propagate the tag & check down + for (auto &&out_nh : nh->outNodes()) { + traceDown(g, out_nh, desync_id); + } +} + +// Streaming case: ensure the graph has proper isolation of the +// desynchronized parts, set proper Edge metadata hints for +// GStreamingExecutable +void apply(cv::gimpl::GModel::Graph &g) { + using namespace cv::gimpl; + + // Stage 0. Trace down the desync operations in the graph. + // Tag them with their unique (per graph) identifiers. + int total_desync = 0; + for (auto &&nh : g.nodes()) { + if (g.metadata(nh).get().t == NodeType::OP) { + const auto &op = g.metadata(nh).get(); + if (op.k.name == cv::gapi::streaming::detail::GDesync::id()) { + GAPI_Assert(!g.metadata(nh).contains() + && "Desynchronization can't be nested!"); + const int this_desync_id = total_desync++; + g.metadata(nh).set(DesyncPath{this_desync_id}); + for (auto &&out_nh: nh->outNodes()) { + traceDown(g, out_nh, this_desync_id); + } + } // if (desync) + } // if(OP) + } // for(nodes) + + // Tracing is done for all desync ops in the graph now. + // Stage 1. Drop the desync operations from the graph, but mark + // the desynchronized edges a special way. + // The desynchronized edge is the edge which connects a main + // subgraph data with a desynchronized subgraph data. 
+ std::vector nodes(g.nodes().begin(), g.nodes().end()); + for (auto &&nh : nodes) { + if (nh == nullptr) { + // Some nodes could be dropped already during the procedure + // thanks ADE their NodeHandles updated automatically + continue; + } + if (g.metadata(nh).get().t == NodeType::OP) { + const auto &op = g.metadata(nh).get(); + if (op.k.name == cv::gapi::streaming::detail::GDesync::id()) { + auto index = g.metadata(nh).get().index; + auto new_links = drop(g, nh); + for (auto &&eh : new_links) { + g.metadata(eh).set(DesyncEdge{index}); + } + } // if (desync) + } // if (Op) + } // for(nodes) + + // Stage 2. Put a synchronized tag if there were changes applied + if (total_desync > 0) { + g.metadata().set(Desynchronized{}); + } +} + +// Probably the simplest case: desync makes no sense in the regular +// compilation process, so just drop all its occurences in the graph, +// reconnecting nodes properly. +void drop(cv::gimpl::GModel::Graph &g) { + // FIXME: LOG here that we're dropping the desync operations as + // they have no sense when compiling in the regular mode. + using namespace cv::gimpl; + std::vector nodes(g.nodes().begin(), g.nodes().end()); + for (auto &&nh : nodes) { + if (nh == nullptr) { + // Some nodes could be dropped already during the procedure + // thanks ADE their NodeHandles updated automatically + continue; + } + if (g.metadata(nh).get().t == NodeType::OP) { + const auto &op = g.metadata(nh).get(); + if (op.k.name == cv::gapi::streaming::detail::GDesync::id()) { + drop(g, nh); + } // if (desync) + } // if (Op) + } // for(nodes) +} + +} // anonymous namespace +} // namespace desync + +void cv::gimpl::passes::intrinDesync(ade::passes::PassContext &ctx) { + GModel::Graph gr(ctx.graph); + if (!gr.metadata().contains()) + return; + + gr.metadata().contains() + ? desync::apply(gr) // Streaming compilation + : desync::drop(gr); // Regular compilation +} + +// Clears the HasIntrinsics flag if all intrinsics have been handled. 
+void cv::gimpl::passes::intrinFinalize(ade::passes::PassContext &ctx) { + GModel::Graph gr(ctx.graph); + for (auto &&nh : gr.nodes()) { + if (gr.metadata(nh).get().t == NodeType::OP) { + const auto &op = gr.metadata(nh).get(); + if (is_intrinsic(op.k.name)) { + return; + } + } + } + // If reached here, really clear the flag + gr.metadata().erase(); +} diff --git a/modules/gapi/src/compiler/passes/kernels.cpp b/modules/gapi/src/compiler/passes/kernels.cpp index 69b339fb1e..837e21f19a 100644 --- a/modules/gapi/src/compiler/passes/kernels.cpp +++ b/modules/gapi/src/compiler/passes/kernels.cpp @@ -14,6 +14,7 @@ #include // compound::backend() #include // GKernelPackage #include // GNetPackage +#include // GDesync intrinsic #include "compiler/gmodel.hpp" #include "compiler/passes/passes.hpp" @@ -24,6 +25,20 @@ #include "logger.hpp" // GAPI_LOG #include "api/gproto_priv.hpp" // is_dynamic, rewrap +namespace +{ + // FIXME: This may be not the right design choice, but so far it works + const std::vector known_intrinsics = { + cv::gapi::streaming::detail::GDesync::id() + }; +} +bool cv::gimpl::is_intrinsic(const std::string &s) { + // FIXME: This search might be better in time once we start using string + return std::find(known_intrinsics.begin(), + known_intrinsics.end(), + s) != known_intrinsics.end(); +} + namespace { struct ImplInfo @@ -126,12 +141,18 @@ void cv::gimpl::passes::bindNetParams(ade::passes::PassContext &ctx, continue; pgr.metadata(nh).set(NetworkParams{it->params}); + op.backend = it->backend; } } } -// This pass, given the kernel package, selects a kernel implementation -// for every operation in the graph +// This pass, given the kernel package, selects a kernel +// implementation for every operation in the graph +// +// Starting OpenCV 4.3, G-API may have some special "intrinsic" +// operations. Those can be implemented by backends as regular +// kernels, but if not, they are handled by the framework itself in +// its optimization/execution passes. 
void cv::gimpl::passes::resolveKernels(ade::passes::PassContext &ctx, const gapi::GKernelPackage &kernels) { @@ -142,14 +163,44 @@ void cv::gimpl::passes::resolveKernels(ade::passes::PassContext &ctx, { if (gr.metadata(nh).get().t == NodeType::OP) { + // If the operation is known to be intrinsic and is NOT + // implemented in the package, just skip it - there should + // be some pass which handles it. auto &op = gr.metadata(nh).get(); - cv::gapi::GBackend selected_backend; - cv::GKernelImpl selected_impl; - std::tie(selected_backend, selected_impl) = kernels.lookup(op.k.name); + if (is_intrinsic(op.k.name) && !kernels.includesAPI(op.k.name)) { + gr.metadata().set(HasIntrinsics{}); + continue; + } + // FIXME: And this logic is terribly wrong. The right + // thing is to assign an intrinsic to a particular island + // if and only if it is: + // (a) surrounded by nodes of backend X, AND + // (b) is supported by backend X. + // Here we may have multiple backends supporting an + // intrinsic but only one of those gets selected. And + // this is exactly a situation we need multiple versions + // of the same kernel to be presented in the kernel + // package (as it was designed originally). - selected_backend.priv().unpackKernel(ctx.graph, nh, selected_impl); - op.backend = selected_backend; - active_backends.insert(selected_backend); + cv::GKernelImpl selected_impl; + + if (op.backend == cv::gapi::GBackend()) { + std::tie(op.backend, selected_impl) = kernels.lookup(op.k.name); + } else { + // FIXME: This needs to be reworked properly + // Lookup for implementation from the pre-assinged backend + cv::gapi::GBackend dummy; + std::tie(dummy, selected_impl) = op.backend.priv() + .auxiliaryKernels().lookup(op.k.name); + // FIXME: Warning here! 
+ // This situation may happen when NN (infer) backend was assigned + // by tag in bindNetParams (see above) but at this stage the operation + // lookup resulted in another backend (and it is perfectly valid when + // we have multiple NN backends available). + } + + op.backend.priv().unpackKernel(ctx.graph, nh, selected_impl); + active_backends.insert(op.backend); if (gr.metadata().contains()) { @@ -181,6 +232,12 @@ void cv::gimpl::passes::expandKernels(ade::passes::PassContext &ctx, const gapi: if (gr.metadata(nh).get().t == NodeType::OP) { const auto& op = gr.metadata(nh).get(); + // FIXME: Essentially the same problem as in the above resolveKernels + if (is_intrinsic(op.k.name) && !kernels.includesAPI(op.k.name)) { + // Note: There's no need to set HasIntrinsics flag here + // since resolveKernels would do it later. + continue; + } cv::gapi::GBackend selected_backend; cv::GKernelImpl selected_impl; diff --git a/modules/gapi/src/compiler/passes/passes.hpp b/modules/gapi/src/compiler/passes/passes.hpp index 84142fc055..8f187f6bb7 100644 --- a/modules/gapi/src/compiler/passes/passes.hpp +++ b/modules/gapi/src/compiler/passes/passes.hpp @@ -31,7 +31,11 @@ namespace gapi { struct GNetPackage; } // namespace gapi -namespace gimpl { namespace passes { +namespace gimpl { + +bool is_intrinsic(const std::string &op_name); + +namespace passes { void dumpDot(const ade::Graph &g, std::ostream& os); void dumpDot(ade::passes::PassContext &ctx, std::ostream& os); @@ -66,6 +70,9 @@ void applyTransformations(ade::passes::PassContext &ctx, void addStreaming(ade::passes::PassContext &ctx); +void intrinDesync(ade::passes::PassContext &ctx); +void intrinFinalize(ade::passes::PassContext &ctx); + }} // namespace gimpl::passes } // namespace cv diff --git a/modules/gapi/src/compiler/transactions.hpp b/modules/gapi/src/compiler/transactions.hpp index 54af8a6e69..bdc1723e19 100644 --- a/modules/gapi/src/compiler/transactions.hpp +++ b/modules/gapi/src/compiler/transactions.hpp @@ -14,6 
+14,7 @@ #include +#include "opencv2/gapi/util/util.hpp" // Seq #include "opencv2/gapi/own/assert.hpp" enum class Direction: int {Invalid, In, Out}; @@ -21,8 +22,50 @@ enum class Direction: int {Invalid, In, Out}; //////////////////////////////////////////////////////////////////////////// //// // TODO: Probably it can be moved to ADE +template +class Preserved +{ + using S = typename cv::detail::MkSeq::type; + std::tuple...> m_data; -namespace Change + template + cv::util::optional get(ade::ConstTypedGraph g, H h) { + return g.metadata(h).template contains() + ? cv::util::make_optional(g.metadata(h).template get()) + : cv::util::optional{}; + } + template + int set(ade::TypedGraph &g, H &h) { + const auto &opt = std::get(m_data); + if (opt.has_value()) + g.metadata(h).set(opt.value()); + return 0; + } + template + void copyTo_impl(ade::TypedGraph &g, H h, cv::detail::Seq) { + int unused[] = {0, set(g, h)...}; + (void) unused; + } +public: + Preserved(const ade::Graph &g, H h) { + ade::ConstTypedGraph tg(g); + m_data = std::make_tuple(get(tg, h)...); + } + void copyTo(ade::Graph &g, H h) { + ade::TypedGraph tg(g); + copyTo_impl(tg, h, S{}); + } +}; +// Do nothing if there's no metadata +template +class Preserved { +public: + Preserved(const ade::Graph &, H) {} + void copyTo(ade::Graph &, H) {} +}; + +template +struct ChangeT { struct Base { @@ -31,6 +74,8 @@ namespace Change virtual ~Base() = default; }; + template using Preserved = ::Preserved; + class NodeCreated final: public Base { ade::NodeHandle m_node; @@ -39,11 +84,7 @@ namespace Change virtual void rollback(ade::Graph &g) override { g.erase(m_node); } }; - // NB: Drops all metadata stored in the EdgeHandle, - // which is not restored even in the rollback - - // FIXME: either add a way for users to preserve meta manually - // or extend ADE to manipulate with meta such way + // FIXME: maybe extend ADE to clone/copy the whole metadata? 
class DropLink final: public Base { ade::NodeHandle m_node; @@ -51,13 +92,15 @@ namespace Change ade::NodeHandle m_sibling; + Preserved m_meta; + public: DropLink(ade::Graph &g, const ade::NodeHandle &node, const ade::EdgeHandle &edge) - : m_node(node), m_dir(node == edge->srcNode() - ? Direction::Out - : Direction::In) + : m_node(node) + , m_dir(node == edge->srcNode() ? Direction::Out : Direction::In) + , m_meta(g, edge) { m_sibling = (m_dir == Direction::In ? edge->srcNode() @@ -67,12 +110,17 @@ namespace Change virtual void rollback(ade::Graph &g) override { + // FIXME: Need to preserve metadata here! + // GIslandModel edges now have metadata + ade::EdgeHandle eh; switch(m_dir) { - case Direction::In: g.link(m_sibling, m_node); break; - case Direction::Out: g.link(m_node, m_sibling); break; + case Direction::In: eh = g.link(m_sibling, m_node); break; + case Direction::Out: eh = g.link(m_node, m_sibling); break; default: GAPI_Assert(false); } + GAPI_Assert(eh != nullptr); + m_meta.copyTo(g, eh); } }; @@ -82,10 +130,15 @@ namespace Change public: NewLink(ade::Graph &g, - const ade::NodeHandle &prod, - const ade::NodeHandle &cons) + const ade::NodeHandle &prod, + const ade::NodeHandle &cons, + const ade::EdgeHandle ©_from = ade::EdgeHandle()) : m_edge(g.link(prod, cons)) { + if (copy_from != nullptr) + { + Preserved(g, copy_from).copyTo(g, m_edge); + } } virtual void rollback(ade::Graph &g) override @@ -141,7 +194,7 @@ namespace Change } } }; -} // namespace Change +}; // struct Change //////////////////////////////////////////////////////////////////////////// #endif // OPENCV_GAPI_COMPILER_TRANSACTIONS_HPP diff --git a/modules/gapi/src/executor/conc_queue.hpp b/modules/gapi/src/executor/conc_queue.hpp index 5de50ef34b..9875e8245a 100644 --- a/modules/gapi/src/executor/conc_queue.hpp +++ b/modules/gapi/src/executor/conc_queue.hpp @@ -119,8 +119,7 @@ void concurrent_bounded_queue::set_capacity(std::size_t capacity) { // Clear the queue. 
Similar to the TBB version, this method is not // thread-safe. template -void concurrent_bounded_queue::clear() -{ +void concurrent_bounded_queue::clear() { m_data = std::queue{}; } diff --git a/modules/gapi/src/executor/gexecutor.cpp b/modules/gapi/src/executor/gexecutor.cpp index d9f5cfafe6..66f3b24771 100644 --- a/modules/gapi/src/executor/gexecutor.cpp +++ b/modules/gapi/src/executor/gexecutor.cpp @@ -12,6 +12,8 @@ #include #include + +#include "api/gproto_priv.hpp" // ptr(GRunArgP) #include "executor/gexecutor.hpp" #include "compiler/passes/passes.hpp" @@ -105,6 +107,9 @@ void bindInArgExec(Mag& mag, const RcDesc &rc, const GRunArg &arg) mag_rmat = util::get(arg); break; default: util::throw_error(std::logic_error("content type of the runtime argument does not match to resource description ?")); } + // FIXME: has to take extra care about meta here for this particuluar + // case, just because this function exists at all + mag.meta()[rc.id] = arg.meta; } void bindOutArgExec(Mag& mag, const RcDesc &rc, const GRunArgP &arg) @@ -131,7 +136,7 @@ cv::GRunArgP getObjPtrExec(Mag& mag, const RcDesc &rc) { return getObjPtr(mag, rc); } - return GRunArgP(&mag.template slot()[rc.id]); + return GRunArgP(&mag.slot()[rc.id]); } void writeBackExec(const Mag& mag, const RcDesc &rc, GRunArgP &g_arg) @@ -155,6 +160,25 @@ void writeBackExec(const Mag& mag, const RcDesc &rc, GRunArgP &g_arg) default: util::throw_error(std::logic_error("content type of the runtime argument does not match to resource description ?")); } } + +void assignMetaStubExec(Mag& mag, const RcDesc &rc, const cv::GRunArg::Meta &meta) { + switch (rc.shape) + { + case GShape::GARRAY: mag.meta()[rc.id] = meta; break; + case GShape::GOPAQUE: mag.meta()[rc.id] = meta; break; + case GShape::GSCALAR: mag.meta()[rc.id] = meta; break; + case GShape::GFRAME: mag.meta()[rc.id] = meta; break; + case GShape::GMAT: + mag.meta() [rc.id] = meta; + mag.meta()[rc.id] = meta; +#if !defined(GAPI_STANDALONE) + mag.meta()[rc.id] = 
meta; +#endif + break; + default: util::throw_error(std::logic_error("Unsupported GShape type")); break; + } +} + } // anonymous namespace }}} // namespace cv::gimpl::magazine @@ -231,11 +255,28 @@ public: class cv::gimpl::GExecutor::Output final: public cv::gimpl::GIslandExecutable::IOutput { cv::gimpl::Mag &mag; - virtual GRunArgP get(int idx) override { return magazine::getObjPtrExec(mag, desc()[idx]); } - virtual void post(GRunArgP&&) override { } // Do nothing here - virtual void post(EndOfStream&&) override {} // Do nothing here too + std::unordered_map out_idx; + + GRunArgP get(int idx) override + { + auto r = magazine::getObjPtrExec(mag, desc()[idx]); + // Remember the output port for this output object + out_idx[cv::gimpl::proto::ptr(r)] = idx; + return r; + } + void post(GRunArgP&&) override { } // Do nothing here + void post(EndOfStream&&) override {} // Do nothing here too + void meta(const GRunArgP &out, const GRunArg::Meta &m) override + { + const auto idx = out_idx.at(cv::gimpl::proto::ptr(out)); + magazine::assignMetaStubExec(mag, desc()[idx], m); + } public: - Output(cv::gimpl::Mag &m, const std::vector &rcs) : mag(m) { set(rcs); } + Output(cv::gimpl::Mag &m, const std::vector &rcs) + : mag(m) + { + set(rcs); + } }; void cv::gimpl::GExecutor::run(cv::gimpl::GRuntimeArgs &&args) @@ -330,7 +371,7 @@ void cv::gimpl::GExecutor::run(cv::gimpl::GRuntimeArgs &&args) // Run the script for (auto &op : m_ops) { - // (5) + // (5), (6) Input i{m_res, op.in_objects}; Output o{m_res, op.out_objects}; op.isl_exec->run(i, o); diff --git a/modules/gapi/src/executor/gstreamingexecutor.cpp b/modules/gapi/src/executor/gstreamingexecutor.cpp index afdebee020..70686699d0 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.cpp +++ b/modules/gapi/src/executor/gstreamingexecutor.cpp @@ -6,12 +6,17 @@ #include "precomp.hpp" +#include // make_shared #include #include #include +#if !defined(GAPI_STANDALONE) +#include // GCopy -- FIXME - to be removed! 
+#endif // GAPI_STANDALONE + #include "api/gproto_priv.hpp" // ptr(GRunArgP) #include "compiler/passes/passes.hpp" #include "backends/common/gbackend.hpp" // createMat @@ -60,14 +65,27 @@ public: struct DataQueue { static const char *name() { return "StreamingDataQueue"; } + enum tag { DESYNC }; // Enum of 1 element: purely a syntax sugar explicit DataQueue(std::size_t capacity) { - if (capacity) { - q.set_capacity(capacity); + // Note: `ptr` is shared, while the `q` is a shared + auto ptr = std::make_shared(); + if (capacity != 0) { + ptr->set_capacity(capacity); } + q = std::move(ptr); + } + explicit DataQueue(tag t) + : q(new cv::gimpl::stream::DesyncQueue()) { + GAPI_Assert(t == DESYNC); } - cv::gimpl::stream::Q q; + // FIXME: ADE metadata requires types to be copiable + std::shared_ptr q; +}; + +struct DesyncSpecialCase { + static const char *name() { return "DesyncSpecialCase"; } }; std::vector reader_queues( ade::Graph &g, @@ -77,7 +95,7 @@ std::vector reader_queues( ade::Graph &g, std::vector result; for (auto &&out_eh : obj->outEdges()) { - result.push_back(&qgr.metadata(out_eh).get().q); + result.push_back(qgr.metadata(out_eh).get().q.get()); } return result; } @@ -90,7 +108,7 @@ std::vector input_queues( ade::Graph &g, for (auto &&in_eh : obj->inEdges()) { result.push_back(qgr.metadata(in_eh).contains() - ? &qgr.metadata(in_eh).get().q + ? qgr.metadata(in_eh).get().q.get() : nullptr); } return result; @@ -133,6 +151,77 @@ void sync_data(cv::GRunArgs &results, cv::GRunArgsP &outputs) } } +// FIXME: Is there a way to derive function from its GRunArgsP version? 
+template using O = cv::util::optional; +void sync_data(cv::gimpl::stream::Result &r, cv::GOptRunArgsP &outputs) +{ + namespace own = cv::gapi::own; + + for (auto && it : ade::util::zip(ade::util::toRange(outputs), + ade::util::toRange(r.args), + ade::util::toRange(r.flags))) + { + auto &out_obj = std::get<0>(it); + auto &res_obj = std::get<1>(it); + bool available = std::get<2>(it); + + using T = cv::GOptRunArgP; +#define HANDLE_CASE(Type) \ + case T::index_of*>(): \ + if (available) { \ + *cv::util::get*>(out_obj) \ + = cv::util::make_optional(std::move(cv::util::get(res_obj))); \ + } else { \ + cv::util::get*>(out_obj)->reset(); \ + } + + // FIXME: this conversion should be unified + switch (out_obj.index()) + { + HANDLE_CASE(cv::Scalar); break; + HANDLE_CASE(cv::RMat); break; + + case T::index_of*>(): { + // Mat: special handling. + auto &mat_opt = *cv::util::get*>(out_obj); + if (available) { + auto q_map = cv::util::get(res_obj).access(cv::RMat::Access::R); + // FIXME: Copy! Maybe we could do some optimization for this case! + // e.g. don't handle RMat for last ilsand in the graph. + // It is not always possible though. + mat_opt = cv::util::make_optional(cv::gimpl::asMat(q_map).clone()); + } else { + mat_opt.reset(); + } + } break; + case T::index_of(): { + // std::vector<>: special handling + auto &vec_opt = cv::util::get(out_obj); + if (available) { + vec_opt.mov(cv::util::get(res_obj)); + } else { + vec_opt.reset(); + } + } break; + case T::index_of(): { + // std::vector<>: special handling + auto &opq_opt = cv::util::get(out_obj); + if (available) { + opq_opt.mov(cv::util::get(res_obj)); + } else { + opq_opt.reset(); + } + } break; + default: + // ...maybe because of STANDALONE mode. + GAPI_Assert(false && "This value type is not supported!"); + break; + } + } +#undef HANDLE_CASE +} + + // Pops an item from every input queue and combine it to the final // result. Blocks the current thread. 
Returns true if the vector has // been obtained successfully and false if a Stop message has been @@ -206,12 +295,39 @@ class QueueReader bool m_finishing = false; // Set to true once a "soft" stop is received std::vector m_cmd; + void rewindToStop(std::vector &in_queues, + const std::size_t this_id); + public: - bool getInputVector(std::vector &in_queues, - cv::GRunArgs &in_constants, - cv::GRunArgs &isl_inputs); + bool getInputVector (std::vector &in_queues, + cv::GRunArgs &in_constants, + cv::GRunArgs &isl_inputs); + + bool getResultsVector(std::vector &in_queues, + const std::vector &in_mapping, + const std::size_t out_size, + cv::GRunArgs &out_results); }; +// This method handles a stop sign got from some input +// island. Reiterate through all _remaining valid_ queues (some of +// them can be set to nullptr already -- see handling in +// getInputVector) and rewind data to every Stop sign per queue. +void QueueReader::rewindToStop(std::vector &in_queues, + const std::size_t this_id) +{ + for (auto &&qit : ade::util::indexed(in_queues)) + { + auto id2 = ade::util::index(qit); + auto &q2 = ade::util::value(qit); + if (this_id == id2) continue; + + Cmd cmd; + while (q2 && !cv::util::holds_alternative(cmd)) + q2->pop(cmd); + } +} + bool QueueReader::getInputVector(std::vector &in_queues, cv::GRunArgs &in_constants, cv::GRunArgs &isl_inputs) @@ -234,16 +350,14 @@ bool QueueReader::getInputVector(std::vector &in_queues, // value-initialized scalar) // It can also hold a constant value received with // Stop::Kind::CNST message (see above). 
- // FIXME: Variant move problem - isl_inputs[id] = const_cast(in_constants[id]); + isl_inputs[id] = in_constants[id]; continue; } q->pop(m_cmd[id]); if (!cv::util::holds_alternative(m_cmd[id])) { - // FIXME: Variant move problem - isl_inputs[id] = const_cast(cv::util::get(m_cmd[id])); + isl_inputs[id] = cv::util::get(m_cmd[id]); } else // A Stop sign { @@ -266,25 +380,12 @@ bool QueueReader::getInputVector(std::vector &in_queues, // NEXT time (on a next call to getInputVector()), the // "q==nullptr" check above will be triggered, but now // we need to make it manually: - isl_inputs[id] = const_cast(in_constants[id]); + isl_inputs[id] = in_constants[id]; } else { GAPI_Assert(stop.kind == Stop::Kind::HARD); - // Just got a stop sign. Reiterate through all - // _remaining valid_ queues (some of them can be - // set to nullptr already -- see above) and rewind - // data to every Stop sign per queue - for (auto &&qit : ade::util::indexed(in_queues)) - { - auto id2 = ade::util::index(qit); - auto &q2 = ade::util::value(qit); - if (id == id2) continue; - - Cmd cmd2; - while (q2 && !cv::util::holds_alternative(cmd2)) - q2->pop(cmd2); - } + rewindToStop(in_queues, id); // After queues are read to the proper indicator, // indicate end-of-stream return false; @@ -303,6 +404,60 @@ bool QueueReader::getInputVector(std::vector &in_queues, return true; // A regular case - there is data to process. } +// This is a special method to obtain a result vector +// for the entire pipeline's outputs. +// +// After introducing desync(), the pipeline output's vector +// can be produced just partially. Also, if a desynchronized +// path has multiple outputs for the pipeline, _these_ outputs +// should still come synchronized to the end user (via pull()) +// +// +// This method handles all this. +// It takes a number of input queues, which may or may not be +// equal to the number of pipeline outputs (<=). 
+// It also takes indexes saying which queue produces which +// output in the resulting pipeline. +// +// `out_results` is always produced with the size of full output +// vector. In the desync case, the number of in_queues will +// be less than this size and some of the items won't be produced. +// In the sync case, there will be a 1-1 mapping. +// +// In the desync case, there _will be_ multiple collector threads +// calling this method, and pushing their whole-pipeline outputs +// (_may be_ partially filled) to the same final output queue. +// The receiver part at the GStreamingExecutor level won't change +// because of that. +bool QueueReader::getResultsVector(std::vector &in_queues, + const std::vector &in_mapping, + const std::size_t out_size, + cv::GRunArgs &out_results) +{ + m_cmd.resize(out_size); + for (auto &&it : ade::util::indexed(in_queues)) + { + auto ii = ade::util::index(it); + auto oi = in_mapping[ii]; + auto &q = ade::util::value(it); + q->pop(m_cmd[oi]); + if (!cv::util::holds_alternative(m_cmd[oi])) + { + out_results[oi] = std::move(cv::util::get(m_cmd[oi])); + } + else // A Stop sign + { + // In theory, the CNST should never reach here. + // Collector thread never handles the inputs directly + // (collector's input queues are always produced by + // islands in the graph). + rewindToStop(in_queues, ii); + return false; + } // if(Stop) + } // for(in_queues) + return true; +} + // This thread is a plain dump source actor. 
What it do is just: // - Check input queue (the only one) for a control command @@ -509,8 +664,7 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput Cmd cmd; if (cv::util::holds_alternative(post_iter->data)) { - // FIXME: That ugly VARIANT problem - cmd = Cmd{const_cast(cv::util::get(post_iter->data))}; + cmd = Cmd{cv::util::get(post_iter->data)}; } else { @@ -520,8 +674,7 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput } for (auto &&q : m_out_queues[out_idx]) { - // FIXME: This ugly VARIANT problem - q->push(const_cast(cmd)); + q->push(cmd); } post_iter = m_postings[out_idx].erase(post_iter); } @@ -551,6 +704,15 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput } } } + void meta(const cv::GRunArgP &out, const cv::GRunArg::Meta &m) override + { + const auto it = m_postIdx.find(cv::gimpl::proto::ptr(out)); + GAPI_Assert(it != m_postIdx.end()); + + const auto out_iter = it->second.second; + cv::util::get(out_iter->data).meta = m; + } + public: explicit StreamingOutput(const cv::GMetaArgs &metas, std::vector< std::vector > &out_queues, @@ -603,22 +765,84 @@ void islandActorThread(std::vector in_rcs, // // and then put the resulting vector into one single queue. While it // looks redundant, it simplifies dramatically the way how try_pull() // is implemented - we need to check one queue instead of many. -void collectorThread(std::vector in_queues, - Q& out_queue) +// +// After desync() is added, there may be multiple collector threads +// running, every thread producing its own part of the partial +// pipeline output (optional...). All partial outputs are pushed +// to the same output queue and then picked by GStreamingExecutor +// in the end. 
+void collectorThread(std::vector in_queues, + std::vector in_mapping, + const std::size_t out_size, + const bool handle_stop, + Q& out_queue) { + // These flags are static now: regardless if the sync or + // desync branch is collected by this thread, all in_queue + // data should come in sync. + std::vector flags(out_size, false); + for (auto idx : in_mapping) { + flags[idx] = true; + } + QueueReader qr; while (true) { - cv::GRunArgs this_result(in_queues.size()); - cv::GRunArgs this_const(in_queues.size()); - if (!qr.getInputVector(in_queues, this_const, this_result)) + cv::GRunArgs this_result(out_size); + const bool ok = qr.getResultsVector(in_queues, in_mapping, out_size, this_result); + if (!ok) { - out_queue.push(Cmd{Stop{}}); + if (handle_stop) + { + out_queue.push(Cmd{Stop{}}); + } + // Terminate the thread anyway return; } - out_queue.push(Cmd{this_result}); + out_queue.push(Cmd{Result{std::move(this_result), flags}}); } } + +void check_DesyncObjectConsumedByMultipleIslands(const cv::gimpl::GIslandModel::Graph &gim) { + using namespace cv::gimpl; + + // Since the limitation exists only in this particular + // implementation, the check is also done only here but not at the + // graph compiler level. + // + // See comment in desync(GMat) src/api/kernels_streaming.cpp for details. 
+ for (auto &&nh : gim.nodes()) { + if (gim.metadata(nh).get().k == NodeKind::SLOT) { + // SLOTs are read by ISLANDs, so look for the metadata + // of the outbound edges + std::unordered_map out_desync_islands; + for (auto &&out_eh : nh->outEdges()) { + if (gim.metadata(out_eh).contains()) { + // This is a desynchronized edge + // Look what Island it leads to + const auto out_desync_idx = gim.metadata(out_eh) + .get().index; + const auto out_island = gim.metadata(out_eh->dstNode()) + .get().object; + + auto it = out_desync_islands.find(out_desync_idx); + if (it != out_desync_islands.end()) { + // If there's already an edge with this desync + // id, it must point to the same island object + GAPI_Assert(it->second == out_island.get() + && "A single desync object may only be used by a single island!"); + } else { + // Store the island pointer for the further check + out_desync_islands[out_desync_idx] = out_island.get(); + } + } // if(desync) + } // for(out_eh) + // There must be only one backend in the end of the day + // (under this desync path) + } // if(SLOT) + } // for(nodes) +} + } // anonymous namespace // GStreamingExecutor expects compile arguments as input to have possibility to do @@ -630,20 +854,28 @@ cv::gimpl::GStreamingExecutor::GStreamingExecutor(std::unique_ptr && .get().model) , m_comp_args(comp_args) , m_gim(*m_island_graph) + , m_desync(GModel::Graph(*m_orig_graph).metadata() + .contains()) { GModel::Graph gm(*m_orig_graph); // NB: Right now GIslandModel is acyclic, and all the below code assumes that. 
- // NB: This naive execution code is taken from GExecutor nearly "as-is" + // NB: This naive execution code is taken from GExecutor nearly + // "as-is" + + if (m_desync) { + check_DesyncObjectConsumedByMultipleIslands(m_gim); + } const auto proto = gm.metadata().get(); m_emitters .resize(proto.in_nhs.size()); m_emitter_queues.resize(proto.in_nhs.size()); m_sinks .resize(proto.out_nhs.size()); - m_sink_queues .resize(proto.out_nhs.size()); + m_sink_queues .resize(proto.out_nhs.size(), nullptr); + m_sink_sync .resize(proto.out_nhs.size(), -1); // Very rough estimation to limit internal queue sizes. // Pipeline depth is equal to number of its (pipeline) steps. - const auto queue_capacity = std::count_if + const auto queue_capacity = 3*std::count_if (m_gim.nodes().begin(), m_gim.nodes().end(), [&](ade::NodeHandle nh) { @@ -723,15 +955,53 @@ cv::gimpl::GStreamingExecutor::GStreamingExecutor(std::unique_ptr && , isl_exec }); // Initialize queues for every operation's input - ade::TypedGraph qgr(*m_island_graph); + ade::TypedGraph qgr(*m_island_graph); + bool is_desync_start = false; for (auto eh : nh->inEdges()) { // ...only if the data is not compile-const if (const_ins.count(eh->srcNode()) == 0) { - qgr.metadata(eh).set(DataQueue(queue_capacity)); - m_internal_queues.insert(&qgr.metadata(eh).get().q); + if (m_gim.metadata(eh).contains()) { + qgr.metadata(eh).set(DataQueue(DataQueue::DESYNC)); + is_desync_start = true; + } else if (qgr.metadata(eh).contains()) { + // See comment below + // Limit queue size to 1 in this case + qgr.metadata(eh).set(DataQueue(1u)); + } else { + qgr.metadata(eh).set(DataQueue(queue_capacity)); + } + m_internal_queues.insert(qgr.metadata(eh).get().q.get()); } } + // WORKAROUND: + // Since now we always know desync() is followed by copy(), + // copy is always the island with DesyncIslEdge. 
+ // Mark the node's outputs a special way so then its following + // queue sizes will be limited to 1 (to avoid copy reading more + // data in advance - as there's no other way for the underlying + // "slow" part to control it) + if (is_desync_start) { + auto isl = m_gim.metadata(nh).get().object; + // In the current implementation, such islands + // _must_ start with copy + GAPI_Assert(isl->in_ops().size() == 1u); +#if !defined(GAPI_STANDALONE) + GAPI_Assert(GModel::Graph(*m_orig_graph) + .metadata(*isl->in_ops().begin()) + .get() + .k.name == cv::gapi::core::GCopy::id()); +#endif // GAPI_STANDALONE + for (auto out_nh : nh->outNodes()) { + for (auto out_eh : out_nh->outEdges()) { + qgr.metadata(out_eh).set(DesyncSpecialCase{}); + } + } + } + // It is ok to do it here since the graph is visited in + // a topologic order and its consumers (those checking + // their input edges & initializing queues) are yet to be + // visited } break; case NodeKind::SLOT: @@ -760,7 +1030,14 @@ cv::gimpl::GStreamingExecutor::GStreamingExecutor(std::unique_ptr && ade::TypedGraph qgr(*m_island_graph); GAPI_Assert(nh->inEdges().size() == 1u); qgr.metadata(nh->inEdges().front()).set(DataQueue(queue_capacity)); - m_sink_queues[sink_idx] = &qgr.metadata(nh->inEdges().front()).get().q; + m_sink_queues[sink_idx] = qgr.metadata(nh->inEdges().front()).get().q.get(); + + // Assign a desync tag + const auto sink_out_nh = gm.metadata().get().out_nhs[sink_idx]; + if (gm.metadata(sink_out_nh).contains()) { + // metadata().get_or<> could make this thing better + m_sink_sync[sink_idx] = gm.metadata(sink_out_nh).get().index; + } } break; default: @@ -768,7 +1045,23 @@ cv::gimpl::GStreamingExecutor::GStreamingExecutor(std::unique_ptr && break; } // switch(kind) } // for(gim nodes) - m_out_queue.set_capacity(queue_capacity); + + // If there are desynchronized parts in the graph, there may be + // multiple theads polling every separate (desynchronized) + // branch in the graph individually. 
Prepare a mapping information + // for any such thread + for (auto &&idx : ade::util::iota(m_sink_queues.size())) { + auto path_id = m_sink_sync[idx]; + auto &info = m_collector_map[path_id]; + info.queues.push_back(m_sink_queues[idx]); + info.mapping.push_back(static_cast(idx)); + } + + // Reserve space in the final queue based on the number + // of desync parts (they can generate output individually + // per the same input frame, so the output traffic multiplies) + GAPI_Assert(m_collector_map.size() > 0u); + m_out_queue.set_capacity(queue_capacity * m_collector_map.size()); } cv::gimpl::GStreamingExecutor::~GStreamingExecutor() @@ -938,7 +1231,6 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) real_video_completion_cb); } - // Now do this for every island (in a topological order) for (auto &&op : m_ops) { @@ -974,10 +1266,27 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) out_queues); } - // Finally, start a collector thread. - m_threads.emplace_back(collectorThread, - m_sink_queues, - std::ref(m_out_queue)); + // Finally, start collector thread(s). + // If there are desynchronized parts in the graph, there may be + // multiple theads polling every separate (desynchronized) + // branch in the graph individually. + const bool has_main_path = m_sink_sync.end() != + std::find(m_sink_sync.begin(), m_sink_sync.end(), -1); + for (auto &&info : m_collector_map) { + m_threads.emplace_back(collectorThread, + info.second.queues, + info.second.mapping, + m_sink_queues.size(), + has_main_path ? info.first == -1 : true, // see below (*) + std::ref(m_out_queue)); + + // (*) - there may be a problem with desynchronized paths when those work + // faster than the main path. In this case, the desync paths get "Stop" message + // earlier and thus broadcast it down to pipeline gets stopped when there is + // some "main path" data to process. 
This new collectorThread's flag regulates it: + // - desync paths should never post Stop message if there is a main path. + // - if there is no main path, than any desync path can terminate the execution. + } state = State::READY; } @@ -1018,15 +1327,25 @@ void cv::gimpl::GStreamingExecutor::wait_shutdown() for (auto &q : m_internal_queues) q->clear(); m_out_queue.clear(); + for (auto &&op : m_ops) { + op.isl_exec->handleStopStream(); + } + state = State::STOPPED; } bool cv::gimpl::GStreamingExecutor::pull(cv::GRunArgsP &&outs) { + // This pull() can only be called when there's no desynchronized + // parts in the graph. + GAPI_Assert(!m_desync && + "This graph has desynchronized parts! Please use another pull()"); + if (state == State::STOPPED) return false; GAPI_Assert(state == State::RUNNING); - GAPI_Assert(m_sink_queues.size() == outs.size()); + GAPI_Assert(m_sink_queues.size() == outs.size() && + "Number of data objects in cv::gout() must match the number of graph outputs in cv::GOut()"); Cmd cmd; m_out_queue.pop(cmd); @@ -1036,12 +1355,39 @@ bool cv::gimpl::GStreamingExecutor::pull(cv::GRunArgsP &&outs) return false; } - GAPI_Assert(cv::util::holds_alternative(cmd)); - cv::GRunArgs &this_result = cv::util::get(cmd); + GAPI_Assert(cv::util::holds_alternative(cmd)); + cv::GRunArgs &this_result = cv::util::get(cmd).args; sync_data(this_result, outs); return true; } +bool cv::gimpl::GStreamingExecutor::pull(cv::GOptRunArgsP &&outs) +{ + // This pull() can only be called in both cases: if there are + // desyncrhonized parts or not. + + // FIXME: so far it is a full duplicate of standard pull except + // the sync_data version called. 
+ if (state == State::STOPPED) + return false; + GAPI_Assert(state == State::RUNNING); + GAPI_Assert(m_sink_queues.size() == outs.size() && + "Number of data objects in cv::gout() must match the number of graph outputs in cv::GOut()"); + + Cmd cmd; + m_out_queue.pop(cmd); + if (cv::util::holds_alternative(cmd)) + { + wait_shutdown(); + return false; + } + + GAPI_Assert(cv::util::holds_alternative(cmd)); + sync_data(cv::util::get(cmd), outs); + return true; +} + + bool cv::gimpl::GStreamingExecutor::try_pull(cv::GRunArgsP &&outs) { if (state == State::STOPPED) @@ -1059,8 +1405,8 @@ bool cv::gimpl::GStreamingExecutor::try_pull(cv::GRunArgsP &&outs) return false; } - GAPI_Assert(cv::util::holds_alternative(cmd)); - cv::GRunArgs &this_result = cv::util::get(cmd); + GAPI_Assert(cv::util::holds_alternative(cmd)); + cv::GRunArgs &this_result = cv::util::get(cmd).args; sync_data(this_result, outs); return true; } diff --git a/modules/gapi/src/executor/gstreamingexecutor.hpp b/modules/gapi/src/executor/gstreamingexecutor.hpp index d10f9eddd0..b6093ac1ef 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.hpp +++ b/modules/gapi/src/executor/gstreamingexecutor.hpp @@ -14,6 +14,8 @@ #include // unique_ptr, shared_ptr #include // thread +#include +#include #if defined(HAVE_TBB) # include // FIXME: drop it from here! @@ -22,6 +24,7 @@ template using QueueClass = tbb::concurrent_bounded_queue; # include "executor/conc_queue.hpp" template using QueueClass = cv::gapi::own::concurrent_bounded_queue; #endif // TBB +#include "executor/last_value.hpp" #include @@ -40,14 +43,61 @@ struct Stop { cv::GRunArg cdata; // const data for CNST stop }; +struct Result { + cv::GRunArgs args; // Full results vector + std::vector flags; // Availability flags (in case of desync) +}; + using Cmd = cv::util::variant < cv::util::monostate , Start // Tells emitters to start working. Not broadcasted to workers. , Stop // Tells emitters to stop working. Broadcasted to workers. 
, cv::GRunArg // Workers data payload to process. - , cv::GRunArgs // Full results vector + , Result // Pipeline's data for gout() >; -using Q = QueueClass; + +// Interface over a queue. The underlying queue implementation may be +// different. This class is mainly introduced to bring some +// abstraction over the real queues (bounded in-order) and a +// desynchronized data slots (see required to implement +// cv::gapi::desync) + +class Q { +public: + virtual void push(const Cmd &cmd) = 0; + virtual void pop(Cmd &cmd) = 0; + virtual bool try_pop(Cmd &cmd) = 0; + virtual void clear() = 0; + virtual ~Q() = default; +}; + +// A regular queue implementation +class SyncQueue final: public Q { + QueueClass m_q; // FIXME: OWN or WRAP?? + +public: + virtual void push(const Cmd &cmd) override { m_q.push(cmd); } + virtual void pop(Cmd &cmd) override { m_q.pop(cmd); } + virtual bool try_pop(Cmd &cmd) override { return m_q.try_pop(cmd); } + virtual void clear() override { m_q.clear(); } + + void set_capacity(std::size_t c) { m_q.set_capacity(c);} +}; + +// Desynchronized "queue" implementation +// Every push overwrites value which is not yet popped +// This container can hold 0 or 1 element +// Special handling for Stop is implemented (FIXME: not really) +class DesyncQueue final: public Q { + cv::gapi::own::last_written_value m_v; + +public: + virtual void push(const Cmd &cmd) override { m_v.push(cmd); } + virtual void pop(Cmd &cmd) override { m_v.pop(cmd); } + virtual bool try_pop(Cmd &cmd) override { return m_v.try_pop(cmd); } + virtual void clear() override { m_v.clear(); } +}; + } // namespace stream // FIXME: Currently all GExecutor comments apply also @@ -87,6 +137,7 @@ protected: util::optional m_reshapable; cv::gimpl::GIslandModel::Graph m_gim; // FIXME: make const? 
+ const bool m_desync; // FIXME: Naive executor details are here for now // but then it should be moved to another place @@ -117,11 +168,27 @@ protected: std::vector m_sinks; std::vector m_threads; - std::vector m_emitter_queues; - std::vector m_const_emitter_queues; // a view over m_emitter_queues - std::vector m_sink_queues; - std::unordered_set m_internal_queues; - stream::Q m_out_queue; + std::vector m_emitter_queues; + + // a view over m_emitter_queues + std::vector m_const_emitter_queues; + + std::vector m_sink_queues; + + // desync path tags for outputs. -1 means that output + // doesn't belong to a desync path + std::vector m_sink_sync; + + std::unordered_set m_internal_queues; + stream::SyncQueue m_out_queue; + + // Describes mapping from desync paths to collector threads + struct CollectorThreadInfo { + std::vector queues; + std::vector mapping; + }; + std::unordered_map m_collector_map; + void wait_shutdown(); @@ -132,6 +199,7 @@ public: void setSource(GRunArgs &&args); void start(); bool pull(cv::GRunArgsP &&outs); + bool pull(cv::GOptRunArgsP &&outs); bool try_pull(cv::GRunArgsP &&outs); void stop(); bool running() const; diff --git a/modules/gapi/src/executor/last_value.hpp b/modules/gapi/src/executor/last_value.hpp new file mode 100644 index 0000000000..152449a879 --- /dev/null +++ b/modules/gapi/src/executor/last_value.hpp @@ -0,0 +1,105 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + +#ifndef OPENCV_GAPI_EXECUTOR_LAST_VALUE_HPP +#define OPENCV_GAPI_EXECUTOR_LAST_VALUE_HPP + +#include +#include + +#include +#include + +namespace cv { +namespace gapi { +namespace own { + +// This class implements a "Last Written Value" thing. Writer threads +// (in our case, it is just one) can write as many values there as it +// can. 
+// +// The reader thread gets only a value it gets at the time (or blocks +// if there was no value written since the last read). +// +// Again, the implementation is highly inefficient right now. +template +class last_written_value { + cv::util::optional m_data; + + std::mutex m_mutex; + std::condition_variable m_cond_empty; + + void unsafe_pop(T &t); + +public: + last_written_value() {} + last_written_value(const last_written_value &cc) + : m_data(cc.m_data) { + // FIXME: what to do with all that locks, etc? + } + last_written_value(last_written_value &&cc) + : m_data(std::move(cc.m_data)) { + // FIXME: what to do with all that locks, etc? + } + + // FIXME: && versions + void push(const T &t); + void pop(T &t); + bool try_pop(T &t); + + // Not thread-safe + void clear(); +}; + +// Internal: do shared pop things assuming the lock is already there +template +void last_written_value::unsafe_pop(T &t) { + GAPI_Assert(m_data.has_value()); + t = std::move(m_data.value()); + m_data.reset(); +} + +// Push an element to the queue. Blocking if there's no space left +template +void last_written_value::push(const T& t) { + std::unique_lock lock(m_mutex); + m_data = cv::util::make_optional(t); + lock.unlock(); + m_cond_empty.notify_one(); +} + +// Pop an element from the queue. Blocking if there's no items +template +void last_written_value::pop(T &t) { + std::unique_lock lock(m_mutex); + if (!m_data.has_value()) { + // if there is no data, wait + m_cond_empty.wait(lock, [&](){return m_data.has_value();}); + } + unsafe_pop(t); +} + +// Try pop an element from the queue. Returns false if queue is empty +template +bool last_written_value::try_pop(T &t) { + std::unique_lock lock(m_mutex); + if (!m_data.has_value()) { + // if there is no data, return + return false; + } + unsafe_pop(t); + return true; +} + +// Clear the value holder. This method is not thread-safe. 
+template +void last_written_value::clear() { + m_data.reset(); +} + +}}} // namespace cv::gapi::own + +#endif // OPENCV_GAPI_EXECUTOR_LAST_VALUE_HPP diff --git a/modules/gapi/test/common/gapi_core_tests.hpp b/modules/gapi/test/common/gapi_core_tests.hpp index 4a0a7641f9..889e32f1c1 100644 --- a/modules/gapi/test/common/gapi_core_tests.hpp +++ b/modules/gapi/test/common/gapi_core_tests.hpp @@ -157,7 +157,7 @@ GAPI_TEST_EXT_BASE_FIXTURE(ParseSSDBLTest, ParserSSDTest, initNothing, GAPI_TEST_EXT_BASE_FIXTURE(ParseSSDTest, ParserSSDTest, initNothing, FIXTURE_API(float, bool, bool), 3, confidence_threshold, alignment_to_square, filter_out_of_bounds) GAPI_TEST_EXT_BASE_FIXTURE(ParseYoloTest, ParserYoloTest, initNothing, - FIXTURE_API(float, float, int), 3, confidence_threshold, nms_threshold, num_classes) + FIXTURE_API(float, float, int, std::pair), 4, confidence_threshold, nms_threshold, num_classes, dims_config) GAPI_TEST_FIXTURE(SizeTest, initMatrixRandU, <>, 0) GAPI_TEST_FIXTURE(SizeRTest, initNothing, <>, 0) } // opencv_test diff --git a/modules/gapi/test/common/gapi_core_tests_inl.hpp b/modules/gapi/test/common/gapi_core_tests_inl.hpp index e350a14e65..045b556369 100644 --- a/modules/gapi/test/common/gapi_core_tests_inl.hpp +++ b/modules/gapi/test/common/gapi_core_tests_inl.hpp @@ -618,7 +618,8 @@ TEST_P(SumTest, AccuracyTest) #undef countNonZero TEST_P(CountNonZeroTest, AccuracyTest) { - int out_cnz_gapi, out_cnz_ocv; + int out_cnz_gapi = -1; + int out_cnz_ocv = -2; // G-API code ////////////////////////////////////////////////////////////// cv::GMat in; @@ -1665,7 +1666,7 @@ TEST_P(ParseSSDTest, ParseTest) TEST_P(ParseYoloTest, ParseTest) { - cv::Mat in_mat = generateYoloOutput(num_classes); + cv::Mat in_mat = generateYoloOutput(num_classes, dims_config); auto anchors = cv::gapi::nn::parsers::GParseYolo::defaultAnchors(); std::vector boxes_gapi, boxes_ref; std::vector labels_gapi, labels_ref; @@ -1690,7 +1691,7 @@ TEST_P(SizeTest, ParseTest) cv::GMat in; cv::Size 
out_sz; - auto out = cv::gapi::size(in); + auto out = cv::gapi::streaming::size(in); cv::GComputation c(cv::GIn(in), cv::GOut(out)); c.apply(cv::gin(in_mat1), cv::gout(out_sz), getCompileArgs()); @@ -1703,7 +1704,7 @@ TEST_P(SizeRTest, ParseTest) cv::Size out_sz; cv::GOpaque op_rect; - auto out = cv::gapi::size(op_rect); + auto out = cv::gapi::streaming::size(op_rect); cv::GComputation c(cv::GIn(op_rect), cv::GOut(out)); c.apply(cv::gin(rect), cv::gout(out_sz), getCompileArgs()); diff --git a/modules/gapi/test/common/gapi_imgproc_tests.hpp b/modules/gapi/test/common/gapi_imgproc_tests.hpp index cd074efda0..b48b7b6732 100644 --- a/modules/gapi/test/common/gapi_imgproc_tests.hpp +++ b/modules/gapi/test/common/gapi_imgproc_tests.hpp @@ -46,6 +46,8 @@ GAPI_TEST_FIXTURE(Erode3x3Test, initMatrixRandN, FIXTURE_API(CompareMats,int), 2 GAPI_TEST_FIXTURE(DilateTest, initMatrixRandN, FIXTURE_API(CompareMats,int,int), 3, cmpF, kernSize, kernType) GAPI_TEST_FIXTURE(Dilate3x3Test, initMatrixRandN, FIXTURE_API(CompareMats,int), 2, cmpF, numIters) +GAPI_TEST_FIXTURE(MorphologyExTest, initMatrixRandN, FIXTURE_API(CompareMats,MorphTypes), + 2, cmpF, op) GAPI_TEST_FIXTURE(SobelTest, initMatrixRandN, FIXTURE_API(CompareMats,int,int,int), 4, cmpF, kernSize, dx, dy) GAPI_TEST_FIXTURE(SobelXYTest, initMatrixRandN, FIXTURE_API(CompareMats,int,int,int,int), 5, @@ -64,9 +66,45 @@ GAPI_TEST_FIXTURE_SPEC_PARAMS(GoodFeaturesTest, double,int,bool), 8, cmpF, fileName, type, maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector) +GAPI_TEST_FIXTURE_SPEC_PARAMS(FindContoursNoOffsetTest, + FIXTURE_API(cv::Size,MatType2,cv::RetrievalModes, + cv::ContourApproximationModes), + 4, sz, type, mode, method) +GAPI_TEST_FIXTURE_SPEC_PARAMS(FindContoursOffsetTest, <>, 0) +GAPI_TEST_FIXTURE_SPEC_PARAMS(FindContoursHNoOffsetTest, + FIXTURE_API(cv::Size,MatType2,cv::RetrievalModes, + cv::ContourApproximationModes), + 4, sz, type, mode, method) +GAPI_TEST_FIXTURE_SPEC_PARAMS(FindContoursHOffsetTest, 
<>, 0) +GAPI_TEST_FIXTURE(BoundingRectMatTest, initMatrixRandU, FIXTURE_API(CompareRects), 1, cmpF) +GAPI_TEST_FIXTURE(BoundingRectMatVector32STest, initNothing, FIXTURE_API(CompareRects), 1, cmpF) +GAPI_TEST_FIXTURE(BoundingRectMatVector32FTest, initNothing, FIXTURE_API(CompareRects), 1, cmpF) +GAPI_TEST_FIXTURE(BoundingRectVector32STest, initNothing, FIXTURE_API(CompareRects), 1, cmpF) +GAPI_TEST_FIXTURE(BoundingRectVector32FTest, initNothing, FIXTURE_API(CompareRects), 1, cmpF) +GAPI_TEST_FIXTURE(BGR2RGBTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF) GAPI_TEST_FIXTURE(RGB2GrayTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF) +GAPI_TEST_FIXTURE(FitLine2DMatVectorTest, initMatByPointsVectorRandU, + FIXTURE_API(CompareVecs,cv::DistanceTypes), 2, cmpF, distType) +GAPI_TEST_FIXTURE(FitLine2DVector32STest, initNothing, + FIXTURE_API(CompareVecs,cv::DistanceTypes), 2, cmpF, distType) +GAPI_TEST_FIXTURE(FitLine2DVector32FTest, initNothing, + FIXTURE_API(CompareVecs,cv::DistanceTypes), 2, cmpF, distType) +GAPI_TEST_FIXTURE(FitLine2DVector64FTest, initNothing, + FIXTURE_API(CompareVecs,cv::DistanceTypes), 2, cmpF, distType) +GAPI_TEST_FIXTURE(FitLine3DMatVectorTest, initMatByPointsVectorRandU, + FIXTURE_API(CompareVecs,cv::DistanceTypes), 2, cmpF, distType) +GAPI_TEST_FIXTURE(FitLine3DVector32STest, initNothing, + FIXTURE_API(CompareVecs,cv::DistanceTypes), 2, cmpF, distType) +GAPI_TEST_FIXTURE(FitLine3DVector32FTest, initNothing, + FIXTURE_API(CompareVecs,cv::DistanceTypes), 2, cmpF, distType) +GAPI_TEST_FIXTURE(FitLine3DVector64FTest, initNothing, + FIXTURE_API(CompareVecs,cv::DistanceTypes), 2, cmpF, distType) GAPI_TEST_FIXTURE(BGR2GrayTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF) GAPI_TEST_FIXTURE(RGB2YUVTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF) +GAPI_TEST_FIXTURE(BGR2I420Test, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF) +GAPI_TEST_FIXTURE(RGB2I420Test, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF) 
+GAPI_TEST_FIXTURE(I4202BGRTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF) +GAPI_TEST_FIXTURE(I4202RGBTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF) GAPI_TEST_FIXTURE(YUV2RGBTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF) GAPI_TEST_FIXTURE(YUV2GrayTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF) GAPI_TEST_FIXTURE(NV12toRGBTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF) diff --git a/modules/gapi/test/common/gapi_imgproc_tests_inl.hpp b/modules/gapi/test/common/gapi_imgproc_tests_inl.hpp index 4aadc17d5d..2a4f2e64ea 100644 --- a/modules/gapi/test/common/gapi_imgproc_tests_inl.hpp +++ b/modules/gapi/test/common/gapi_imgproc_tests_inl.hpp @@ -50,6 +50,27 @@ namespace rgb2yuyv(in_line_p, out_line_p, in.cols); } } + + // Draw random ellipses on given mat of given size and type + void initMatForFindingContours(cv::Mat& mat, const cv::Size& sz, const int type) + { + cv::RNG& rng = theRNG(); + mat = cv::Mat(sz, type, cv::Scalar::all(0)); + size_t numEllipses = rng.uniform(1, 10); + + for( size_t i = 0; i < numEllipses; i++ ) + { + cv::Point center; + cv::Size axes; + center.x = rng.uniform(0, sz.width); + center.y = rng.uniform(0, sz.height); + axes.width = rng.uniform(2, sz.width); + axes.height = rng.uniform(2, sz.height); + int color = rng.uniform(1, 256); + double angle = rng.uniform(0., 180.); + cv::ellipse(mat, center, axes, angle, 0., 360., color, 1, FILLED); + } + } } TEST_P(Filter2DTest, AccuracyTest) @@ -290,6 +311,29 @@ TEST_P(Dilate3x3Test, AccuracyTest) } } +TEST_P(MorphologyExTest, AccuracyTest) +{ + MorphShapes defShape = cv::MORPH_RECT; + int defKernSize = 3; + cv::Mat kernel = cv::getStructuringElement(defShape, cv::Size(defKernSize, defKernSize)); + + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto out = cv::gapi::morphologyEx(in, op, kernel); + + cv::GComputation c(in, out); + c.apply(in_mat1, out_mat_gapi, getCompileArgs()); + // OpenCV code 
///////////////////////////////////////////////////////////// + { + cv::morphologyEx(in_mat1, out_mat_ocv, op, kernel); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); + EXPECT_EQ(out_mat_gapi.size(), sz); + } +} + TEST_P(SobelTest, AccuracyTest) { // G-API code ////////////////////////////////////////////////////////////// @@ -447,6 +491,472 @@ TEST_P(GoodFeaturesTest, AccuracyTest) } } +TEST_P(FindContoursNoOffsetTest, AccuracyTest) +{ + std::vector> outCtsOCV, outCtsGAPI; + + initMatForFindingContours(in_mat1, sz, type); + out_mat_gapi = cv::Mat(sz, type, cv::Scalar::all(0)); + out_mat_ocv = cv::Mat(sz, type, cv::Scalar::all(0)); + + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::findContours(in_mat1, outCtsOCV, mode, method); + } + + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + cv::GArray> outCts; + outCts = cv::gapi::findContours(in, mode, method); + cv::GComputation c(GIn(in), GOut(outCts)); + c.apply(gin(in_mat1), gout(outCtsGAPI), getCompileArgs()); + + // Comparison ////////////////////////////////////////////////////////////// + EXPECT_TRUE(outCtsGAPI.size() == outCtsOCV.size()); + cv::fillPoly(out_mat_ocv, outCtsOCV, cv::Scalar::all(1)); + cv::fillPoly(out_mat_gapi, outCtsGAPI, cv::Scalar::all(1)); + EXPECT_TRUE(AbsExact().to_compare_f()(out_mat_ocv, out_mat_gapi)); +} + +TEST_P(FindContoursOffsetTest, AccuracyTest) +{ + const cv::Size sz(1280, 720); + const MatType2 type = CV_8UC1; + const cv::RetrievalModes mode = cv::RETR_EXTERNAL; + const cv::ContourApproximationModes method = cv::CHAIN_APPROX_NONE; + const cv::Point offset(15, 15); + std::vector> outCtsOCV, outCtsGAPI; + + initMatForFindingContours(in_mat1, sz, type); + out_mat_gapi = cv::Mat(sz, type, cv::Scalar::all(0)); + out_mat_ocv = cv::Mat(sz, type, cv::Scalar::all(0)); + + // OpenCV code 
///////////////////////////////////////////////////////////// + { + cv::findContours(in_mat1, outCtsOCV, mode, method, offset); + } + + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + GOpaque gOffset; + cv::GArray> outCts; + outCts = cv::gapi::findContours(in, mode, method, gOffset); + cv::GComputation c(GIn(in, gOffset), GOut(outCts)); + c.apply(gin(in_mat1, offset), gout(outCtsGAPI), getCompileArgs()); + + // Comparison ////////////////////////////////////////////////////////////// + EXPECT_TRUE(outCtsGAPI.size() == outCtsOCV.size()); + cv::fillPoly(out_mat_ocv, outCtsOCV, cv::Scalar::all(1)); + cv::fillPoly(out_mat_gapi, outCtsGAPI, cv::Scalar::all(1)); + EXPECT_TRUE(AbsExact().to_compare_f()(out_mat_ocv, out_mat_gapi)); +} + +TEST_P(FindContoursHNoOffsetTest, AccuracyTest) +{ + std::vector> outCtsOCV, outCtsGAPI; + std::vector outHierOCV, outHierGAPI; + + initMatForFindingContours(in_mat1, sz, type); + out_mat_gapi = cv::Mat(sz, type, cv::Scalar::all(0)); + out_mat_ocv = cv::Mat(sz, type, cv::Scalar::all(0)); + + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::findContours(in_mat1, outCtsOCV, outHierOCV, mode, method); + } + + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + cv::GArray> outCts; + cv::GArray outHier; + std::tie(outCts, outHier) = cv::gapi::findContoursH(in, mode, method); + cv::GComputation c(GIn(in), GOut(outCts, outHier)); + c.apply(gin(in_mat1), gout(outCtsGAPI, outHierGAPI), getCompileArgs()); + + // Comparison ////////////////////////////////////////////////////////////// + EXPECT_TRUE(outCtsGAPI.size() == outCtsOCV.size()); + cv::fillPoly(out_mat_ocv, outCtsOCV, cv::Scalar::all(1)); + cv::fillPoly(out_mat_gapi, outCtsGAPI, cv::Scalar::all(1)); + EXPECT_TRUE(AbsExact().to_compare_f()(out_mat_ocv, out_mat_gapi)); + + EXPECT_TRUE(outCtsGAPI.size() == outCtsOCV.size()); + 
EXPECT_TRUE(AbsExactVector().to_compare_f()(outHierOCV, outHierGAPI)); +} + +TEST_P(FindContoursHOffsetTest, AccuracyTest) +{ + const cv::Size sz(1280, 720); + const MatType2 type = CV_8UC1; + const cv::RetrievalModes mode = cv::RETR_EXTERNAL; + const cv::ContourApproximationModes method = cv::CHAIN_APPROX_NONE; + const cv::Point offset(15, 15); + std::vector> outCtsOCV, outCtsGAPI; + std::vector outHierOCV, outHierGAPI; + + initMatForFindingContours(in_mat1, sz, type); + out_mat_gapi = cv::Mat(sz, type, cv::Scalar::all(0)); + out_mat_ocv = cv::Mat(sz, type, cv::Scalar::all(0)); + + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::findContours(in_mat1, outCtsOCV, outHierOCV, mode, method, offset); + } + + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + GOpaque gOffset; + cv::GArray> outCts; + cv::GArray outHier; + std::tie(outCts, outHier) = cv::gapi::findContoursH(in, mode, method, gOffset); + cv::GComputation c(GIn(in, gOffset), GOut(outCts, outHier)); + c.apply(gin(in_mat1, offset), gout(outCtsGAPI, outHierGAPI), getCompileArgs()); + + // Comparison ////////////////////////////////////////////////////////////// + EXPECT_TRUE(outCtsGAPI.size() == outCtsOCV.size()); + cv::fillPoly(out_mat_ocv, outCtsOCV, cv::Scalar::all(1)); + cv::fillPoly(out_mat_gapi, outCtsGAPI, cv::Scalar::all(1)); + EXPECT_TRUE(AbsExact().to_compare_f()(out_mat_ocv, out_mat_gapi)); + + EXPECT_TRUE(outCtsGAPI.size() == outCtsOCV.size()); + EXPECT_TRUE(AbsExactVector().to_compare_f()(outHierOCV, outHierGAPI)); +} + +TEST_P(BoundingRectMatTest, AccuracyTest) +{ + cv::Rect out_rect_gapi, out_rect_ocv; + + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto out = cv::gapi::boundingRect(in); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_mat1), cv::gout(out_rect_gapi), getCompileArgs()); + // OpenCV code 
///////////////////////////////////////////////////////////// + { + out_rect_ocv = cv::boundingRect(in_mat1); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_rect_gapi, out_rect_ocv)); + } +} + +TEST_P(BoundingRectMatVector32STest, AccuracyTest) +{ + cv::Rect out_rect_gapi, out_rect_ocv; + + std::vector in_vectorS(sz.width); + cv::randu(in_vectorS, cv::Scalar::all(0), cv::Scalar::all(255)); + in_mat1 = cv::Mat(in_vectorS); + + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto out = cv::gapi::boundingRect(in); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_mat1), cv::gout(out_rect_gapi), getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + out_rect_ocv = cv::boundingRect(in_mat1); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_rect_gapi, out_rect_ocv)); + } +} + +TEST_P(BoundingRectMatVector32FTest, AccuracyTest) +{ + cv::RNG& rng = theRNG(); + cv::Rect out_rect_gapi, out_rect_ocv; + + std::vector in_vectorF(sz.width); + const int fscale = 256; // avoid bits near ULP, generate stable test input + for (int i = 0; i < sz.width; i++) + { + cv::Point2f pt(rng.uniform(0, 255 * fscale) / static_cast(fscale), + rng.uniform(0, 255 * fscale) / static_cast(fscale)); + in_vectorF.push_back(pt); + } + in_mat1 = cv::Mat(in_vectorF); + + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto out = cv::gapi::boundingRect(in); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_mat1), cv::gout(out_rect_gapi), getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + out_rect_ocv = cv::boundingRect(in_mat1); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_rect_gapi, 
out_rect_ocv)); + } +} + + +TEST_P(BoundingRectVector32STest, AccuracyTest) +{ + cv::Rect out_rect_gapi, out_rect_ocv; + + std::vector in_vectorS(sz.width); + cv::randu(in_vectorS, cv::Scalar::all(0), cv::Scalar::all(255)); + + // G-API code ////////////////////////////////////////////////////////////// + cv::GArray in; + auto out = cv::gapi::boundingRect(in); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_vectorS), cv::gout(out_rect_gapi), getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + out_rect_ocv = cv::boundingRect(in_vectorS); + } + + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_rect_gapi, out_rect_ocv)); + } +} + +TEST_P(BoundingRectVector32FTest, AccuracyTest) +{ + cv::RNG& rng = theRNG(); + cv::Rect out_rect_gapi, out_rect_ocv; + + std::vector in_vectorF(sz.width); + const int fscale = 256; // avoid bits near ULP, generate stable test input + for (int i = 0; i < sz.width; i++) + { + cv::Point2f pt(rng.uniform(0, 255 * fscale) / static_cast(fscale), + rng.uniform(0, 255 * fscale) / static_cast(fscale)); + in_vectorF.push_back(pt); + } + + // G-API code ////////////////////////////////////////////////////////////// + cv::GArray in; + auto out = cv::gapi::boundingRect(in); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_vectorF), cv::gout(out_rect_gapi), getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + out_rect_ocv = cv::boundingRect(in_vectorF); + } + + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_rect_gapi, out_rect_ocv)); + } +} + +TEST_P(FitLine2DMatVectorTest, AccuracyTest) +{ + cv::Vec4f out_vec_gapi, out_vec_ocv; + double paramDefault = 0., repsDefault = 0., aepsDefault = 0.; + + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto 
out = cv::gapi::fitLine2D(in, distType, paramDefault, repsDefault, aepsDefault); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_mat1), cv::gout(out_vec_gapi), getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::fitLine(in_mat1, out_vec_ocv, distType, paramDefault, repsDefault, aepsDefault); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_vec_gapi, out_vec_ocv)); + } +} + +TEST_P(FitLine2DVector32STest, AccuracyTest) +{ + cv::Vec4f out_vec_gapi, out_vec_ocv; + double paramDefault = 0., repsDefault = 0., aepsDefault = 0.; + + std::vector in_vec; + initPointsVectorRandU(sz.width, in_vec); + + // G-API code ////////////////////////////////////////////////////////////// + cv::GArray in; + auto out = cv::gapi::fitLine2D(in, distType, paramDefault, repsDefault, aepsDefault); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_vec), cv::gout(out_vec_gapi), getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::fitLine(in_vec, out_vec_ocv, distType, paramDefault, repsDefault, aepsDefault); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_vec_gapi, out_vec_ocv)); + } +} + +TEST_P(FitLine2DVector32FTest, AccuracyTest) +{ + cv::Vec4f out_vec_gapi, out_vec_ocv; + double paramDefault = 0., repsDefault = 0., aepsDefault = 0.; + + std::vector in_vec; + initPointsVectorRandU(sz.width, in_vec); + + // G-API code ////////////////////////////////////////////////////////////// + cv::GArray in; + auto out = cv::gapi::fitLine2D(in, distType, paramDefault, repsDefault, aepsDefault); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_vec), cv::gout(out_vec_gapi), getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::fitLine(in_vec, 
out_vec_ocv, distType, paramDefault, repsDefault, aepsDefault); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_vec_gapi, out_vec_ocv)); + } +} + +TEST_P(FitLine2DVector64FTest, AccuracyTest) +{ + cv::Vec4f out_vec_gapi, out_vec_ocv; + double paramDefault = 0., repsDefault = 0., aepsDefault = 0.; + + std::vector in_vec; + initPointsVectorRandU(sz.width, in_vec); + + // G-API code ////////////////////////////////////////////////////////////// + cv::GArray in; + auto out = cv::gapi::fitLine2D(in, distType, paramDefault, repsDefault, aepsDefault); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_vec), cv::gout(out_vec_gapi), getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::fitLine(in_vec, out_vec_ocv, distType, paramDefault, repsDefault, aepsDefault); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_vec_gapi, out_vec_ocv)); + } +} + +TEST_P(FitLine3DMatVectorTest, AccuracyTest) +{ + cv::Vec6f out_vec_gapi, out_vec_ocv; + double paramDefault = 0., repsDefault = 0., aepsDefault = 0.; + + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto out = cv::gapi::fitLine3D(in, distType, paramDefault, repsDefault, aepsDefault); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_mat1), cv::gout(out_vec_gapi), getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::fitLine(in_mat1, out_vec_ocv, distType, paramDefault, repsDefault, aepsDefault); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_vec_gapi, out_vec_ocv)); + } +} + +TEST_P(FitLine3DVector32STest, AccuracyTest) +{ + cv::Vec6f out_vec_gapi, out_vec_ocv; + double paramDefault = 0., repsDefault = 0., aepsDefault = 0.; + + std::vector in_vec; + 
initPointsVectorRandU(sz.width, in_vec); + + // G-API code ////////////////////////////////////////////////////////////// + cv::GArray in; + auto out = cv::gapi::fitLine3D(in, distType, paramDefault, repsDefault, aepsDefault); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_vec), cv::gout(out_vec_gapi), getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::fitLine(in_vec, out_vec_ocv, distType, paramDefault, repsDefault, aepsDefault); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_vec_gapi, out_vec_ocv)); + } +} + +TEST_P(FitLine3DVector32FTest, AccuracyTest) +{ + cv::Vec6f out_vec_gapi, out_vec_ocv; + double paramDefault = 0., repsDefault = 0., aepsDefault = 0.; + + std::vector in_vec; + initPointsVectorRandU(sz.width, in_vec); + + // G-API code ////////////////////////////////////////////////////////////// + cv::GArray in; + auto out = cv::gapi::fitLine3D(in, distType, paramDefault, repsDefault, aepsDefault); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_vec), cv::gout(out_vec_gapi), getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::fitLine(in_vec, out_vec_ocv, distType, paramDefault, repsDefault, aepsDefault); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_vec_gapi, out_vec_ocv)); + } +} + +TEST_P(FitLine3DVector64FTest, AccuracyTest) +{ + cv::Vec6f out_vec_gapi, out_vec_ocv; + double paramDefault = 0., repsDefault = 0., aepsDefault = 0.; + + std::vector in_vec; + initPointsVectorRandU(sz.width, in_vec); + + // G-API code ////////////////////////////////////////////////////////////// + cv::GArray in; + auto out = cv::gapi::fitLine3D(in, distType, paramDefault, repsDefault, aepsDefault); + + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(in_vec), 
cv::gout(out_vec_gapi), getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::fitLine(in_vec, out_vec_ocv, distType, paramDefault, repsDefault, aepsDefault); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_vec_gapi, out_vec_ocv)); + } +} + +TEST_P(BGR2RGBTest, AccuracyTest) +{ + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto out = cv::gapi::BGR2RGB(in); + + cv::GComputation c(in, out); + c.apply(in_mat1, out_mat_gapi, getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_BGR2RGB); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); + EXPECT_EQ(out_mat_gapi.size(), sz); + } +} + TEST_P(RGB2GrayTest, AccuracyTest) { // G-API code ////////////////////////////////////////////////////////////// @@ -523,6 +1033,82 @@ TEST_P(YUV2RGBTest, AccuracyTest) } } +TEST_P(BGR2I420Test, AccuracyTest) +{ + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto out = cv::gapi::BGR2I420(in); + + cv::GComputation c(in, out); + c.apply(in_mat1, out_mat_gapi, getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_BGR2YUV_I420); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); + EXPECT_EQ(out_mat_gapi.size(), Size(sz.width, sz.height * 3 / 2)); + } +} + +TEST_P(RGB2I420Test, AccuracyTest) +{ + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto out = cv::gapi::RGB2I420(in); + + cv::GComputation c(in, out); + c.apply(in_mat1, out_mat_gapi, getCompileArgs()); + // OpenCV code 
///////////////////////////////////////////////////////////// + { + cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_RGB2YUV_I420); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); + EXPECT_EQ(out_mat_gapi.size(), Size(sz.width, sz.height * 3 / 2)); + } +} + +TEST_P(I4202BGRTest, AccuracyTest) +{ + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto out = cv::gapi::I4202BGR(in); + + cv::GComputation c(in, out); + c.apply(in_mat1, out_mat_gapi, getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_YUV2BGR_I420); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); + EXPECT_EQ(out_mat_gapi.size(), Size(sz.width, sz.height * 2 / 3)); + } +} + +TEST_P(I4202RGBTest, AccuracyTest) +{ + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto out = cv::gapi::I4202RGB(in); + + cv::GComputation c(in, out); + c.apply(in_mat1, out_mat_gapi, getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + { + cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_YUV2RGB_I420); + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); + EXPECT_EQ(out_mat_gapi.size(), Size(sz.width, sz.height * 2 / 3)); + } +} + TEST_P(NV12toRGBTest, AccuracyTest) { // G-API code ////////////////////////////////////////////////////////////// diff --git a/modules/gapi/test/common/gapi_parsers_tests_common.hpp b/modules/gapi/test/common/gapi_parsers_tests_common.hpp index 127a1c5a5e..328f86b851 100644 --- a/modules/gapi/test/common/gapi_parsers_tests_common.hpp +++ b/modules/gapi/test/common/gapi_parsers_tests_common.hpp @@ -176,7 +176,7 @@ private: int 
randInRange(const int start, const int end) { GAPI_Assert(start <= end); - return start + std::rand() % (end - start + 1); + return theRNG().uniform(start, end); } cv::Rect generateBox(const cv::Size& in_sz) @@ -211,7 +211,7 @@ private: SSDitem it; it.image_id = static_cast(i); it.label = static_cast(randInRange(0, 9)); - it.confidence = static_cast(std::rand()) / RAND_MAX; + it.confidence = theRNG().uniform(0.f, 1.f); auto box = generateBox(in_sz); it.rc_left = normalize(box.x, in_sz.width); it.rc_right = normalize(box.x + box.width, in_sz.width); @@ -225,16 +225,30 @@ private: class ParserYoloTest { public: - cv::Mat generateYoloOutput(const int num_classes) + cv::Mat generateYoloOutput(const int num_classes, std::pair dims_config = {false, 4}) { - std::vector dims = { 1, 13, 13, (num_classes + 5) * 5 }; + bool one_dim = false; + int num_dims = 0; + std::tie(one_dim, num_dims) = dims_config; + GAPI_Assert(num_dims <= 4); + GAPI_Assert((!one_dim && num_dims >= 3) || + ( one_dim && num_dims >= 1)); + std::vector dims(num_dims, 1); + if (one_dim) { + dims.back() = (num_classes+5)*5*13*13; + } else { + dims.back() = (num_classes+5)*5; + dims[num_dims-2] = 13; + dims[num_dims-3] = 13; + } cv::Mat mat(dims, CV_32FC1); auto data = mat.ptr(); - const size_t range = dims[0] * dims[1] * dims[2] * dims[3]; + const size_t range = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); + cv::RNG& rng = theRNG(); for (size_t i = 0; i < range; ++i) { - data[i] = static_cast(std::rand()) / RAND_MAX; + data[i] = rng.uniform(0.f, 1.f); } return mat; } diff --git a/modules/gapi/test/common/gapi_tests_common.hpp b/modules/gapi/test/common/gapi_tests_common.hpp index 113f3c73c0..514fa2be38 100644 --- a/modules/gapi/test/common/gapi_tests_common.hpp +++ b/modules/gapi/test/common/gapi_tests_common.hpp @@ -74,6 +74,50 @@ namespace } #endif // WINRT } + + template inline void initPointRandU(cv::RNG &rng, cv::Point_& pt) + { + GAPI_Assert(std::is_integral::value); + pt = 
cv::Point_(static_cast(static_cast(rng(CHAR_MAX + 1U))), + static_cast(static_cast(rng(CHAR_MAX + 1U)))); + } + + template inline void initPointRandU(cv::RNG &rng, cv::Point3_& pt) + { + GAPI_Assert(std::is_integral::value); + pt = cv::Point3_(static_cast(static_cast(rng(CHAR_MAX + 1U))), + static_cast(static_cast(rng(CHAR_MAX + 1U))), + static_cast(static_cast(rng(CHAR_MAX + 1U)))); + } + + template inline void initFloatPointRandU(cv::RNG &rng, cv::Point_ &pt) + { + GAPI_Assert(std::is_floating_point::value); + static const int fscale = 256; // avoid bits near ULP, generate stable test input + pt = cv::Point_(rng.uniform(0, 255 * fscale) / static_cast(fscale), + rng.uniform(0, 255 * fscale) / static_cast(fscale)); + } + + template<> inline void initPointRandU(cv::RNG &rng, cv::Point2f &pt) + { initFloatPointRandU(rng, pt); } + + template<> inline void initPointRandU(cv::RNG &rng, cv::Point2d &pt) + { initFloatPointRandU(rng, pt); } + + template inline void initFloatPointRandU(cv::RNG &rng, cv::Point3_ &pt) + { + GAPI_Assert(std::is_floating_point::value); + static const int fscale = 256; // avoid bits near ULP, generate stable test input + pt = cv::Point3_(rng.uniform(0, 255 * fscale) / static_cast(fscale), + rng.uniform(0, 255 * fscale) / static_cast(fscale), + rng.uniform(0, 255 * fscale) / static_cast(fscale)); + } + + template<> inline void initPointRandU(cv::RNG &rng, cv::Point3f &pt) + { initFloatPointRandU(rng, pt); } + + template<> inline void initPointRandU(cv::RNG &rng, cv::Point3d &pt) + { initFloatPointRandU(rng, pt); } } // namespace namespace opencv_test @@ -279,6 +323,80 @@ public: } } + template + inline void initPointRandU(cv::RNG& rng, T& pt) + { ::initPointRandU(rng, pt); } + +// Disable unreachable code warning for MSVS 2015 +#if defined _MSC_VER && _MSC_VER < 1910 /*MSVS 2017*/ +#pragma warning(push) +#pragma warning(disable: 4702) +#endif + // initialize std::vector>/std::vector> + template class Pt> + void initPointsVectorRandU(const int 
sz_in, std::vector> &vec_) + { + cv::RNG& rng = theRNG(); + + vec_.clear(); + vec_.reserve(sz_in); + + for (int i = 0; i < sz_in; i++) + { + Pt pt; + initPointRandU(rng, pt); + vec_.emplace_back(pt); + } + } +#if defined _MSC_VER && _MSC_VER < 1910 /*MSVS 2017*/ +#pragma warning(pop) +#endif + + template + inline void initMatByPointsVectorRandU(const cv::Size &sz_in) + { + std::vector in_vector; + initPointsVectorRandU(sz_in.width, in_vector); + in_mat1 = cv::Mat(in_vector, true); + } + + // initialize Mat by a vector of Points + template