diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 505fe77f89..309524886e 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -34,11 +34,11 @@ This is a template helping you to create an issue which can be processed as quic
  - [ ] I report the issue, it's not a question
    <!--
    OpenCV team works with forum.opencv.org, Stack Overflow and other communities
-   to discuss problems. Tickets with question without real issue statement will be
+   to discuss problems. Tickets with questions without a real issue statement will be
    closed.
    -->
  - [ ] I checked the problem with documentation, FAQ, open issues,
-       forum.opencv.org, Stack Overflow, etc and have not found solution
+       forum.opencv.org, Stack Overflow, etc and have not found any solution
    <!--
    Places to check:
    * OpenCV documentation: https://docs.opencv.org
@@ -47,11 +47,11 @@ This is a template helping you to create an issue which can be processed as quic
    * OpenCV issue tracker: https://github.com/opencv/opencv/issues?q=is%3Aissue
    * Stack Overflow branch: https://stackoverflow.com/questions/tagged/opencv
    -->
- - [ ] I updated to latest OpenCV version and the issue is still there
+ - [ ] I updated to the latest OpenCV version and the issue is still there
    <!--
    master branch for OpenCV 4.x and 3.4 branch for OpenCV 3.x releases.
-   OpenCV team supports only latest release for each branch.
-   The ticket is closed, if the problem is not reproduced with modern version.
+   OpenCV team supports only the latest release for each branch.
+   The ticket is closed if the problem cannot be reproduced with the latest version.
    -->
  - [ ] There is reproducer code and related data files: videos, images, onnx, etc
    <!--
@@ -61,9 +61,9 @@ This is a template helping you to create an issue which can be processed as quic
      to reduce attachment size
    * Use PNG for images, if you report some CV related bug, but not image reader
      issue
-   * Attach the image as archive to the ticket, if you report some reader issue.
+   * Attach the image as an archive to the ticket, if you report some reader issue.
      Image hosting services compress images and it breaks the repro code.
-   * Provide ONNX file for some public model or ONNX file with with random weights,
+   * Provide ONNX file for some public model or ONNX file with random weights,
      if you report ONNX parsing or handling issue. Architecture details diagram
      from netron tool can be very useful too. See https://lutzroeder.github.io/netron/
    -->
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 0454d1aed8..5e2e911cc8 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -3,9 +3,9 @@
 See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
 
 - [ ] I agree to contribute to the project under Apache 2 License.
-- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or other license that is incompatible with OpenCV
-- [ ] The PR is proposed to proper branch
-- [ ] There is reference to original bug report and related work
+- [ ] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
+- [ ] The PR is proposed to the proper branch
+- [ ] There is a reference to the original bug report and related work
 - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
       Patch to opencv_extra has the same branch name.
 - [ ] The feature is well documented and sample code can be built with the project CMake
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6c8c1a0172..8ec7b65030 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -244,7 +244,7 @@ OCV_OPTION(WITH_AVFOUNDATION "Use AVFoundation for Video I/O (iOS/Mac)" ON
 OCV_OPTION(WITH_CAP_IOS "Enable iOS video capture" ON
   VISIBLE_IF IOS
   VERIFY HAVE_CAP_IOS)
-OCV_OPTION(WITH_CAROTENE "Use NVidia carotene acceleration library for ARM platform" ON
+OCV_OPTION(WITH_CAROTENE "Use NVidia carotene acceleration library for ARM platform" (NOT CV_DISABLE_OPTIMIZATION)
   VISIBLE_IF (ARM OR AARCH64) AND NOT IOS)
 OCV_OPTION(WITH_CPUFEATURES "Use cpufeatures Android library" ON
   VISIBLE_IF ANDROID
@@ -291,12 +291,10 @@ OCV_OPTION(WITH_HALIDE "Include Halide support" OFF
 OCV_OPTION(WITH_VULKAN "Include Vulkan support" OFF
   VISIBLE_IF TRUE
   VERIFY HAVE_VULKAN)
-OCV_OPTION(WITH_INF_ENGINE "Include Intel Inference Engine support" OFF
+# replacement for deprecated options: WITH_INF_ENGINE, WITH_NGRAPH
+OCV_OPTION(WITH_OPENVINO "Include Intel OpenVINO toolkit support" (WITH_INF_ENGINE)
   VISIBLE_IF TRUE
-  VERIFY INF_ENGINE_TARGET)
-OCV_OPTION(WITH_NGRAPH "Include nGraph support" WITH_INF_ENGINE
-  VISIBLE_IF TRUE
-  VERIFY TARGET ngraph::ngraph)
+  VERIFY TARGET ocv.3rdparty.openvino)
 OCV_OPTION(WITH_WEBNN "Include WebNN support" OFF
   VISIBLE_IF TRUE
   VERIFY HAVE_WEBNN)
@@ -809,7 +807,7 @@ if(WITH_WEBNN)
 endif()
 
 # --- Inference Engine ---
-if(WITH_INF_ENGINE)
+if(WITH_INF_ENGINE OR WITH_OPENVINO)
   include(cmake/OpenCVDetectInferenceEngine.cmake)
 endif()
 
@@ -1548,55 +1546,61 @@ if(WITH_HALIDE OR HAVE_HALIDE)
   status("    Halide:"     HAVE_HALIDE      THEN "YES (${HALIDE_LIBRARIES} ${HALIDE_INCLUDE_DIRS})" ELSE NO)
 endif()
 
-if(WITH_INF_ENGINE OR INF_ENGINE_TARGET)
-  if(INF_ENGINE_TARGET)
-    list(GET INF_ENGINE_TARGET 0 ie_target)
-    set(__msg "YES (${INF_ENGINE_RELEASE} / ${INF_ENGINE_VERSION})")
-    ocv_get_imported_target(ie_target "${ie_target}")
-    get_target_property(_lib ${ie_target} IMPORTED_LOCATION)
-    get_target_property(_lib_imp_rel ${ie_target} IMPORTED_IMPLIB_RELEASE)
-    get_target_property(_lib_imp_dbg ${ie_target} IMPORTED_IMPLIB_DEBUG)
-    get_target_property(_lib_rel ${ie_target} IMPORTED_LOCATION_RELEASE)
-    get_target_property(_lib_dbg ${ie_target} IMPORTED_LOCATION_DEBUG)
-    ocv_build_features_string(_lib
-      IF _lib THEN "${_lib}"
-      IF _lib_imp_rel AND _lib_imp_dbg THEN "${_lib_imp_rel} / ${_lib_imp_dbg}"
-      IF _lib_rel AND _lib_dbg THEN "${_lib_rel} / ${_lib_dbg}"
-      IF _lib_rel  THEN "${_lib_rel}"
-      IF _lib_dbg  THEN "${_lib_dbg}"
-      ELSE "unknown"
-    )
-    get_target_property(_inc ${ie_target} INTERFACE_INCLUDE_DIRECTORIES)
-    status("    Inference Engine:" "${__msg}")
-    status("        * libs:" "${_lib}")
-    status("        * includes:" "${_inc}")
-  else()
-    status("    Inference Engine:"     "NO")
+if(HAVE_OPENVINO
+    OR (WITH_OPENVINO AND NOT WITH_INF_ENGINE AND NOT INF_ENGINE_TARGET)
+)
+  status("    OpenVINO:" TARGET openvino::runtime THEN "YES (${OpenVINO_VERSION})" ELSE "NO")
+else()
+  if(WITH_INF_ENGINE OR INF_ENGINE_TARGET)
+    if(INF_ENGINE_TARGET)
+      list(GET INF_ENGINE_TARGET 0 ie_target)
+      set(__msg "YES (${INF_ENGINE_RELEASE} / ${INF_ENGINE_VERSION})")
+      ocv_get_imported_target(ie_target "${ie_target}")
+      get_target_property(_lib ${ie_target} IMPORTED_LOCATION)
+      get_target_property(_lib_imp_rel ${ie_target} IMPORTED_IMPLIB_RELEASE)
+      get_target_property(_lib_imp_dbg ${ie_target} IMPORTED_IMPLIB_DEBUG)
+      get_target_property(_lib_rel ${ie_target} IMPORTED_LOCATION_RELEASE)
+      get_target_property(_lib_dbg ${ie_target} IMPORTED_LOCATION_DEBUG)
+      ocv_build_features_string(_lib
+        IF _lib THEN "${_lib}"
+        IF _lib_imp_rel AND _lib_imp_dbg THEN "${_lib_imp_rel} / ${_lib_imp_dbg}"
+        IF _lib_rel AND _lib_dbg THEN "${_lib_rel} / ${_lib_dbg}"
+        IF _lib_rel  THEN "${_lib_rel}"
+        IF _lib_dbg  THEN "${_lib_dbg}"
+        ELSE "unknown"
+      )
+      get_target_property(_inc ${ie_target} INTERFACE_INCLUDE_DIRECTORIES)
+      status("    Inference Engine:" "${__msg}")
+      status("        * libs:" "${_lib}")
+      status("        * includes:" "${_inc}")
+    else()
+      status("    Inference Engine:"     "NO")
+    endif()
   endif()
-endif()
-if(WITH_NGRAPH OR HAVE_NGRAPH)
-  if(HAVE_NGRAPH)
-    ocv_get_imported_target(__target ngraph::ngraph)
-    set(__msg "YES (${ngraph_VERSION})")
-    get_target_property(_lib ${__target} IMPORTED_LOCATION)
-    get_target_property(_lib_imp_rel ${__target} IMPORTED_IMPLIB_RELEASE)
-    get_target_property(_lib_imp_dbg ${__target} IMPORTED_IMPLIB_DEBUG)
-    get_target_property(_lib_rel ${__target} IMPORTED_LOCATION_RELEASE)
-    get_target_property(_lib_dbg ${__target} IMPORTED_LOCATION_DEBUG)
-    ocv_build_features_string(_lib
-      IF _lib THEN "${_lib}"
-      IF _lib_imp_rel AND _lib_imp_dbg THEN "${_lib_imp_rel} / ${_lib_imp_dbg}"
-      IF _lib_rel AND _lib_dbg THEN "${_lib_rel} / ${_lib_dbg}"
-      IF _lib_rel  THEN "${_lib_rel}"
-      IF _lib_dbg  THEN "${_lib_dbg}"
-      ELSE "unknown"
-    )
-    get_target_property(_inc ${__target} INTERFACE_INCLUDE_DIRECTORIES)
-    status("    nGraph:" "${__msg}")
-    status("        * libs:" "${_lib}")
-    status("        * includes:" "${_inc}")
-  else()
-    status("    nGraph:"     "NO")
+  if(WITH_NGRAPH OR HAVE_NGRAPH)
+    if(HAVE_NGRAPH)
+      ocv_get_imported_target(__target ngraph::ngraph)
+      set(__msg "YES (${ngraph_VERSION})")
+      get_target_property(_lib ${__target} IMPORTED_LOCATION)
+      get_target_property(_lib_imp_rel ${__target} IMPORTED_IMPLIB_RELEASE)
+      get_target_property(_lib_imp_dbg ${__target} IMPORTED_IMPLIB_DEBUG)
+      get_target_property(_lib_rel ${__target} IMPORTED_LOCATION_RELEASE)
+      get_target_property(_lib_dbg ${__target} IMPORTED_LOCATION_DEBUG)
+      ocv_build_features_string(_lib
+        IF _lib THEN "${_lib}"
+        IF _lib_imp_rel AND _lib_imp_dbg THEN "${_lib_imp_rel} / ${_lib_imp_dbg}"
+        IF _lib_rel AND _lib_dbg THEN "${_lib_rel} / ${_lib_dbg}"
+        IF _lib_rel  THEN "${_lib_rel}"
+        IF _lib_dbg  THEN "${_lib_dbg}"
+        ELSE "unknown"
+      )
+      get_target_property(_inc ${__target} INTERFACE_INCLUDE_DIRECTORIES)
+      status("    nGraph:" "${__msg}")
+      status("        * libs:" "${_lib}")
+      status("        * includes:" "${_inc}")
+    else()
+      status("    nGraph:"     "NO")
+    endif()
   endif()
 endif()
 
diff --git a/apps/opencv_stitching_tool/opencv_stitching/blender.py b/apps/opencv_stitching_tool/opencv_stitching/blender.py
index 2b6e74a810..5ee2a717f1 100644
--- a/apps/opencv_stitching_tool/opencv_stitching/blender.py
+++ b/apps/opencv_stitching_tool/opencv_stitching/blender.py
@@ -26,8 +26,8 @@ class Blender:
 
         elif self.blender_type == "multiband":
             self.blender = cv.detail_MultiBandBlender()
-            self.blender.setNumBands((np.log(blend_width) /
-                                      np.log(2.) - 1.).astype(np.int))
+            self.blender.setNumBands(int((np.log(blend_width) /
+                                          np.log(2.) - 1.)))
 
         elif self.blender_type == "feather":
             self.blender = cv.detail_FeatherBlender()
@@ -45,4 +45,12 @@ class Blender:
         result_mask = None
         result, result_mask = self.blender.blend(result, result_mask)
         result = cv.convertScaleAbs(result)
-        return result
+        return result, result_mask
+
+    @classmethod
+    def create_panorama(cls, imgs, masks, corners, sizes):
+        blender = cls("no")
+        blender.prepare(corners, sizes)
+        for img, mask, corner in zip(imgs, masks, corners):
+            blender.feed(img, mask, corner)
+        return blender.blend()
diff --git a/apps/opencv_stitching_tool/opencv_stitching/cropper.py b/apps/opencv_stitching_tool/opencv_stitching/cropper.py
new file mode 100644
index 0000000000..243a6dc7b0
--- /dev/null
+++ b/apps/opencv_stitching_tool/opencv_stitching/cropper.py
@@ -0,0 +1,149 @@
+from collections import namedtuple
+import cv2 as cv
+
+from .blender import Blender
+from .stitching_error import StitchingError
+
+
+class Rectangle(namedtuple('Rectangle', 'x y width height')):
+    __slots__ = ()
+
+    @property
+    def area(self):
+        return self.width * self.height
+
+    @property
+    def corner(self):
+        return (self.x, self.y)
+
+    @property
+    def size(self):
+        return (self.width, self.height)
+
+    @property
+    def x2(self):
+        return self.x + self.width
+
+    @property
+    def y2(self):
+        return self.y + self.height
+
+    def times(self, x):
+        return Rectangle(*(int(round(i*x)) for i in self))
+
+    def draw_on(self, img, color=(0, 0, 255), size=1):
+        if len(img.shape) == 2:
+            img = cv.cvtColor(img, cv.COLOR_GRAY2RGB)
+        start_point = (self.x, self.y)
+        end_point = (self.x2-1, self.y2-1)
+        cv.rectangle(img, start_point, end_point, color, size)
+        return img
+
+
+class Cropper:
+
+    DEFAULT_CROP = False
+
+    def __init__(self, crop=DEFAULT_CROP):
+        self.do_crop = crop
+        self.overlapping_rectangles, self.cropping_rectangles = [], []
+        self.intersection_rectangles = []  # set by prepare(), read by crop_img()
+
+    def prepare(self, imgs, masks, corners, sizes):
+        if self.do_crop:
+            mask = self.estimate_panorama_mask(imgs, masks, corners, sizes)
+            self.compile_numba_functionality()
+            lir = self.estimate_largest_interior_rectangle(mask)
+            corners = self.get_zero_center_corners(corners)
+            rectangles = self.get_rectangles(corners, sizes)
+            self.overlapping_rectangles = self.get_overlaps(
+                rectangles, lir)
+            self.intersection_rectangles = self.get_intersections(
+                rectangles, self.overlapping_rectangles)
+
+    def crop_images(self, imgs, aspect=1):
+        for idx, img in enumerate(imgs):
+            yield self.crop_img(img, idx, aspect)
+
+    def crop_img(self, img, idx, aspect=1):
+        if self.do_crop:
+            intersection_rect = self.intersection_rectangles[idx]
+            scaled_intersection_rect = intersection_rect.times(aspect)
+            cropped_img = self.crop_rectangle(img, scaled_intersection_rect)
+            return cropped_img
+        return img
+
+    def crop_rois(self, corners, sizes, aspect=1):
+        if self.do_crop:
+            scaled_overlaps = \
+                [r.times(aspect) for r in self.overlapping_rectangles]
+            cropped_corners = [r.corner for r in scaled_overlaps]
+            cropped_corners = self.get_zero_center_corners(cropped_corners)
+            cropped_sizes = [r.size for r in scaled_overlaps]
+            return cropped_corners, cropped_sizes
+        return corners, sizes
+
+    @staticmethod
+    def estimate_panorama_mask(imgs, masks, corners, sizes):
+        _, mask = Blender.create_panorama(imgs, masks, corners, sizes)
+        return mask
+
+    def compile_numba_functionality(self):
+        # Lazily import the numba-based largest-interior-rectangle code;
+        # it is only needed when cropping is explicitly desired.
+        try:
+            import numba  # probe: is numba installed?
+        except ModuleNotFoundError:
+            raise StitchingError("Numba is needed for cropping but not installed")
+        from .largest_interior_rectangle import largest_interior_rectangle
+        self.largest_interior_rectangle = largest_interior_rectangle
+
+    def estimate_largest_interior_rectangle(self, mask):
+        lir = self.largest_interior_rectangle(mask)
+        lir = Rectangle(*lir)
+        return lir
+
+    @staticmethod
+    def get_zero_center_corners(corners):
+        min_corner_x = min([corner[0] for corner in corners])
+        min_corner_y = min([corner[1] for corner in corners])
+        return [(x - min_corner_x, y - min_corner_y) for x, y in corners]
+
+    @staticmethod
+    def get_rectangles(corners, sizes):
+        rectangles = []
+        for corner, size in zip(corners, sizes):
+            rectangle = Rectangle(*corner, *size)
+            rectangles.append(rectangle)
+        return rectangles
+
+    @staticmethod
+    def get_overlaps(rectangles, lir):
+        return [Cropper.get_overlap(r, lir) for r in rectangles]
+
+    @staticmethod
+    def get_overlap(rectangle1, rectangle2):
+        x1 = max(rectangle1.x, rectangle2.x)
+        y1 = max(rectangle1.y, rectangle2.y)
+        x2 = min(rectangle1.x2, rectangle2.x2)
+        y2 = min(rectangle1.y2, rectangle2.y2)
+        if x2 < x1 or y2 < y1:
+            raise StitchingError("Rectangles do not overlap!")
+        return Rectangle(x1, y1, x2-x1, y2-y1)
+
+    @staticmethod
+    def get_intersections(rectangles, overlapping_rectangles):
+        return [Cropper.get_intersection(r, overlap_r) for r, overlap_r
+                in zip(rectangles, overlapping_rectangles)]
+
+    @staticmethod
+    def get_intersection(rectangle, overlapping_rectangle):
+        x = abs(overlapping_rectangle.x - rectangle.x)
+        y = abs(overlapping_rectangle.y - rectangle.y)
+        width = overlapping_rectangle.width
+        height = overlapping_rectangle.height
+        return Rectangle(x, y, width, height)
+
+    @staticmethod
+    def crop_rectangle(img, rectangle):
+        return img[rectangle.y:rectangle.y2, rectangle.x:rectangle.x2]
diff --git a/apps/opencv_stitching_tool/opencv_stitching/feature_matcher.py b/apps/opencv_stitching_tool/opencv_stitching/feature_matcher.py
index 2b1d5e6461..2a3fc8c434 100644
--- a/apps/opencv_stitching_tool/opencv_stitching/feature_matcher.py
+++ b/apps/opencv_stitching_tool/opencv_stitching/feature_matcher.py
@@ -19,10 +19,10 @@ class FeatureMatcher:
             self.matcher = cv.detail_AffineBestOf2NearestMatcher(**kwargs)
         elif range_width == -1:
             """https://docs.opencv.org/5.x/d4/d26/classcv_1_1detail_1_1BestOf2NearestMatcher.html"""  # noqa
-            self.matcher = cv.detail.BestOf2NearestMatcher_create(**kwargs)
+            self.matcher = cv.detail_BestOf2NearestMatcher(**kwargs)
         else:
             """https://docs.opencv.org/5.x/d8/d72/classcv_1_1detail_1_1BestOf2NearestRangeMatcher.html"""  # noqa
-            self.matcher = cv.detail.BestOf2NearestRangeMatcher_create(
+            self.matcher = cv.detail_BestOf2NearestRangeMatcher(
                 range_width, **kwargs
                 )
 
diff --git a/apps/opencv_stitching_tool/opencv_stitching/image_handler.py b/apps/opencv_stitching_tool/opencv_stitching/image_handler.py
index a3b76b288a..3be9ff4817 100644
--- a/apps/opencv_stitching_tool/opencv_stitching/image_handler.py
+++ b/apps/opencv_stitching_tool/opencv_stitching/image_handler.py
@@ -1,6 +1,6 @@
 import cv2 as cv
 
-from .megapix_downscaler import MegapixDownscaler
+from .megapix_scaler import MegapixDownscaler
 from .stitching_error import StitchingError
 
 class ImageHandler:
@@ -35,7 +35,7 @@ class ImageHandler:
 
     def resize_to_low_resolution(self, medium_imgs=None):
         if medium_imgs and self.scales_set:
-            return self.resize_medium_to_low(medium_imgs)
+            return self.resize_imgs_by_scaler(medium_imgs, self.low_scaler)
         return self.read_and_resize_imgs(self.low_scaler)
 
     def resize_to_final_resolution(self):
@@ -45,9 +45,9 @@ class ImageHandler:
         for img, size in self.input_images():
             yield self.resize_img_by_scaler(scaler, size, img)
 
-    def resize_medium_to_low(self, medium_imgs):
+    def resize_imgs_by_scaler(self, medium_imgs, scaler):
         for img, size in zip(medium_imgs, self.img_sizes):
-            yield self.resize_img_by_scaler(self.low_scaler, size, img)
+            yield self.resize_img_by_scaler(scaler, size, img)
 
     @staticmethod
     def resize_img_by_scaler(scaler, size, img):
@@ -92,3 +92,14 @@ class ImageHandler:
 
     def get_final_to_low_ratio(self):
         return self.low_scaler.scale / self.final_scaler.scale
+
+    def get_low_to_final_ratio(self):
+        return self.final_scaler.scale / self.low_scaler.scale
+
+    def get_final_img_sizes(self):
+        return [self.final_scaler.get_scaled_img_size(sz)
+                for sz in self.img_sizes]
+
+    def get_low_img_sizes(self):
+        return [self.low_scaler.get_scaled_img_size(sz)
+                for sz in self.img_sizes]
diff --git a/apps/opencv_stitching_tool/opencv_stitching/largest_interior_rectangle.py b/apps/opencv_stitching_tool/opencv_stitching/largest_interior_rectangle.py
new file mode 100644
index 0000000000..5f0a82f7b9
--- /dev/null
+++ b/apps/opencv_stitching_tool/opencv_stitching/largest_interior_rectangle.py
@@ -0,0 +1,303 @@
+import numpy as np
+import numba as nb
+import cv2 as cv
+
+from .stitching_error import StitchingError
+
+
+def largest_interior_rectangle(cells):
+    outline = get_outline(cells)
+    adjacencies = adjacencies_all_directions(cells)
+    s_map, _, saddle_candidates_map = create_maps(outline, adjacencies)
+    lir1 = biggest_span_in_span_map(s_map)
+
+    candidate_cells = cells_of_interest(saddle_candidates_map)
+    s_map = span_map(adjacencies[0], adjacencies[2], candidate_cells)
+    lir2 = biggest_span_in_span_map(s_map)
+
+    lir = biggest_rectangle(lir1, lir2)
+    return lir
+
+
+def get_outline(cells):
+    contours, hierarchy = \
+        cv.findContours(cells, cv.RETR_TREE, cv.CHAIN_APPROX_NONE)
+    # TODO support multiple contours
+    # test that only one regular contour exists
+    if not hierarchy.shape == (1, 1, 4) or not np.all(hierarchy == -1):
+        raise StitchingError("Invalid Contour. Try without cropping.")
+    contour = contours[0][:, 0, :]
+    x_values = contour[:, 0].astype("uint32", order="C")
+    y_values = contour[:, 1].astype("uint32", order="C")
+    return x_values, y_values
+
+
+@nb.njit('uint32[:,::1](uint8[:,::1], boolean)', parallel=True, cache=True)
+def horizontal_adjacency(cells, direction):
+    result = np.zeros(cells.shape, dtype=np.uint32)
+    for y in nb.prange(cells.shape[0]):
+        span = 0
+        if direction:
+            iterator = range(cells.shape[1]-1, -1, -1)
+        else:
+            iterator = range(cells.shape[1])
+        for x in iterator:
+            if cells[y, x] > 0:
+                span += 1
+            else:
+                span = 0
+            result[y, x] = span
+    return result
+
+
+@nb.njit('uint32[:,::1](uint8[:,::1], boolean)', parallel=True, cache=True)
+def vertical_adjacency(cells, direction):
+    result = np.zeros(cells.shape, dtype=np.uint32)
+    for x in nb.prange(cells.shape[1]):
+        span = 0
+        if direction:
+            iterator = range(cells.shape[0]-1, -1, -1)
+        else:
+            iterator = range(cells.shape[0])
+        for y in iterator:
+            if cells[y, x] > 0:
+                span += 1
+            else:
+                span = 0
+            result[y, x] = span
+    return result
+
+
+@nb.njit(cache=True)
+def adjacencies_all_directions(cells):
+    h_left2right = horizontal_adjacency(cells, 1)
+    h_right2left = horizontal_adjacency(cells, 0)
+    v_top2bottom = vertical_adjacency(cells, 1)
+    v_bottom2top = vertical_adjacency(cells, 0)
+    return h_left2right, h_right2left, v_top2bottom, v_bottom2top
+
+
+@nb.njit('uint32(uint32[:])', cache=True)
+def predict_vector_size(array):
+    zero_indices = np.where(array == 0)[0]
+    if len(zero_indices) == 0:
+        if len(array) == 0:
+            return 0
+        return len(array)
+    return zero_indices[0]
+
+
+@nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True)
+def h_vector_top2bottom(h_adjacency, x, y):
+    """Descending unique running minima of column x, going down from row y."""
+    vector_size = predict_vector_size(h_adjacency[y:, x])
+    h_vector = np.zeros(vector_size, dtype=np.uint32)
+    h = np.inf  # np.Inf alias was removed in NumPy 2.0
+    for p in range(vector_size):
+        h = np.minimum(h_adjacency[y+p, x], h)
+        h_vector[p] = h
+    return np.unique(h_vector)[::-1]
+
+
+@nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True)
+def h_vector_bottom2top(h_adjacency, x, y):
+    """Descending unique running minima of column x, going up from row y."""
+    vector_size = predict_vector_size(np.flip(h_adjacency[:y+1, x]))
+    h_vector = np.zeros(vector_size, dtype=np.uint32)
+    h = np.inf  # np.Inf alias was removed in NumPy 2.0
+    for p in range(vector_size):
+        h = np.minimum(h_adjacency[y-p, x], h)
+        h_vector[p] = h
+    return np.unique(h_vector)[::-1]
+
+
+@nb.njit(cache=True)
+def h_vectors_all_directions(h_left2right, h_right2left, x, y):
+    h_l2r_t2b = h_vector_top2bottom(h_left2right, x, y)
+    h_r2l_t2b = h_vector_top2bottom(h_right2left, x, y)
+    h_l2r_b2t = h_vector_bottom2top(h_left2right, x, y)
+    h_r2l_b2t = h_vector_bottom2top(h_right2left, x, y)
+    return h_l2r_t2b, h_r2l_t2b, h_l2r_b2t, h_r2l_b2t
+
+
+@nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True)
+def v_vector_left2right(v_adjacency, x, y):
+    """Descending unique running minima of row y, going right from column x."""
+    vector_size = predict_vector_size(v_adjacency[y, x:])
+    v_vector = np.zeros(vector_size, dtype=np.uint32)
+    v = np.inf  # np.Inf alias was removed in NumPy 2.0
+    for q in range(vector_size):
+        v = np.minimum(v_adjacency[y, x+q], v)
+        v_vector[q] = v
+    return np.unique(v_vector)[::-1]
+
+
+@nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True)
+def v_vector_right2left(v_adjacency, x, y):
+    """Descending unique running minima of row y, going left from column x."""
+    vector_size = predict_vector_size(np.flip(v_adjacency[y, :x+1]))
+    v_vector = np.zeros(vector_size, dtype=np.uint32)
+    v = np.inf  # np.Inf alias was removed in NumPy 2.0
+    for q in range(vector_size):
+        v = np.minimum(v_adjacency[y, x-q], v)
+        v_vector[q] = v
+    return np.unique(v_vector)[::-1]
+
+
+@nb.njit(cache=True)
+def v_vectors_all_directions(v_top2bottom, v_bottom2top, x, y):
+    v_l2r_t2b = v_vector_left2right(v_top2bottom, x, y)
+    v_r2l_t2b = v_vector_right2left(v_top2bottom, x, y)
+    v_l2r_b2t = v_vector_left2right(v_bottom2top, x, y)
+    v_r2l_b2t = v_vector_right2left(v_bottom2top, x, y)
+    return v_l2r_t2b, v_r2l_t2b, v_l2r_b2t, v_r2l_b2t
+
+
+@nb.njit('uint32[:,:](uint32[:], uint32[:])', cache=True)
+def spans(h_vector, v_vector):
+    spans = np.stack((h_vector, v_vector[::-1]), axis=1)
+    return spans
+
+
+@nb.njit('uint32[:](uint32[:,:])', cache=True)
+def biggest_span(spans):
+    if len(spans) == 0:
+        return np.array([0, 0], dtype=np.uint32)
+    areas = spans[:, 0] * spans[:, 1]
+    biggest_span_index = np.where(areas == np.amax(areas))[0][0]
+    return spans[biggest_span_index]
+
+
+@nb.njit(cache=True)
+def spans_all_directions(h_vectors, v_vectors):
+    span_l2r_t2b = spans(h_vectors[0], v_vectors[0])
+    span_r2l_t2b = spans(h_vectors[1], v_vectors[1])
+    span_l2r_b2t = spans(h_vectors[2], v_vectors[2])
+    span_r2l_b2t = spans(h_vectors[3], v_vectors[3])
+    return span_l2r_t2b, span_r2l_t2b, span_l2r_b2t, span_r2l_b2t
+
+
+@nb.njit(cache=True)
+def get_n_directions(spans_all_directions):
+    n_directions = 1
+    for spans in spans_all_directions:
+        all_x_1 = np.all(spans[:, 0] == 1)
+        all_y_1 = np.all(spans[:, 1] == 1)
+        if not all_x_1 and not all_y_1:
+            n_directions += 1
+    return n_directions
+
+
+@nb.njit(cache=True)
+def get_xy_array(x, y, spans, mode=0):
+    """0 - flip none, 1 - flip x, 2 - flip y, 3 - flip both"""
+    xy = spans.copy()
+    xy[:, 0] = x
+    xy[:, 1] = y
+    if mode == 1:
+        xy[:, 0] = xy[:, 0] - spans[:, 0] + 1
+    if mode == 2:
+        xy[:, 1] = xy[:, 1] - spans[:, 1] + 1
+    if mode == 3:
+        xy[:, 0] = xy[:, 0] - spans[:, 0] + 1
+        xy[:, 1] = xy[:, 1] - spans[:, 1] + 1
+    return xy
+
+
+@nb.njit(cache=True)
+def get_xy_arrays(x, y, spans_all_directions):
+    xy_l2r_t2b = get_xy_array(x, y, spans_all_directions[0], 0)
+    xy_r2l_t2b = get_xy_array(x, y, spans_all_directions[1], 1)
+    xy_l2r_b2t = get_xy_array(x, y, spans_all_directions[2], 2)
+    xy_r2l_b2t = get_xy_array(x, y, spans_all_directions[3], 3)
+    return xy_l2r_t2b, xy_r2l_t2b, xy_l2r_b2t, xy_r2l_b2t
+
+
+@nb.njit(cache=True)
+def point_on_outline(x, y, outline):
+    x_vals, y_vals = outline
+    x_true = x_vals == x
+    y_true = y_vals == y
+    both_true = np.logical_and(x_true, y_true)
+    return np.any(both_true)
+
+
+@nb.njit('Tuple((uint32[:,:,::1], uint8[:,::1], uint8[:,::1]))'
+         '(UniTuple(uint32[:], 2), UniTuple(uint32[:,::1], 4))',
+         parallel=True, cache=True)
+def create_maps(outline, adjacencies):
+    x_values, y_values = outline
+    h_left2right, h_right2left, v_top2bottom, v_bottom2top = adjacencies
+
+    shape = h_left2right.shape
+    span_map = np.zeros(shape + (2,), "uint32")
+    direction_map = np.zeros(shape, "uint8")
+    saddle_candidates_map = np.zeros(shape, "uint8")
+
+    for idx in nb.prange(len(x_values)):
+        x, y = x_values[idx], y_values[idx]
+        h_vectors = h_vectors_all_directions(h_left2right, h_right2left, x, y)
+        v_vectors = v_vectors_all_directions(v_top2bottom, v_bottom2top, x, y)
+        span_arrays = spans_all_directions(h_vectors, v_vectors)
+        n = get_n_directions(span_arrays)
+        direction_map[y, x] = n
+        xy_arrays = get_xy_arrays(x, y, span_arrays)
+        for direction_idx in range(4):
+            xy_array = xy_arrays[direction_idx]
+            span_array = span_arrays[direction_idx]
+            for span_idx in range(span_array.shape[0]):
+                x, y = xy_array[span_idx][0], xy_array[span_idx][1]
+                w, h = span_array[span_idx][0], span_array[span_idx][1]
+                if w*h > span_map[y, x, 0] * span_map[y, x, 1]:
+                    span_map[y, x, :] = np.array([w, h], "uint32")
+                if n == 3 and not point_on_outline(x, y, outline):
+                    saddle_candidates_map[y, x] = np.uint8(255)
+
+    return span_map, direction_map, saddle_candidates_map
+
+
+def cells_of_interest(cells):
+    y_vals, x_vals = cells.nonzero()
+    x_vals = x_vals.astype("uint32", order="C")
+    y_vals = y_vals.astype("uint32", order="C")
+    return x_vals, y_vals
+
+
+@nb.njit('uint32[:, :, :]'
+         '(uint32[:,::1], uint32[:,::1], UniTuple(uint32[:], 2))',
+         parallel=True, cache=True)
+def span_map(h_adjacency_left2right,
+             v_adjacency_top2bottom,
+             cells_of_interest):
+
+    x_values, y_values = cells_of_interest
+
+    span_map = np.zeros(h_adjacency_left2right.shape + (2,), dtype=np.uint32)
+
+    for idx in nb.prange(len(x_values)):
+        x, y = x_values[idx], y_values[idx]
+        h_vector = h_vector_top2bottom(h_adjacency_left2right, x, y)
+        v_vector = v_vector_left2right(v_adjacency_top2bottom, x, y)
+        s = spans(h_vector, v_vector)
+        s = biggest_span(s)
+        span_map[y, x, :] = s
+
+    return span_map
+
+
+@nb.njit('uint32[:](uint32[:, :, :])', cache=True)
+def biggest_span_in_span_map(span_map):
+    areas = span_map[:, :, 0] * span_map[:, :, 1]
+    largest_rectangle_indices = np.where(areas == np.amax(areas))
+    x = largest_rectangle_indices[1][0]
+    y = largest_rectangle_indices[0][0]
+    span = span_map[y, x]
+    return np.array([x, y, span[0], span[1]], dtype=np.uint32)
+
+
+def biggest_rectangle(*args):
+    biggest_rect = np.array([0, 0, 0, 0], dtype=np.uint32)
+    for rect in args:
+        if rect[2] * rect[3] > biggest_rect[2] * biggest_rect[3]:
+            biggest_rect = rect
+    return biggest_rect
diff --git a/apps/opencv_stitching_tool/opencv_stitching/megapix_downscaler.py b/apps/opencv_stitching_tool/opencv_stitching/megapix_downscaler.py
deleted file mode 100644
index f7553acc2e..0000000000
--- a/apps/opencv_stitching_tool/opencv_stitching/megapix_downscaler.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from .megapix_scaler import MegapixScaler
-
-
-class MegapixDownscaler(MegapixScaler):
-
-    @staticmethod
-    def force_downscale(scale):
-        return min(1.0, scale)
-
-    def set_scale(self, scale):
-        scale = self.force_downscale(scale)
-        super().set_scale(scale)
diff --git a/apps/opencv_stitching_tool/opencv_stitching/megapix_scaler.py b/apps/opencv_stitching_tool/opencv_stitching/megapix_scaler.py
index 96d47536f9..a7be8ad3dc 100644
--- a/apps/opencv_stitching_tool/opencv_stitching/megapix_scaler.py
+++ b/apps/opencv_stitching_tool/opencv_stitching/megapix_scaler.py
@@ -25,3 +25,14 @@ class MegapixScaler:
         width = int(round(img_size[0] * self.scale))
         height = int(round(img_size[1] * self.scale))
         return (width, height)
+
+
+class MegapixDownscaler(MegapixScaler):
+
+    @staticmethod
+    def force_downscale(scale):
+        return min(1.0, scale)
+
+    def set_scale(self, scale):
+        scale = self.force_downscale(scale)
+        super().set_scale(scale)
diff --git a/apps/opencv_stitching_tool/opencv_stitching/panorama_estimation.py b/apps/opencv_stitching_tool/opencv_stitching/panorama_estimation.py
deleted file mode 100644
index e3a45773ea..0000000000
--- a/apps/opencv_stitching_tool/opencv_stitching/panorama_estimation.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import statistics
-
-
-def estimate_final_panorama_dimensions(cameras, warper, img_handler):
-    medium_to_final_ratio = img_handler.get_medium_to_final_ratio()
-
-    panorama_scale_determined_on_medium_img = \
-        estimate_panorama_scale(cameras)
-
-    panorama_scale = (panorama_scale_determined_on_medium_img *
-                      medium_to_final_ratio)
-    panorama_corners = []
-    panorama_sizes = []
-
-    for size, camera in zip(img_handler.img_sizes, cameras):
-        width, height = img_handler.final_scaler.get_scaled_img_size(size)
-        roi = warper.warp_roi(width, height, camera, panorama_scale, medium_to_final_ratio)
-        panorama_corners.append(roi[0:2])
-        panorama_sizes.append(roi[2:4])
-
-    return panorama_scale, panorama_corners, panorama_sizes
-
-
-def estimate_panorama_scale(cameras):
-    focals = [cam.focal for cam in cameras]
-    panorama_scale = statistics.median(focals)
-    return panorama_scale
diff --git a/apps/opencv_stitching_tool/opencv_stitching/seam_finder.py b/apps/opencv_stitching_tool/opencv_stitching/seam_finder.py
index 2ef86092ab..959880ef02 100644
--- a/apps/opencv_stitching_tool/opencv_stitching/seam_finder.py
+++ b/apps/opencv_stitching_tool/opencv_stitching/seam_finder.py
@@ -63,7 +63,14 @@ class SeamFinder:
         return cv.dilate(seam_lines, kernel)
 
     @staticmethod
-    def blend_seam_masks(seam_masks, corners, sizes, colors=[
+    def blend_seam_masks(seam_masks, corners, sizes):
+        imgs = colored_img_generator(sizes)
+        blended_seam_masks, _ = \
+            Blender.create_panorama(imgs, seam_masks, corners, sizes)
+        return blended_seam_masks
+
+
+def colored_img_generator(sizes, colors=(
             (255, 000, 000),      # Blue
             (000, 000, 255),      # Red
             (000, 255, 000),      # Green
@@ -72,21 +79,13 @@ class SeamFinder:
             (128, 128, 255),      # Pink
             (128, 128, 128),      # Gray
             (000, 000, 128),      # Brown
-            (000, 128, 255)]      # Orange
+            (000, 128, 255))      # Orange
             ):
-
-        blender = Blender("no")
-        blender.prepare(corners, sizes)
-
-        for idx, (seam_mask, size, corner) in enumerate(
-                zip(seam_masks, sizes, corners)):
-            if idx+1 > len(colors):
-                raise ValueError("Not enough default colors! Pass additional "
-                                 "colors to \"colors\" parameter")
-            one_color_img = create_img_by_size(size, colors[idx])
-            blender.feed(one_color_img, seam_mask, corner)
-
-        return blender.blend()
+    for idx, size in enumerate(sizes):
+        if idx+1 > len(colors):
+            raise ValueError("Not enough default colors! Pass additional "
+                             "colors to \"colors\" parameter")
+        yield create_img_by_size(size, colors[idx])
 
 
 def create_img_by_size(size, color=(0, 0, 0)):
diff --git a/apps/opencv_stitching_tool/opencv_stitching/stitcher.py b/apps/opencv_stitching_tool/opencv_stitching/stitcher.py
index c08112664f..2419092420 100644
--- a/apps/opencv_stitching_tool/opencv_stitching/stitcher.py
+++ b/apps/opencv_stitching_tool/opencv_stitching/stitcher.py
@@ -8,7 +8,7 @@ from .camera_estimator import CameraEstimator
 from .camera_adjuster import CameraAdjuster
 from .camera_wave_corrector import WaveCorrector
 from .warper import Warper
-from .panorama_estimation import estimate_final_panorama_dimensions
+from .cropper import Cropper
 from .exposure_error_compensator import ExposureErrorCompensator
 from .seam_finder import SeamFinder
 from .blender import Blender
@@ -33,6 +33,7 @@ class Stitcher:
          "wave_correct_kind": WaveCorrector.DEFAULT_WAVE_CORRECTION,
          "warper_type": Warper.DEFAULT_WARP_TYPE,
          "low_megapix": ImageHandler.DEFAULT_LOW_MEGAPIX,
+         "crop": Cropper.DEFAULT_CROP,
          "compensator": ExposureErrorCompensator.DEFAULT_COMPENSATOR,
          "nr_feeds": ExposureErrorCompensator.DEFAULT_NR_FEEDS,
          "block_size": ExposureErrorCompensator.DEFAULT_BLOCK_SIZE,
@@ -68,6 +69,7 @@ class Stitcher:
             CameraAdjuster(args.adjuster, args.refinement_mask)
         self.wave_corrector = WaveCorrector(args.wave_correct_kind)
         self.warper = Warper(args.warper_type)
+        self.cropper = Cropper(args.crop)
         self.compensator = \
             ExposureErrorCompensator(args.compensator, args.nr_feeds,
                                      args.block_size)
@@ -77,7 +79,6 @@ class Stitcher:
 
     def stitch(self, img_names):
         self.initialize_registration(img_names)
-
         imgs = self.resize_medium_resolution()
         features = self.find_features(imgs)
         matches = self.match_features(features)
@@ -85,22 +86,26 @@ class Stitcher:
         cameras = self.estimate_camera_parameters(features, matches)
         cameras = self.refine_camera_parameters(features, matches, cameras)
         cameras = self.perform_wave_correction(cameras)
-        panorama_scale, panorama_corners, panorama_sizes = \
-            self.estimate_final_panorama_dimensions(cameras)
-
-        self.initialize_composition(panorama_corners, panorama_sizes)
+        self.estimate_scale(cameras)
 
         imgs = self.resize_low_resolution(imgs)
-        imgs = self.warp_low_resolution_images(imgs, cameras, panorama_scale)
-        self.estimate_exposure_errors(imgs)
-        seam_masks = self.find_seam_masks(imgs)
+        imgs, masks, corners, sizes = self.warp_low_resolution(imgs, cameras)
+        self.prepare_cropper(imgs, masks, corners, sizes)
+        imgs, masks, corners, sizes = \
+            self.crop_low_resolution(imgs, masks, corners, sizes)
+        self.estimate_exposure_errors(corners, imgs, masks)
+        seam_masks = self.find_seam_masks(imgs, corners, masks)
 
         imgs = self.resize_final_resolution()
-        imgs = self.warp_final_resolution_images(imgs, cameras, panorama_scale)
-        imgs = self.compensate_exposure_errors(imgs)
+        imgs, masks, corners, sizes = self.warp_final_resolution(imgs, cameras)
+        imgs, masks, corners, sizes = \
+            self.crop_final_resolution(imgs, masks, corners, sizes)
+        self.set_masks(masks)
+        imgs = self.compensate_exposure_errors(corners, imgs)
         seam_masks = self.resize_seam_masks(seam_masks)
-        self.blend_images(imgs, seam_masks)
 
+        self.initialize_composition(corners, sizes)
+        self.blend_images(imgs, seam_masks, corners)
         return self.create_final_panorama()
 
     def initialize_registration(self, img_names):
@@ -132,9 +137,77 @@ class Stitcher:
     def perform_wave_correction(self, cameras):
         return self.wave_corrector.correct(cameras)
 
-    def estimate_final_panorama_dimensions(self, cameras):
-        return estimate_final_panorama_dimensions(cameras, self.warper,
-                                                  self.img_handler)
+    def estimate_scale(self, cameras):
+        self.warper.set_scale(cameras)
+
+    def resize_low_resolution(self, imgs=None):
+        return list(self.img_handler.resize_to_low_resolution(imgs))
+
+    def warp_low_resolution(self, imgs, cameras):
+        sizes = self.img_handler.get_low_img_sizes()
+        camera_aspect = self.img_handler.get_medium_to_low_ratio()
+        imgs, masks, corners, sizes = \
+            self.warp(imgs, cameras, sizes, camera_aspect)
+        return list(imgs), list(masks), corners, sizes
+
+    def warp_final_resolution(self, imgs, cameras):
+        sizes = self.img_handler.get_final_img_sizes()
+        camera_aspect = self.img_handler.get_medium_to_final_ratio()
+        return self.warp(imgs, cameras, sizes, camera_aspect)
+
+    def warp(self, imgs, cameras, sizes, aspect=1):
+        imgs = self.warper.warp_images(imgs, cameras, aspect)
+        masks = self.warper.create_and_warp_masks(sizes, cameras, aspect)
+        corners, sizes = self.warper.warp_rois(sizes, cameras, aspect)
+        return imgs, masks, corners, sizes
+
+    def prepare_cropper(self, imgs, masks, corners, sizes):
+        self.cropper.prepare(imgs, masks, corners, sizes)
+
+    def crop_low_resolution(self, imgs, masks, corners, sizes):
+        imgs, masks, corners, sizes = self.crop(imgs, masks, corners, sizes)
+        return list(imgs), list(masks), corners, sizes
+
+    def crop_final_resolution(self, imgs, masks, corners, sizes):
+        lir_aspect = self.img_handler.get_low_to_final_ratio()
+        return self.crop(imgs, masks, corners, sizes, lir_aspect)
+
+    def crop(self, imgs, masks, corners, sizes, aspect=1):
+        masks = self.cropper.crop_images(masks, aspect)
+        imgs = self.cropper.crop_images(imgs, aspect)
+        corners, sizes = self.cropper.crop_rois(corners, sizes, aspect)
+        return imgs, masks, corners, sizes
+
+    def estimate_exposure_errors(self, corners, imgs, masks):
+        self.compensator.feed(corners, imgs, masks)
+
+    def find_seam_masks(self, imgs, corners, masks):
+        return self.seam_finder.find(imgs, corners, masks)
+
+    def resize_final_resolution(self):
+        return self.img_handler.resize_to_final_resolution()
+
+    def compensate_exposure_errors(self, corners, imgs):
+        for idx, (corner, img) in enumerate(zip(corners, imgs)):
+            yield self.compensator.apply(idx, corner, img, self.get_mask(idx))
+
+    def resize_seam_masks(self, seam_masks):
+        for idx, seam_mask in enumerate(seam_masks):
+            yield SeamFinder.resize(seam_mask, self.get_mask(idx))
+
+    def set_masks(self, mask_generator):
+        self.masks = mask_generator
+        self.mask_index = -1
+
+    def get_mask(self, idx):
+        if idx == self.mask_index + 1:
+            self.mask_index += 1
+            self.mask = next(self.masks)
+            return self.mask
+        elif idx == self.mask_index:
+            return self.mask
+        else:
+            raise StitchingError("Invalid Mask Index!")
 
     def initialize_composition(self, corners, sizes):
         if self.timelapser.do_timelapse:
@@ -142,66 +215,22 @@ class Stitcher:
         else:
             self.blender.prepare(corners, sizes)
 
-    def resize_low_resolution(self, imgs=None):
-        return list(self.img_handler.resize_to_low_resolution(imgs))
-
-    def warp_low_resolution_images(self, imgs, cameras, final_scale):
-        camera_aspect = self.img_handler.get_medium_to_low_ratio()
-        scale = final_scale * self.img_handler.get_final_to_low_ratio()
-        return list(self.warp_images(imgs, cameras, scale, camera_aspect))
-
-    def warp_final_resolution_images(self, imgs, cameras, scale):
-        camera_aspect = self.img_handler.get_medium_to_final_ratio()
-        return self.warp_images(imgs, cameras, scale, camera_aspect)
-
-    def warp_images(self, imgs, cameras, scale, aspect=1):
-        self._masks = []
-        self._corners = []
-        for img_warped, mask_warped, corner in \
-            self.warper.warp_images_and_image_masks(
-                imgs, cameras, scale, aspect
-                ):
-            self._masks.append(mask_warped)
-            self._corners.append(corner)
-            yield img_warped
-
-    def estimate_exposure_errors(self, imgs):
-        self.compensator.feed(self._corners, imgs, self._masks)
-
-    def find_seam_masks(self, imgs):
-        return self.seam_finder.find(imgs, self._corners, self._masks)
-
-    def resize_final_resolution(self):
-        return self.img_handler.resize_to_final_resolution()
-
-    def compensate_exposure_errors(self, imgs):
-        for idx, img in enumerate(imgs):
-            yield self.compensator.apply(idx, self._corners[idx],
-                                         img, self._masks[idx])
-
-    def resize_seam_masks(self, seam_masks):
-        for idx, seam_mask in enumerate(seam_masks):
-            yield SeamFinder.resize(seam_mask, self._masks[idx])
-
-    def blend_images(self, imgs, masks):
-        for idx, (img, mask) in enumerate(zip(imgs, masks)):
+    def blend_images(self, imgs, masks, corners):
+        for idx, (img, mask, corner) in enumerate(zip(imgs, masks, corners)):
             if self.timelapser.do_timelapse:
                 self.timelapser.process_and_save_frame(
-                    self.img_handler.img_names[idx], img, self._corners[idx]
+                    self.img_handler.img_names[idx], img, corner
                     )
             else:
-                self.blender.feed(img, mask, self._corners[idx])
+                self.blender.feed(img, mask, corner)
 
     def create_final_panorama(self):
         if not self.timelapser.do_timelapse:
-            return self.blender.blend()
+            panorama, _ = self.blender.blend()
+            return panorama
 
     @staticmethod
     def validate_kwargs(kwargs):
         for arg in kwargs:
             if arg not in Stitcher.DEFAULT_SETTINGS:
                 raise StitchingError("Invalid Argument: " + arg)
-
-    def collect_garbage(self):
-        del self.img_handler.img_names, self.img_handler.img_sizes,
-        del self._corners, self._masks
diff --git a/apps/opencv_stitching_tool/opencv_stitching/subsetter.py b/apps/opencv_stitching_tool/opencv_stitching/subsetter.py
index 4ea6acc60d..e037984530 100644
--- a/apps/opencv_stitching_tool/opencv_stitching/subsetter.py
+++ b/apps/opencv_stitching_tool/opencv_stitching/subsetter.py
@@ -44,13 +44,12 @@ class Subsetter:
         indices = cv.detail.leaveBiggestComponent(features,
                                                   pairwise_matches,
                                                   self.confidence_threshold)
-        indices_as_list = [int(idx) for idx in list(indices[:, 0])]
 
-        if len(indices_as_list) < 2:
+        if len(indices) < 2:
             raise StitchingError("No match exceeds the "
                                  "given confidence theshold.")
 
-        return indices_as_list
+        return indices
 
     @staticmethod
     def subset_list(list_to_subset, indices):
diff --git a/apps/opencv_stitching_tool/opencv_stitching/test/test_megapix_scaler.py b/apps/opencv_stitching_tool/opencv_stitching/test/test_megapix_scaler.py
index 0afdad2628..0dc5b8fbbf 100644
--- a/apps/opencv_stitching_tool/opencv_stitching/test/test_megapix_scaler.py
+++ b/apps/opencv_stitching_tool/opencv_stitching/test/test_megapix_scaler.py
@@ -7,9 +7,8 @@ import cv2 as cv
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__),
                                                 '..', '..')))
 
-from opencv_stitching.megapix_scaler import MegapixScaler
-from opencv_stitching.megapix_downscaler import MegapixDownscaler
-#%%
+from opencv_stitching.megapix_scaler import MegapixScaler, MegapixDownscaler
+# %%
 
 
 class TestScaler(unittest.TestCase):
diff --git a/apps/opencv_stitching_tool/opencv_stitching/test/test_performance.py b/apps/opencv_stitching_tool/opencv_stitching/test/test_performance.py
index 60b03a8bfe..2028ed8b5c 100644
--- a/apps/opencv_stitching_tool/opencv_stitching/test/test_performance.py
+++ b/apps/opencv_stitching_tool/opencv_stitching/test/test_performance.py
@@ -14,6 +14,7 @@ from stitching_detailed import main
 
 class TestStitcher(unittest.TestCase):
 
+    @unittest.skip("skip performance test (not needed in every run)")
     def test_performance(self):
 
         print("Run new Stitcher class:")
@@ -25,7 +26,6 @@ class TestStitcher(unittest.TestCase):
         stitcher.stitch(["boat5.jpg", "boat2.jpg",
                          "boat3.jpg", "boat4.jpg",
                          "boat1.jpg", "boat6.jpg"])
-        stitcher.collect_garbage()
 
         _, peak_memory = tracemalloc.get_traced_memory()
         tracemalloc.stop()
diff --git a/apps/opencv_stitching_tool/opencv_stitching/test/test_registration.py b/apps/opencv_stitching_tool/opencv_stitching/test/test_registration.py
index 98e792fd01..15b851e433 100644
--- a/apps/opencv_stitching_tool/opencv_stitching/test/test_registration.py
+++ b/apps/opencv_stitching_tool/opencv_stitching/test/test_registration.py
@@ -70,8 +70,8 @@ class TestImageRegistration(unittest.TestCase):
         indices_to_delete = subsetter.get_indices_to_delete(len(img_names),
                                                             indices)
 
-        self.assertEqual(indices, [2, 3, 4])
-        self.assertEqual(indices_to_delete, [0, 1])
+        np.testing.assert_array_equal(indices, np.array([2, 3, 4]))
+        np.testing.assert_array_equal(indices_to_delete, np.array([0, 1]))
 
         subsetted_image_names = subsetter.subset_list(img_names, indices)
         self.assertEqual(subsetted_image_names,
diff --git a/apps/opencv_stitching_tool/opencv_stitching/test/test_stitcher.py b/apps/opencv_stitching_tool/opencv_stitching/test/test_stitcher.py
index 5a24f752c0..d97300dadd 100644
--- a/apps/opencv_stitching_tool/opencv_stitching/test/test_stitcher.py
+++ b/apps/opencv_stitching_tool/opencv_stitching/test/test_stitcher.py
@@ -15,7 +15,7 @@ from opencv_stitching.stitcher import Stitcher
 class TestStitcher(unittest.TestCase):
 
     def test_stitcher_aquaduct(self):
-        stitcher = Stitcher(n_features=250)
+        stitcher = Stitcher(nfeatures=250)
         result = stitcher.stitch(["s1.jpg", "s2.jpg"])
         cv.imwrite("result.jpg", result)
 
@@ -30,7 +30,7 @@ class TestStitcher(unittest.TestCase):
                     "wave_correct_kind": "no",
                     "finder": "dp_colorgrad",
                     "compensator": "no",
-                    "conf_thresh": 0.3}
+                    "confidence_threshold": 0.3}
 
         stitcher = Stitcher(**settings)
         result = stitcher.stitch(["boat5.jpg", "boat2.jpg",
@@ -49,7 +49,7 @@ class TestStitcher(unittest.TestCase):
         settings = {"warper_type": "compressedPlaneA2B1",
                     "finder": "dp_colorgrad",
                     "compensator": "channel_blocks",
-                    "conf_thresh": 0.3}
+                    "confidence_threshold": 0.3}
 
         stitcher = Stitcher(**settings)
         result = stitcher.stitch(["boat5.jpg", "boat2.jpg",
@@ -64,7 +64,7 @@ class TestStitcher(unittest.TestCase):
                                    atol=max_image_shape_derivation)
 
     def test_stitcher_boat_aquaduct_subset(self):
-        settings = {"final_megapix": 1}
+        settings = {"final_megapix": 1, "crop": True}
 
         stitcher = Stitcher(**settings)
         result = stitcher.stitch(["boat5.jpg",
@@ -76,7 +76,7 @@ class TestStitcher(unittest.TestCase):
 
         max_image_shape_derivation = 100
         np.testing.assert_allclose(result.shape[:2],
-                                   (839, 3384),
+                                   (705, 3374),
                                    atol=max_image_shape_derivation)
 
     def test_stitcher_budapest(self):
diff --git a/apps/opencv_stitching_tool/opencv_stitching/warper.py b/apps/opencv_stitching_tool/opencv_stitching/warper.py
index 47f2ec0adb..44ecb00f5f 100644
--- a/apps/opencv_stitching_tool/opencv_stitching/warper.py
+++ b/apps/opencv_stitching_tool/opencv_stitching/warper.py
@@ -1,3 +1,5 @@
+from statistics import median
+
 import cv2 as cv
 import numpy as np
 
@@ -15,48 +17,54 @@ class Warper:
 
     DEFAULT_WARP_TYPE = 'spherical'
 
-    def __init__(self, warper_type=DEFAULT_WARP_TYPE, scale=1):
+    def __init__(self, warper_type=DEFAULT_WARP_TYPE):
         self.warper_type = warper_type
-        self.warper = cv.PyRotationWarper(warper_type, scale)
-        self.scale = scale
+        self.scale = None
 
-    def warp_images_and_image_masks(self, imgs, cameras, scale=None, aspect=1):
-        self.update_scale(scale)
+    def set_scale(self, cameras):
+        focals = [cam.focal for cam in cameras]
+        self.scale = median(focals)
+
+    def warp_images(self, imgs, cameras, aspect=1):
         for img, camera in zip(imgs, cameras):
-            yield self.warp_image_and_image_mask(img, camera, scale, aspect)
+            yield self.warp_image(img, camera, aspect)
 
-    def warp_image_and_image_mask(self, img, camera, scale=None, aspect=1):
-        self.update_scale(scale)
-        corner, img_warped = self.warp_image(img, camera, aspect)
-        mask = 255 * np.ones((img.shape[0], img.shape[1]), np.uint8)
-        _, mask_warped = self.warp_image(mask, camera, aspect, mask=True)
-        return img_warped, mask_warped, corner
+    def warp_image(self, img, camera, aspect=1):
+        warper = cv.PyRotationWarper(self.warper_type, self.scale*aspect)
+        _, warped_image = warper.warp(img,
+                                      Warper.get_K(camera, aspect),
+                                      camera.R,
+                                      cv.INTER_LINEAR,
+                                      cv.BORDER_REFLECT)
+        return warped_image
 
-    def warp_image(self, image, camera, aspect=1, mask=False):
-        if mask:
-            interp_mode = cv.INTER_NEAREST
-            border_mode = cv.BORDER_CONSTANT
-        else:
-            interp_mode = cv.INTER_LINEAR
-            border_mode = cv.BORDER_REFLECT
+    def create_and_warp_masks(self, sizes, cameras, aspect=1):
+        for size, camera in zip(sizes, cameras):
+            yield self.create_and_warp_mask(size, camera, aspect)
 
-        corner, warped_image = self.warper.warp(image,
-                                                Warper.get_K(camera, aspect),
-                                                camera.R,
-                                                interp_mode,
-                                                border_mode)
-        return corner, warped_image
+    def create_and_warp_mask(self, size, camera, aspect=1):
+        warper = cv.PyRotationWarper(self.warper_type, self.scale*aspect)
+        mask = 255 * np.ones((size[1], size[0]), np.uint8)
+        _, warped_mask = warper.warp(mask,
+                                     Warper.get_K(camera, aspect),
+                                     camera.R,
+                                     cv.INTER_NEAREST,
+                                     cv.BORDER_CONSTANT)
+        return warped_mask
 
-    def warp_roi(self, width, height, camera, scale=None, aspect=1):
-        self.update_scale(scale)
-        roi = (width, height)
+    def warp_rois(self, sizes, cameras, aspect=1):
+        roi_corners = []
+        roi_sizes = []
+        for size, camera in zip(sizes, cameras):
+            roi = self.warp_roi(size, camera, aspect)
+            roi_corners.append(roi[0:2])
+            roi_sizes.append(roi[2:4])
+        return roi_corners, roi_sizes
+
+    def warp_roi(self, size, camera, aspect=1):
+        warper = cv.PyRotationWarper(self.warper_type, self.scale*aspect)
         K = Warper.get_K(camera, aspect)
-        return self.warper.warpRoi(roi, K, camera.R)
-
-    def update_scale(self, scale):
-        if scale is not None and scale != self.scale:
-            self.warper = cv.PyRotationWarper(self.warper_type, scale)  # setScale not working: https://docs.opencv.org/5.x/d5/d76/classcv_1_1PyRotationWarper.html#a90b000bb75f95294f9b0b6ec9859eb55
-            self.scale = scale
+        return warper.warpRoi(size, K, camera.R)
 
     @staticmethod
     def get_K(camera, aspect=1):
diff --git a/apps/opencv_stitching_tool/opencv_stitching_tool.py b/apps/opencv_stitching_tool/opencv_stitching_tool.py
index 1ee96aa8cb..2e41c11b87 100644
--- a/apps/opencv_stitching_tool/opencv_stitching_tool.py
+++ b/apps/opencv_stitching_tool/opencv_stitching_tool.py
@@ -23,6 +23,7 @@ from opencv_stitching.camera_estimator import CameraEstimator
 from opencv_stitching.camera_adjuster import CameraAdjuster
 from opencv_stitching.camera_wave_corrector import WaveCorrector
 from opencv_stitching.warper import Warper
+from opencv_stitching.cropper import Cropper
 from opencv_stitching.exposure_error_compensator import ExposureErrorCompensator  # noqa
 from opencv_stitching.seam_finder import SeamFinder
 from opencv_stitching.blender import Blender
@@ -72,9 +73,7 @@ parser.add_argument(
     type=int, dest='range_width'
 )
 parser.add_argument(
-    '--try_use_gpu',
-    action='store',
-    default=False,
+    '--try_use_gpu', action='store', default=False,
     help="Try to use CUDA. The default value is no. "
          "All default values are for CPU mode.",
     type=bool, dest='try_use_gpu'
@@ -146,6 +145,13 @@ parser.add_argument(
     "The default is %s Mpx." % ImageHandler.DEFAULT_LOW_MEGAPIX,
     type=float, dest='low_megapix'
 )
+parser.add_argument(
+    '--crop', action='store', default=Cropper.DEFAULT_CROP,
+    help="Crop black borders around images caused by warping using the "
+    "largest interior rectangle. "
+    "Default is '%s'." % Cropper.DEFAULT_CROP,
+    type=lambda v: v.lower() in ('1', 'true', 'yes', 'on'), dest='crop'
+)
 parser.add_argument(
     '--compensator', action='store',
     default=ExposureErrorCompensator.DEFAULT_COMPENSATOR,
diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
index 037c7fb5ba..bcb8a3e203 100644
--- a/cmake/OpenCVCompilerOptions.cmake
+++ b/cmake/OpenCVCompilerOptions.cmake
@@ -119,12 +119,12 @@ if(CV_GCC OR CV_CLANG)
     # we want.
     add_extra_compiler_option(-Wall)
   endif()
-  add_extra_compiler_option(-Werror=return-type)
-  add_extra_compiler_option(-Werror=non-virtual-dtor)
-  add_extra_compiler_option(-Werror=address)
-  add_extra_compiler_option(-Werror=sequence-point)
+  add_extra_compiler_option(-Wreturn-type)
+  add_extra_compiler_option(-Wnon-virtual-dtor)
+  add_extra_compiler_option(-Waddress)
+  add_extra_compiler_option(-Wsequence-point)
   add_extra_compiler_option(-Wformat)
-  add_extra_compiler_option(-Werror=format-security -Wformat)
+  add_extra_compiler_option(-Wformat-security -Wformat)
   add_extra_compiler_option(-Wmissing-declarations)
   add_extra_compiler_option(-Wmissing-prototypes)
   add_extra_compiler_option(-Wstrict-prototypes)
@@ -367,6 +367,22 @@ if(NOT OPENCV_SKIP_LINK_AS_NEEDED)
   endif()
 endif()
 
+# Apply "-Wl,--no-undefined" linker flags: https://github.com/opencv/opencv/pull/21347
+if(NOT OPENCV_SKIP_LINK_NO_UNDEFINED)
+  if(UNIX AND (NOT APPLE OR NOT CMAKE_VERSION VERSION_LESS "3.2"))
+    set(_option "-Wl,--no-undefined")
+    set(_saved_CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${_option}")  # requires CMake 3.2+ and CMP0056
+    ocv_check_compiler_flag(CXX "" HAVE_LINK_NO_UNDEFINED)
+    set(CMAKE_EXE_LINKER_FLAGS "${_saved_CMAKE_EXE_LINKER_FLAGS}")
+    if(HAVE_LINK_NO_UNDEFINED)
+      set(OPENCV_EXTRA_EXE_LINKER_FLAGS "${OPENCV_EXTRA_EXE_LINKER_FLAGS} ${_option}")
+      set(OPENCV_EXTRA_SHARED_LINKER_FLAGS "${OPENCV_EXTRA_SHARED_LINKER_FLAGS} ${_option}")
+      set(OPENCV_EXTRA_MODULE_LINKER_FLAGS "${OPENCV_EXTRA_MODULE_LINKER_FLAGS} ${_option}")
+    endif()
+  endif()
+endif()
+
 # combine all "extra" options
 if(NOT OPENCV_SKIP_EXTRA_COMPILER_FLAGS)
   set(CMAKE_C_FLAGS           "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}")
diff --git a/cmake/OpenCVDetectInferenceEngine.cmake b/cmake/OpenCVDetectInferenceEngine.cmake
index 6521fbab93..319fd5bf0a 100644
--- a/cmake/OpenCVDetectInferenceEngine.cmake
+++ b/cmake/OpenCVDetectInferenceEngine.cmake
@@ -1,101 +1,38 @@
-# The script detects Intel(R) Inference Engine installation
-#
-# Cache variables:
-# INF_ENGINE_RELEASE - a number reflecting IE source interface (linked with OpenVINO release)
-#
-# Detect parameters:
-# 1. Native cmake IE package:
-#    - environment variable InferenceEngine_DIR is set to location of cmake module
-# 2. Custom location:
-#    - INF_ENGINE_INCLUDE_DIRS - headers search location
-#    - INF_ENGINE_LIB_DIRS     - library search location
-# 3. OpenVINO location:
-#    - environment variable INTEL_OPENVINO_DIR is set to location of OpenVINO installation dir
-#    - INF_ENGINE_PLATFORM - part of name of library directory representing its platform
+# The script detects Intel(R) OpenVINO(TM) runtime installation
 #
 # Result:
-# INF_ENGINE_TARGET - set to name of imported library target representing InferenceEngine
-#
+# - target ocv.3rdparty.openvino
 
-
-macro(ocv_ie_find_extra_libraries find_prefix find_suffix)
-  file(GLOB libraries "${INF_ENGINE_LIB_DIRS}/${find_prefix}inference_engine*${find_suffix}")
-  foreach(full_path IN LISTS libraries)
-    get_filename_component(library "${full_path}" NAME_WE)
-    string(REPLACE "${find_prefix}" "" library "${library}")
-    if(library STREQUAL "inference_engine" OR library STREQUAL "inference_engined")
-      # skip
-    else()
-      add_library(${library} UNKNOWN IMPORTED)
-      set_target_properties(${library} PROPERTIES
-          IMPORTED_LOCATION "${full_path}")
-      list(APPEND custom_libraries ${library})
-    endif()
-  endforeach()
-endmacro()
-
-function(add_custom_ie_build _inc _lib _lib_rel _lib_dbg _msg)
-  if(NOT _inc OR NOT (_lib OR _lib_rel OR _lib_dbg))
+if(WITH_OPENVINO)
+  find_package(OpenVINO QUIET)
+  if(OpenVINO_FOUND)
+    message(STATUS "OpenVINO FOUND: ${OpenVINO_VERSION}")
+    math(EXPR ver "${OpenVINO_VERSION_MAJOR} * 1000000 + ${OpenVINO_VERSION_MINOR} * 10000 + ${OpenVINO_VERSION_PATCH} * 100")
+    ocv_add_external_target(openvino "" "openvino::runtime" "INF_ENGINE_RELEASE=${ver};HAVE_NGRAPH;HAVE_DNN_NGRAPH;HAVE_INF_ENGINE")
+    set(HAVE_OPENVINO 1)
     return()
   endif()
-  if(NOT _lib)
-    if(_lib_rel)
-      set(_lib "${_lib_rel}")
-    else()
-      set(_lib "${_lib_dbg}")
-    endif()
-  endif()
-  add_library(inference_engine UNKNOWN IMPORTED)
-  set_target_properties(inference_engine PROPERTIES
-    IMPORTED_LOCATION "${_lib}"
-    IMPORTED_IMPLIB_RELEASE "${_lib_rel}"
-    IMPORTED_IMPLIB_DEBUG "${_lib_dbg}"
-    INTERFACE_INCLUDE_DIRECTORIES "${_inc}"
-  )
+endif()
 
-  set(custom_libraries "")
-  set(__prefixes "${CMAKE_FIND_LIBRARY_PREFIXES}")
-  if(NOT __prefixes)
-    set(__prefixes "_empty_")
-  endif()
-  foreach(find_prefix ${__prefixes})
-    if(find_prefix STREQUAL "_empty_")  # foreach doesn't iterate over empty elements
-      set(find_prefix "")
-    endif()
-    if(NOT DEFINED INFERENCE_ENGINE_FIND_LIBRARY_SUFFIXES)  # allow custom override
-      set(INFERENCE_ENGINE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
-      if(APPLE)
-        ocv_list_filterout(INFERENCE_ENGINE_FIND_LIBRARY_SUFFIXES "^.so$")  # skip plugins (can't be linked)
-      endif()
-    endif()
-    foreach(find_suffix ${INFERENCE_ENGINE_FIND_LIBRARY_SUFFIXES})
-      ocv_ie_find_extra_libraries("${find_prefix}" "${find_suffix}")
-    endforeach()
-    if(NOT CMAKE_FIND_LIBRARY_SUFFIXES)
-      ocv_ie_find_extra_libraries("${find_prefix}" "")
-    endif()
-  endforeach()
+# ======================
 
-  if(NOT INF_ENGINE_RELEASE VERSION_GREATER "2018050000")
-    find_library(INF_ENGINE_OMP_LIBRARY iomp5 PATHS "${INF_ENGINE_OMP_DIR}" NO_DEFAULT_PATH)
-    if(NOT INF_ENGINE_OMP_LIBRARY)
-      message(WARNING "OpenMP for IE have not been found. Set INF_ENGINE_OMP_DIR variable if you experience build errors.")
-    endif()
+if(WITH_OPENVINO)
+  find_package(OpenVINO QUIET)
+  if(OpenVINO_FOUND)
+    message(STATUS "OpenVINO FOUND: ${OpenVINO_VERSION}")
+    math(EXPR ver "${OpenVINO_VERSION_MAJOR} * 1000000 + ${OpenVINO_VERSION_MINOR} * 10000 + ${OpenVINO_VERSION_PATCH} * 100")
+    ocv_add_external_target(openvino "" "openvino::runtime" "INF_ENGINE_RELEASE=${ver};HAVE_NGRAPH;HAVE_DNN_NGRAPH;HAVE_INF_ENGINE")
+    set(HAVE_OPENVINO 1)
+    return()
   endif()
-  if(EXISTS "${INF_ENGINE_OMP_LIBRARY}")
-    set_target_properties(inference_engine PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${INF_ENGINE_OMP_LIBRARY}")
-  endif()
-  set(INF_ENGINE_VERSION "Unknown" CACHE STRING "")
-  set(INF_ENGINE_TARGET "inference_engine;${custom_libraries}" PARENT_SCOPE)
-  message(STATUS "Detected InferenceEngine: ${_msg}")
-endfunction()
+endif()
 
 # ======================
 
 find_package(InferenceEngine QUIET)
 if(InferenceEngine_FOUND)
   set(INF_ENGINE_TARGET ${InferenceEngine_LIBRARIES})
-  set(INF_ENGINE_VERSION "${InferenceEngine_VERSION}" CACHE STRING "")
+  set(INF_ENGINE_VERSION "${InferenceEngine_VERSION}")
   message(STATUS "Detected InferenceEngine: cmake package (${InferenceEngine_VERSION})")
 endif()
 
@@ -113,47 +50,19 @@ elseif(DEFINED INF_ENGINE_RELEASE)
 endif()
 set(INF_ENGINE_RELEASE "${INF_ENGINE_RELEASE_INIT}" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
 
-if(NOT INF_ENGINE_TARGET AND INF_ENGINE_LIB_DIRS AND INF_ENGINE_INCLUDE_DIRS)
-  find_path(ie_custom_inc "inference_engine.hpp" PATHS "${INF_ENGINE_INCLUDE_DIRS}" NO_DEFAULT_PATH)
-  if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-    find_library(ie_custom_lib_dbg "inference_engined" PATHS "${INF_ENGINE_LIB_DIRS}" NO_DEFAULT_PATH)  # Win32 and MacOSX
-  endif()
-  find_library(ie_custom_lib "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}" NO_DEFAULT_PATH)
-  find_library(ie_custom_lib_rel "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}/Release" NO_DEFAULT_PATH)
-  find_library(ie_custom_lib_dbg "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}/Debug" NO_DEFAULT_PATH)
-  add_custom_ie_build("${ie_custom_inc}" "${ie_custom_lib}" "${ie_custom_lib_rel}" "${ie_custom_lib_dbg}" "INF_ENGINE_{INCLUDE,LIB}_DIRS")
-endif()
-
-set(_loc "$ENV{INTEL_OPENVINO_DIR}")
-if(NOT _loc AND DEFINED ENV{INTEL_CVSDK_DIR})
-  set(_loc "$ENV{INTEL_CVSDK_DIR}")  # OpenVINO 2018.x
-endif()
-if(NOT INF_ENGINE_TARGET AND _loc)
-  if(NOT INF_ENGINE_RELEASE VERSION_GREATER "2018050000")
-    set(INF_ENGINE_PLATFORM_DEFAULT "ubuntu_16.04")
-  else()
-    set(INF_ENGINE_PLATFORM_DEFAULT "")
-  endif()
-  set(INF_ENGINE_PLATFORM "${INF_ENGINE_PLATFORM_DEFAULT}" CACHE STRING "InferenceEngine platform (library dir)")
-  find_path(ie_custom_env_inc "inference_engine.hpp" PATHS "${_loc}/deployment_tools/inference_engine/include" NO_DEFAULT_PATH)
-  if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-    find_library(ie_custom_env_lib_dbg "inference_engined" PATHS "${_loc}/deployment_tools/inference_engine/lib/${INF_ENGINE_PLATFORM}/intel64" NO_DEFAULT_PATH)
-  endif()
-  find_library(ie_custom_env_lib "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/${INF_ENGINE_PLATFORM}/intel64" NO_DEFAULT_PATH)
-  find_library(ie_custom_env_lib_rel "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/intel64/Release" NO_DEFAULT_PATH)
-  find_library(ie_custom_env_lib_dbg "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/intel64/Debug" NO_DEFAULT_PATH)
-  add_custom_ie_build("${ie_custom_env_inc}" "${ie_custom_env_lib}" "${ie_custom_env_lib_rel}" "${ie_custom_env_lib_dbg}" "OpenVINO (${_loc})")
-endif()
+set(tgts)
+set(defs)
 
 # Add more features to the target
-
 if(INF_ENGINE_TARGET)
   set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
       INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
   )
+  list(APPEND tgts ${INF_ENGINE_TARGET})
+  list(APPEND defs "INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}" "HAVE_INF_ENGINE")
 endif()
 
-if(WITH_NGRAPH)
+if(WITH_NGRAPH OR NOT DEFINED WITH_NGRAPH)
   find_package(ngraph QUIET)
   if(ngraph_FOUND)
     ocv_assert(TARGET ngraph::ngraph)
@@ -162,5 +71,9 @@ if(WITH_NGRAPH)
     endif()
     message(STATUS "Detected ngraph: cmake package (${ngraph_VERSION})")
     set(HAVE_NGRAPH ON)
+    list(APPEND tgts ngraph::ngraph)
+    list(APPEND defs "HAVE_NGRAPH" "HAVE_DNN_NGRAPH")
   endif()
 endif()
+
+ocv_add_external_target(openvino "" "${tgts}" "${defs}")
diff --git a/cmake/OpenCVFindLibsGrfmt.cmake b/cmake/OpenCVFindLibsGrfmt.cmake
index 95d1d92f68..00886cc131 100644
--- a/cmake/OpenCVFindLibsGrfmt.cmake
+++ b/cmake/OpenCVFindLibsGrfmt.cmake
@@ -17,8 +17,10 @@ else()
     unset(_zlib_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES)
   endif()
   if(ZLIB_FOUND AND ANDROID)
-    if(ZLIB_LIBRARIES MATCHES "/usr/lib.*/libz.so$")
+    if(ZLIB_LIBRARY MATCHES "/usr/lib.*/libz.so$")
+      set(ZLIB_LIBRARY z)
       set(ZLIB_LIBRARIES z)
+      set(ZLIB_LIBRARY_RELEASE z)
     endif()
   endif()
 endif()
diff --git a/cmake/OpenCVPluginStandalone.cmake b/cmake/OpenCVPluginStandalone.cmake
index 15b7a8085e..129ede1ae1 100644
--- a/cmake/OpenCVPluginStandalone.cmake
+++ b/cmake/OpenCVPluginStandalone.cmake
@@ -78,10 +78,17 @@ function(ocv_create_plugin module default_name dependency_target dependency_targ
     set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES PREFIX "${OPENCV_PLUGIN_MODULE_PREFIX}")
   endif()
 
-  if(APPLE)
-    set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
-  elseif(WIN32)
-    # Hack for Windows only, Linux/MacOS uses global symbol table (without exact .so binding)
+  if(WIN32 OR NOT APPLE)
+    set(OPENCV_PLUGIN_NO_LINK FALSE CACHE BOOL "")
+  else()
+    set(OPENCV_PLUGIN_NO_LINK TRUE CACHE BOOL "")
+  endif()
+
+  if(OPENCV_PLUGIN_NO_LINK)
+    if(APPLE)
+      set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+    endif()
+  else()
     find_package(OpenCV REQUIRED ${module} ${OPENCV_PLUGIN_DEPS})
     target_link_libraries(${OPENCV_PLUGIN_NAME} PRIVATE ${OpenCV_LIBRARIES})
   endif()
diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake
index 21d60cc0f2..5d49b8a889 100644
--- a/cmake/OpenCVUtils.cmake
+++ b/cmake/OpenCVUtils.cmake
@@ -1619,6 +1619,7 @@ function(ocv_add_external_target name inc link def)
   endif()
 endfunction()
 
+
 # Returns the first non-interface target
 function(ocv_get_imported_target imported interface)
   set(__result "${interface}")
diff --git a/doc/tutorials/dnn/dnn_face/dnn_face.markdown b/doc/tutorials/dnn/dnn_face/dnn_face.markdown
index 069de2e919..09180a6a50 100644
--- a/doc/tutorials/dnn/dnn_face/dnn_face.markdown
+++ b/doc/tutorials/dnn/dnn_face/dnn_face.markdown
@@ -8,19 +8,19 @@
 | | |
 | -: | :- |
 | Original Author | Chengrui Wang, Yuantao Feng |
-| Compatibility | OpenCV >= 4.5.1 |
+| Compatibility | OpenCV >= 4.5.4 |
 
 ## Introduction
 
-In this section, we introduce the DNN-based module for face detection and face recognition. Models can be obtained in [Models](#Models). The usage of `FaceDetectorYN` and `FaceRecognizerSF` are presented in [Usage](#Usage).
+In this section, we introduce the cv::FaceDetectorYN class for face detection and the cv::FaceRecognizerSF class for face recognition.
 
 ## Models
 
 There are two models (ONNX format) pre-trained and required for this module:
-- [Face Detection](https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx):
-    - Size: 337KB
+- [Face Detection](https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet):
+    - Size: 338KB
     - Results on WIDER Face Val set: 0.830(easy), 0.824(medium), 0.708(hard)
-- [Face Recognition](https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view?usp=sharing)
+- [Face Recognition](https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface)
     - Size: 36.9MB
     - Results:
 
@@ -32,9 +32,7 @@ There are two models (ONNX format) pre-trained and required for this module:
     | AgeDB-30 | 94.90%   | 1.202              | 0.277              |
     | CFP-FP   | 94.80%   | 1.253              | 0.212              |
 
-## Usage
-
-### DNNFaceDetector
+## Code
 
 @add_toggle_cpp
 -   **Downloadable code**: Click
diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown
index 998bcfb392..d71a3a8c82 100644
--- a/doc/tutorials/introduction/config_reference/config_reference.markdown
+++ b/doc/tutorials/introduction/config_reference/config_reference.markdown
@@ -476,9 +476,10 @@ OpenCV have own DNN inference module which have own build-in engine, but can als
 | `BUILD_PROTOBUF` | _ON_ | Build own copy of _protobuf_. Must be disabled if you want to use external library. |
 | `PROTOBUF_UPDATE_FILES` | _OFF_ | Re-generate all .proto files. _protoc_ compiler compatible with used version of _protobuf_ must be installed. |
 | `OPENCV_DNN_OPENCL` | _ON_ | Enable built-in OpenCL inference backend. |
-| `WITH_INF_ENGINE` | _OFF_ | Enables [Intel Inference Engine (IE)](https://github.com/openvinotoolkit/openvino) backend. Allows to execute networks in IE format (.xml + .bin). Inference Engine must be installed either as part of [OpenVINO toolkit](https://en.wikipedia.org/wiki/OpenVINO), either as a standalone library built from sources. |
-| `INF_ENGINE_RELEASE` | _2020040000_ | Defines version of Inference Engine library which is tied to OpenVINO toolkit version. Must be a 10-digit string, e.g. _2020040000_ for OpenVINO 2020.4. |
-| `WITH_NGRAPH` | _OFF_ | Enables Intel NGraph library support. This library is part of Inference Engine backend which allows executing arbitrary networks read from files in multiple formats supported by OpenCV: Caffe, TensorFlow, PyTorch, Darknet, etc.. NGraph library must be installed, it is included into Inference Engine. |
+| `WITH_INF_ENGINE` | _OFF_ | **Deprecated since OpenVINO 2022.1** Enables [Intel Inference Engine (IE)](https://github.com/openvinotoolkit/openvino) backend. Allows executing networks in IE format (.xml + .bin). Inference Engine must be installed either as part of [OpenVINO toolkit](https://en.wikipedia.org/wiki/OpenVINO) or as a standalone library built from sources. |
+| `INF_ENGINE_RELEASE` | _2020040000_ | **Deprecated since OpenVINO 2022.1** Defines version of Inference Engine library which is tied to OpenVINO toolkit version. Must be a 10-digit string, e.g. _2020040000_ for OpenVINO 2020.4. |
+| `WITH_NGRAPH` | _OFF_ | **Deprecated since OpenVINO 2022.1** Enables Intel NGraph library support. This library is part of Inference Engine backend which allows executing arbitrary networks read from files in multiple formats supported by OpenCV: Caffe, TensorFlow, PyTorch, Darknet, etc. NGraph library must be installed; it is included in Inference Engine. |
+| `WITH_OPENVINO` | _OFF_ | Enable Intel OpenVINO Toolkit support. Should be used for OpenVINO>=2022.1 instead of `WITH_INF_ENGINE` and `WITH_NGRAPH`. |
 | `OPENCV_DNN_CUDA` | _OFF_ | Enable CUDA backend. [CUDA](https://en.wikipedia.org/wiki/CUDA), CUBLAS and [CUDNN](https://developer.nvidia.com/cudnn) must be installed. |
 | `WITH_HALIDE` | _OFF_ | Use experimental [Halide](https://en.wikipedia.org/wiki/Halide_(programming_language)) backend which can generate optimized code for dnn-layers at runtime. Halide must be installed. |
 | `WITH_VULKAN` | _OFF_ | Enable experimental [Vulkan](https://en.wikipedia.org/wiki/Vulkan_(API)) backend. Does not require additional dependencies, but can use external Vulkan headers (`VULKAN_INCLUDE_DIRS`). |
diff --git a/modules/3d/src/dls.cpp b/modules/3d/src/dls.cpp
index cbcd8fea6a..ed2e37f558 100644
--- a/modules/3d/src/dls.cpp
+++ b/modules/3d/src/dls.cpp
@@ -25,8 +25,7 @@ namespace cv {
 
 dls::dls(const Mat& opoints, const Mat& ipoints)
 {
-
-    N =  std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F));
+    N = std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F));
     p = Mat(3, N, CV_64F);
     z = Mat(3, N, CV_64F);
     mn = Mat::zeros(3, 1, CV_64F);
@@ -274,7 +273,7 @@ void dls::build_coeff_matrix(const Mat& pp, Mat& Mtilde, Mat& D)
 }
 
 void dls::compute_eigenvec(const Mat& Mtilde, Mat& eigenval_real, Mat& eigenval_imag,
-                                                  Mat& eigenvec_real, Mat& eigenvec_imag)
+                                              Mat& eigenvec_real, Mat& eigenvec_imag)
 {
 #ifdef HAVE_EIGEN
     Eigen::MatrixXd Mtilde_eig, zeros_eig;
@@ -606,8 +605,8 @@ Mat dls::skewsymm(const Mat * X1)
 {
     MatConstIterator_<double> it = X1->begin<double>();
     return (Mat_<double>(3,3) <<        0, -*(it+2),  *(it+1),
-                                      *(it+2),        0, -*(it+0),
-                                     -*(it+1),  *(it+0),       0);
+                                  *(it+2),        0, -*(it+0),
+                                 -*(it+1),  *(it+0),       0);
 }
 
 Mat dls::rotx(const double t)
@@ -658,4 +657,4 @@ bool dls::positive_eigenvalues(const Mat * eigenvalues)
     return *(it) > 0 && *(it+1) > 0 && *(it+2) > 0;
 }
 
-}
+} // namespace cv
diff --git a/modules/3d/src/dls.h b/modules/3d/src/dls.h
index 8c385709b2..a4b0ffa45c 100644
--- a/modules/3d/src/dls.h
+++ b/modules/3d/src/dls.h
@@ -9,16 +9,16 @@ namespace cv {
 class dls
 {
 public:
-    dls(const cv::Mat& opoints, const cv::Mat& ipoints);
+    dls(const Mat& opoints, const Mat& ipoints);
     ~dls();
 
-    bool compute_pose(cv::Mat& R, cv::Mat& t);
+    bool compute_pose(Mat& R, Mat& t);
 
 private:
 
     // initialisation
     template <typename OpointType, typename IpointType>
-    void init_points(const cv::Mat& opoints, const cv::Mat& ipoints)
+    void init_points(const Mat& opoints, const Mat& ipoints)
     {
         for(int i = 0; i < N; i++)
         {
@@ -47,33 +47,33 @@ private:
     }
 
     // main algorithm
-    cv::Mat LeftMultVec(const cv::Mat& v);
-    void run_kernel(const cv::Mat& pp);
-    void build_coeff_matrix(const cv::Mat& pp, cv::Mat& Mtilde, cv::Mat& D);
-    void compute_eigenvec(const cv::Mat& Mtilde, cv::Mat& eigenval_real, cv::Mat& eigenval_imag,
-                                                 cv::Mat& eigenvec_real, cv::Mat& eigenvec_imag);
-    void fill_coeff(const cv::Mat * D);
+    Mat LeftMultVec(const Mat& v);
+    void run_kernel(const Mat& pp);
+    void build_coeff_matrix(const Mat& pp, Mat& Mtilde, Mat& D);
+    void compute_eigenvec(const Mat& Mtilde, Mat& eigenval_real, Mat& eigenval_imag,
+                                             Mat& eigenvec_real, Mat& eigenvec_imag);
+    void fill_coeff(const Mat * D);
 
     // useful functions
-    cv::Mat cayley_LS_M(const std::vector<double>& a, const std::vector<double>& b,
-                        const std::vector<double>& c, const std::vector<double>& u);
-    cv::Mat Hessian(const double s[]);
-    cv::Mat cayley2rotbar(const cv::Mat& s);
-    cv::Mat skewsymm(const cv::Mat * X1);
+    Mat cayley_LS_M(const std::vector<double>& a, const std::vector<double>& b,
+                    const std::vector<double>& c, const std::vector<double>& u);
+    Mat Hessian(const double s[]);
+    Mat cayley2rotbar(const Mat& s);
+    Mat skewsymm(const Mat * X1);
 
     // extra functions
-    cv::Mat rotx(const double t);
-    cv::Mat roty(const double t);
-    cv::Mat rotz(const double t);
-    cv::Mat mean(const cv::Mat& M);
-    bool is_empty(const cv::Mat * v);
-    bool positive_eigenvalues(const cv::Mat * eigenvalues);
+    Mat rotx(const double t);
+    Mat roty(const double t);
+    Mat rotz(const double t);
+    Mat mean(const Mat& M);
+    bool is_empty(const Mat * v);
+    bool positive_eigenvalues(const Mat * eigenvalues);
 
-    cv::Mat p, z, mn;        // object-image points
+    Mat p, z, mn;        // object-image points
     int N;                // number of input points
     std::vector<double> f1coeff, f2coeff, f3coeff, cost_; // coefficient for coefficients matrix
-    std::vector<cv::Mat> C_est_, t_est_;    // optimal candidates
-    cv::Mat C_est__, t_est__;                // optimal found solution
+    std::vector<Mat> C_est_, t_est_;    // optimal candidates
+    Mat C_est__, t_est__;                // optimal found solution
     double cost__;                            // optimal found solution
 };
 
@@ -736,7 +736,7 @@ public:
     {
         /*if(isSymmetric(src)) {
             // Fall back to OpenCV for a symmetric matrix!
-            cv::eigen(src, _eigenvalues, _eigenvectors);
+            eigen(src, _eigenvalues, _eigenvectors);
         } else {*/
             Mat tmp;
             // Convert the given input matrix to double. Is there any way to
@@ -768,6 +768,5 @@ public:
     Mat eigenvectors() { return _eigenvectors; }
 };
 
-}
-
+} // namespace cv
 #endif // DLS_H
diff --git a/modules/3d/src/solvepnp.cpp b/modules/3d/src/solvepnp.cpp
index 6cee6a7485..bd55fea5e4 100644
--- a/modules/3d/src/solvepnp.cpp
+++ b/modules/3d/src/solvepnp.cpp
@@ -103,12 +103,12 @@ void drawFrameAxes(InputOutputArray image, InputArray cameraMatrix, InputArray d
     CV_Assert(length > 0);
 
     // project axes points
-    vector<Point3f> axesPoints;
+    std::vector<Point3f> axesPoints;
     axesPoints.push_back(Point3f(0, 0, 0));
     axesPoints.push_back(Point3f(length, 0, 0));
     axesPoints.push_back(Point3f(0, length, 0));
     axesPoints.push_back(Point3f(0, 0, length));
-    vector<Point2f> imagePoints;
+    std::vector<Point2f> imagePoints;
     projectPoints(axesPoints, rvec, tvec, cameraMatrix, distCoeffs, imagePoints);
 
     // draw axes lines
@@ -123,7 +123,7 @@ bool solvePnP( InputArray opoints, InputArray ipoints,
 {
     CV_INSTRUMENT_REGION();
 
-    vector<Mat> rvecs, tvecs;
+    std::vector<Mat> rvecs, tvecs;
     int solutions = solvePnPGeneric(opoints, ipoints, cameraMatrix, distCoeffs, rvecs, tvecs, useExtrinsicGuess, (SolvePnPMethod)flags, rvec, tvec);
 
     if (solutions > 0)
@@ -321,8 +321,8 @@ bool solvePnPRansac(InputArray _opoints, InputArray _ipoints,
         return false;
     }
 
-    vector<Point3d> opoints_inliers;
-    vector<Point2d> ipoints_inliers;
+    std::vector<Point3d> opoints_inliers;
+    std::vector<Point2d> ipoints_inliers;
     opoints = opoints.reshape(3);
     ipoints = ipoints.reshape(2);
     opoints.convertTo(opoints_inliers, CV_64F);
@@ -472,7 +472,7 @@ int solveP3P( InputArray _opoints, InputArray _ipoints,
     else
         imgPts = imgPts.reshape(1, 2*imgPts.rows);
 
-    vector<double> reproj_errors(solutions);
+    std::vector<double> reproj_errors(solutions);
     for (size_t i = 0; i < reproj_errors.size(); i++)
     {
         Mat rvec;
@@ -753,7 +753,7 @@ static void solvePnPRefine(InputArray _objectPoints, InputArray _imagePoints,
         rvec0.convertTo(rvec, CV_64F);
         tvec0.convertTo(tvec, CV_64F);
 
-        vector<Point2d> ipoints_normalized;
+        std::vector<Point2d> ipoints_normalized;
         undistortPoints(ipoints, ipoints_normalized, cameraMatrix, distCoeffs);
         Mat sd = Mat(ipoints_normalized).reshape(1, npoints*2);
         Mat objectPoints0 = opoints.reshape(1, npoints);
@@ -847,7 +847,7 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints,
     Mat cameraMatrix = Mat_<double>(cameraMatrix0);
     Mat distCoeffs = Mat_<double>(distCoeffs0);
 
-    vector<Mat> vec_rvecs, vec_tvecs;
+    std::vector<Mat> vec_rvecs, vec_tvecs;
     if (flags == SOLVEPNP_EPNP || flags == SOLVEPNP_DLS || flags == SOLVEPNP_UPNP)
     {
         if (flags == SOLVEPNP_DLS)
@@ -872,7 +872,7 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints,
     }
     else if (flags == SOLVEPNP_P3P || flags == SOLVEPNP_AP3P)
     {
-        vector<Mat> rvecs, tvecs;
+        std::vector<Mat> rvecs, tvecs;
         solveP3P(opoints, ipoints, _cameraMatrix, _distCoeffs, rvecs, tvecs, flags);
         vec_rvecs.insert(vec_rvecs.end(), rvecs.begin(), rvecs.end());
         vec_tvecs.insert(vec_tvecs.end(), tvecs.begin(), tvecs.end());
@@ -1120,7 +1120,7 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints,
 
         for (size_t i = 0; i < vec_rvecs.size(); i++)
         {
-            vector<Point2d> projectedPoints;
+            std::vector<Point2d> projectedPoints;
             projectPoints(objectPoints, vec_rvecs[i], vec_tvecs[i], cameraMatrix, distCoeffs, projectedPoints);
             double rmse = norm(Mat(projectedPoints, false), imagePoints, NORM_L2) / sqrt(2*projectedPoints.size());
 
diff --git a/modules/core/include/opencv2/core/bindings_utils.hpp b/modules/core/include/opencv2/core/bindings_utils.hpp
index 22a86ff9be..7a50390aed 100644
--- a/modules/core/include/opencv2/core/bindings_utils.hpp
+++ b/modules/core/include/opencv2/core/bindings_utils.hpp
@@ -219,6 +219,12 @@ AsyncArray testAsyncException()
     return p.getArrayResult();
 }
 
+namespace nested {
+CV_WRAP static inline bool testEchoBooleanFunction(bool flag) {  // returns its argument unchanged
+    return flag;
+}
+} // namespace nested
+
 namespace fs {
     CV_EXPORTS_W cv::String getCacheDirectoryForDownloads();
 } // namespace fs
diff --git a/modules/core/include/opencv2/core/core_c.h b/modules/core/include/opencv2/core/core_c.h
index b37297e146..bd9b516003 100644
--- a/modules/core/include/opencv2/core/core_c.h
+++ b/modules/core/include/opencv2/core/core_c.h
@@ -48,16 +48,19 @@
 #include "opencv2/core/types_c.h"
 
 #ifdef __cplusplus
-#  ifdef _MSC_VER
-/* disable warning C4190: 'function' has C-linkage specified, but returns UDT 'typename'
-                          which is incompatible with C
+/* disable MSVC warning C4190 / clang-cl -Wreturn-type-c-linkage:
+       'function' has C-linkage specified, but returns UDT 'typename'
+       which is incompatible with C
 
    It is OK to disable it because we only extend few plain structures with
    C++ constructors for simpler interoperability with C++ API of the library
 */
-#    pragma warning(disable:4190)
-#  elif defined __clang__ && __clang_major__ >= 3
+#  if defined(__clang__)
+     // handle clang on Linux and clang-cl (i. e. clang on Windows) first
 #    pragma GCC diagnostic ignored "-Wreturn-type-c-linkage"
+#  elif defined(_MSC_VER)
+     // then handle MSVC
+#    pragma warning(disable:4190)
 #  endif
 #endif
 
diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp
index 716b8bf2a8..719003f21f 100644
--- a/modules/core/include/opencv2/core/cuda.hpp
+++ b/modules/core/include/opencv2/core/cuda.hpp
@@ -924,7 +924,7 @@ public:
         INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DisableTiming must be set */
     };
 
-    CV_WRAP explicit Event(Event::CreateFlags flags = Event::CreateFlags::DEFAULT);
+    CV_WRAP explicit Event(const Event::CreateFlags flags = Event::CreateFlags::DEFAULT);
 
     //! records an event
     CV_WRAP void record(Stream& stream = Stream::Null());
@@ -946,6 +946,7 @@ private:
 
     friend struct EventAccessor;
 };
+CV_ENUM_FLAGS(Event::CreateFlags)
 
 //! @} cudacore_struct
 
diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp
index e39f229f6e..87a51748bb 100644
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -444,7 +444,16 @@ CV_EXPORTS InputOutputArray noArray();
 
 /////////////////////////////////// MatAllocator //////////////////////////////////////
 
-//! Usage flags for allocator
+/** @brief  Usage flags for allocator
+
+ @warning  All flags except `USAGE_DEFAULT` are experimental.
+
+ @warning  For the OpenCL allocator, `USAGE_ALLOCATE_SHARED_MEMORY` depends on
+ OpenCV's optional, experimental integration with OpenCL SVM. To enable this
+ integration, build OpenCV using the `WITH_OPENCL_SVM=ON` CMake option and, at
+ runtime, call `cv::ocl::Context::getDefault().setUseSVM(true);` or similar
+ code. Note that SVM is incompatible with OpenCL 1.x.
+*/
 enum UMatUsageFlags
 {
     USAGE_DEFAULT = 0,
@@ -2076,7 +2085,7 @@ public:
 
         Mat_<Pixel> image = Mat::zeros(3, sizes, CV_8UC3);
 
-        image.forEach<Pixel>([&](Pixel& pixel, const int position[]) -> void {
+        image.forEach<Pixel>([](Pixel& pixel, const int position[]) -> void {
             pixel.x = position[0];
             pixel.y = position[1];
             pixel.z = position[2];
diff --git a/modules/core/include/opencv2/core/persistence.hpp b/modules/core/include/opencv2/core/persistence.hpp
index 276f640323..8e135d1a11 100644
--- a/modules/core/include/opencv2/core/persistence.hpp
+++ b/modules/core/include/opencv2/core/persistence.hpp
@@ -309,8 +309,8 @@ public:
         READ        = 0, //!< value, open the file for reading
         WRITE       = 1, //!< value, open the file for writing
         APPEND      = 2, //!< value, open the file for appending
-        MEMORY      = 4, //!< flag, read data from source or write data to the internal buffer (which is
-        //!< returned by FileStorage::release)
+        MEMORY      = 4, /**< flag, read data from source or write data to the internal buffer (which is
+                              returned by FileStorage::release) */
         FORMAT_MASK = (7<<3), //!< mask for format flags
         FORMAT_AUTO = 0,      //!< flag, auto format
         FORMAT_XML  = (1<<3), //!< flag, XML format
diff --git a/modules/core/include/opencv2/core/utils/fp_control.private.hpp b/modules/core/include/opencv2/core/utils/fp_control.private.hpp
new file mode 100644
index 0000000000..12ee363dd8
--- /dev/null
+++ b/modules/core/include/opencv2/core/utils/fp_control.private.hpp
@@ -0,0 +1,29 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
+#define OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
+
+#include "fp_control_utils.hpp"
+
+#if OPENCV_SUPPORTS_FP_DENORMALS_HINT == 0
+  // hint support is disabled for this target: no implementation is selected in this branch
+#elif defined(OPENCV_IMPL_FP_HINTS)
+  // OPENCV_IMPL_FP_HINTS was already defined before this header: keep that value
+#elif defined(OPENCV_IMPL_FP_HINTS_X86)
+  // OPENCV_IMPL_FP_HINTS_X86 was already defined before this header: keep that value
+#elif defined(__SSE__) || defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
+  #include <xmmintrin.h>
+  #define OPENCV_IMPL_FP_HINTS_X86 1  // SSE is available: select the x86 implementation
+  #define OPENCV_IMPL_FP_HINTS 1
+#endif
+
+#ifndef OPENCV_IMPL_FP_HINTS  // not set above or externally: default to 0
+#define OPENCV_IMPL_FP_HINTS 0
+#endif
+#ifndef OPENCV_IMPL_FP_HINTS_X86  // not set above or externally: default to 0
+#define OPENCV_IMPL_FP_HINTS_X86 0
+#endif
+
+#endif // OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
diff --git a/modules/core/include/opencv2/core/utils/fp_control_utils.hpp b/modules/core/include/opencv2/core/utils/fp_control_utils.hpp
new file mode 100644
index 0000000000..930bc5d367
--- /dev/null
+++ b/modules/core/include/opencv2/core/utils/fp_control_utils.hpp
@@ -0,0 +1,69 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_FP_CONTROL_UTILS_HPP
+#define OPENCV_CORE_FP_CONTROL_UTILS_HPP
+
+namespace cv {
+
+namespace details {
+
+struct FPDenormalsModeState  // storage handed to the save/restore functions declared below
+{
+    uint32_t reserved[16];  // 64-bytes
+};  // FPDenormalsModeState
+
+CV_EXPORTS void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state);  // presumably writes the previous mode into `state` (CV_OUT) - confirm in the implementation
+CV_EXPORTS int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state);  // fills `state`; meaning of the int result is not visible here
+CV_EXPORTS bool restoreFPDenormalsState(const FPDenormalsModeState& state);  // applies `state`; meaning of the bool result is not visible here
+
+class FPDenormalsIgnoreHintScope  // scope guard: changes the FP denormals mode on construction, restores it on destruction
+{
+public:
+    inline explicit FPDenormalsIgnoreHintScope(bool ignore = true)  // apply the hint; saved_state receives the CV_OUT value
+    {
+        details::setFPDenormalsIgnoreHint(ignore, saved_state);
+    }
+
+    inline explicit FPDenormalsIgnoreHintScope(const FPDenormalsModeState& state)  // switch to a caller-provided state
+    {
+        details::saveFPDenormalsState(saved_state);  // remember the current state first
+        details::restoreFPDenormalsState(state);  // then apply the requested one
+    }
+
+    inline ~FPDenormalsIgnoreHintScope()  // put back what was captured at construction
+    {
+        details::restoreFPDenormalsState(saved_state);
+    }
+
+protected:
+    FPDenormalsModeState saved_state;  // state captured by the constructor, restored by the destructor
+};  // FPDenormalsIgnoreHintScope
+
+class FPDenormalsIgnoreHintScopeNOOP  // same constructor shapes as FPDenormalsIgnoreHintScope, but does nothing
+{
+public:
+    inline FPDenormalsIgnoreHintScopeNOOP(bool ignore = true) { CV_UNUSED(ignore); }  // argument is only marked unused
+    inline FPDenormalsIgnoreHintScopeNOOP(const FPDenormalsModeState& state) { CV_UNUSED(state); }  // argument is only marked unused
+    inline ~FPDenormalsIgnoreHintScopeNOOP() { }
+};  // FPDenormalsIgnoreHintScopeNOOP
+
+}  // namespace details
+
+
+// Should depend on target compilation architecture only
+// Note: previously added archs should NOT be removed to preserve ABI compatibility
+#if defined(OPENCV_SUPPORTS_FP_DENORMALS_HINT)
+  // preserve configuration overloading through ports
+#elif defined(__i386__) || defined(__x86_64__) || defined(_M_X64) || defined(_X86_)
+typedef details::FPDenormalsIgnoreHintScope FPDenormalsIgnoreHintScope;
+#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 1
+#else
+#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 0
+typedef details::FPDenormalsIgnoreHintScopeNOOP FPDenormalsIgnoreHintScope;
+#endif
+
+}  // namespace cv
+
+#endif // OPENCV_CORE_FP_CONTROL_UTILS_HPP
diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp
index 68863ffb36..5cbc066784 100644
--- a/modules/core/include/opencv2/core/vsx_utils.hpp
+++ b/modules/core/include/opencv2/core/vsx_utils.hpp
@@ -684,7 +684,8 @@ VSX_IMPL_LOAD_L8(vec_double2, double)
 #endif
 
 // absolute difference
-#ifndef vec_absd
+#ifndef _ARCH_PWR9
+#   undef vec_absd
 #   define vec_absd(a, b) vec_sub(vec_max(a, b), vec_min(a, b))
 #endif
 
diff --git a/modules/core/perf/perf_reduce.cpp b/modules/core/perf/perf_reduce.cpp
index 8f9c2e8349..dcc0205fdc 100644
--- a/modules/core/perf/perf_reduce.cpp
+++ b/modules/core/perf/perf_reduce.cpp
@@ -23,7 +23,7 @@ PERF_TEST_P(Size_MatType_ROp, reduceR,
     int reduceOp = get<2>(GetParam());
 
     int ddepth = -1;
-    if( CV_MAT_DEPTH(matType) < CV_32S && (reduceOp == CV_REDUCE_SUM || reduceOp == CV_REDUCE_AVG) )
+    if( CV_MAT_DEPTH(matType) < CV_32S && (reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG) )
         ddepth = CV_32S;
 
     Mat src(sz, matType);
@@ -51,7 +51,7 @@ PERF_TEST_P(Size_MatType_ROp, reduceC,
     int reduceOp = get<2>(GetParam());
 
     int ddepth = -1;
-    if( CV_MAT_DEPTH(matType)< CV_32S && (reduceOp == CV_REDUCE_SUM || reduceOp == CV_REDUCE_AVG) )
+    if( CV_MAT_DEPTH(matType)< CV_32S && (reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG) )
         ddepth = CV_32S;
 
     Mat src(sz, matType);
diff --git a/modules/core/src/cuda/gpu_mat.cu b/modules/core/src/cuda/gpu_mat.cu
index f31f78a87a..c286f28eb0 100644
--- a/modules/core/src/cuda/gpu_mat.cu
+++ b/modules/core/src/cuda/gpu_mat.cu
@@ -184,11 +184,8 @@ void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
         if (esz * cols == step)
             flags |= Mat::CONTINUOUS_FLAG;
 
-        int64 _nettosize = static_cast<int64>(step) * rows;
-        size_t nettosize = static_cast<size_t>(_nettosize);
-
         datastart = data;
-        dataend = data + nettosize;
+        dataend = data + step * (rows - 1) + cols * esz;
 
         if (refcount)
             *refcount = 1;
diff --git a/modules/core/src/cuda_stream.cpp b/modules/core/src/cuda_stream.cpp
index 3680e0720a..3f647c8d55 100644
--- a/modules/core/src/cuda_stream.cpp
+++ b/modules/core/src/cuda_stream.cpp
@@ -811,7 +811,7 @@ Event cv::cuda::EventAccessor::wrapEvent(cudaEvent_t event)
 
 #endif
 
-cv::cuda::Event::Event(CreateFlags flags)
+cv::cuda::Event::Event(const Event::CreateFlags flags)
 {
 #ifndef HAVE_CUDA
     CV_UNUSED(flags);
diff --git a/modules/core/src/hal_internal.cpp b/modules/core/src/hal_internal.cpp
index 44540e7914..2ba992111c 100644
--- a/modules/core/src/hal_internal.cpp
+++ b/modules/core/src/hal_internal.cpp
@@ -64,6 +64,16 @@
 #define HAL_LU_SMALL_MATRIX_THRESH 100
 #define HAL_CHOLESKY_SMALL_MATRIX_THRESH 100
 
+#if defined(__clang__) && defined(__has_feature)
+#if __has_feature(memory_sanitizer)  // MemorySanitizer build
+#include <sanitizer/msan_interface.h>  // declares __msan_unpoison()
+#define CV_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) __msan_unpoison(address, size)
+#endif  // __has_feature(memory_sanitizer)
+#endif  // __clang__ && __has_feature
+#ifndef CV_ANNOTATE_MEMORY_IS_INITIALIZED  // non-MSAN build: the annotation expands to an empty statement
+#define CV_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) do { } while(0)
+#endif
+
 //lapack stores matrices in column-major order so transposing is needed everywhere
 template <typename fptype> static inline void
 transpose_square_inplace(fptype *src, size_t src_ld, size_t m)
@@ -248,6 +258,17 @@ lapack_SVD(fptype* a, size_t a_step, fptype *w, fptype* u, size_t u_step, fptype
         OCV_LAPACK_FUNC(dgesdd)(mode, &m, &n, (double*)a, &lda, (double*)w, (double*)u, &ldu,
                 (double*)vt, &ldv, (double*)&buffer[0], &lwork, &iworkBuf[0], info);
 
+    // Make sure MSAN sees the memory as having been written.
+    // MSAN does not think it has been written because a different language was called.
+    CV_ANNOTATE_MEMORY_IS_INITIALIZED(a, a_step * n);
+    CV_ANNOTATE_MEMORY_IS_INITIALIZED(buffer, sizeof(fptype) * (lwork + 1));
+    if (u)
+      CV_ANNOTATE_MEMORY_IS_INITIALIZED(u, u_step * m);
+    if (vt)
+      CV_ANNOTATE_MEMORY_IS_INITIALIZED(vt, v_step * n);
+    if (w)
+      CV_ANNOTATE_MEMORY_IS_INITIALIZED(w, sizeof(fptype) * std::min(m, n));
+
     if(!(flags & CV_HAL_SVD_NO_UV))
         transpose_square_inplace(vt, ldv, n);
 
@@ -359,6 +380,7 @@ lapack_QR(fptype* a, size_t a_step, int m, int n, int k, fptype* b, size_t b_ste
             dgeqrf_(&m, &n, (double*)tmpA, &ldtmpA, (double*)dst, (double*)buffer, &lwork, info);
     }
 
+    CV_ANNOTATE_MEMORY_IS_INITIALIZED(info, sizeof(int));
     if (m == n)
         transpose_square_inplace(a, lda, m);
     else
diff --git a/modules/core/src/kmeans.cpp b/modules/core/src/kmeans.cpp
index 3e72ddf6a4..c7a03c814b 100644
--- a/modules/core/src/kmeans.cpp
+++ b/modules/core/src/kmeans.cpp
@@ -240,7 +240,7 @@ double cv::kmeans( InputArray _data, int K,
 
     attempts = std::max(attempts, 1);
     CV_Assert( data0.dims <= 2 && type == CV_32F && K > 0 );
-    CV_CheckGE(N, K, "Number of clusters should be more than number of elements");
+    CV_CheckGE(N, K, "There can't be more clusters than elements");
 
     Mat data(N, dims, CV_32F, data0.ptr(), isrow ? dims * sizeof(float) : static_cast<size_t>(data0.step));
 
diff --git a/modules/core/src/matmul.dispatch.cpp b/modules/core/src/matmul.dispatch.cpp
index 92e44f45c9..52200f097c 100644
--- a/modules/core/src/matmul.dispatch.cpp
+++ b/modules/core/src/matmul.dispatch.cpp
@@ -804,7 +804,7 @@ void calcCovarMatrix( InputArray _src, OutputArray _covar, InputOutputArray _mea
     else
     {
         ctype = std::max(CV_MAT_DEPTH(ctype >= 0 ? ctype : type), CV_32F);
-        reduce( _src, _mean, takeRows ? 0 : 1, CV_REDUCE_AVG, ctype );
+        reduce( _src, _mean, takeRows ? 0 : 1, REDUCE_AVG, ctype );
         mean = _mean.getMat();
     }
 
diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp
index 1729862cb7..6a381c15a0 100644
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@ -176,27 +176,23 @@ public:
     }
 };
 
-namespace
+static
+MatAllocator*& getDefaultAllocatorMatRef()
 {
-    MatAllocator* volatile g_matAllocator = NULL;
+    static MatAllocator* g_matAllocator = Mat::getStdAllocator();
+    return g_matAllocator;
 }
 
 MatAllocator* Mat::getDefaultAllocator()
 {
-    if (g_matAllocator == NULL)
-    {
-        cv::AutoLock lock(cv::getInitializationMutex());
-        if (g_matAllocator == NULL)
-        {
-            g_matAllocator = getStdAllocator();
-        }
-    }
-    return g_matAllocator;
+    return getDefaultAllocatorMatRef();
 }
+
 void Mat::setDefaultAllocator(MatAllocator* allocator)
 {
-    g_matAllocator = allocator;
+    getDefaultAllocatorMatRef() = allocator;
 }
+
 MatAllocator* Mat::getStdAllocator()
 {
     CV_SINGLETON_LAZY_INIT(MatAllocator, new StdMatAllocator())
@@ -269,7 +265,7 @@ void setSize( Mat& m, int _dims, const int* _sz, const size_t* _steps, bool auto
         else if( autoSteps )
         {
             m.step.p[i] = total;
-            int64 total1 = (int64)total*s;
+            uint64 total1 = (uint64)total*s;
             if( (uint64)total1 != (size_t)total1 )
                 CV_Error( CV_StsOutOfRange, "The total matrix size does not fit to \"size_t\" type" );
             total = (size_t)total1;
diff --git a/modules/core/src/matrix_operations.cpp b/modules/core/src/matrix_operations.cpp
index 62e92fd5d3..f9a50cd0ee 100644
--- a/modules/core/src/matrix_operations.cpp
+++ b/modules/core/src/matrix_operations.cpp
@@ -616,7 +616,7 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst,
     if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
         return false;
 
-    if (op == CV_REDUCE_AVG)
+    if (op == REDUCE_AVG)
     {
         if (sdepth < CV_32S && ddepth < CV_32S)
             ddepth = CV_32S;
@@ -654,7 +654,7 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst,
         _dst.create(dsize, dtype);
         UMat dst = _dst.getUMat();
 
-        if (op0 == CV_REDUCE_AVG)
+        if (op0 == REDUCE_AVG)
             k.args(ocl::KernelArg::ReadOnly(src),
                       ocl::KernelArg::WriteOnlyNoSize(dst), 1.0f / src.cols);
         else
@@ -690,7 +690,7 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst,
         ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src),
                 temparg = ocl::KernelArg::WriteOnlyNoSize(dst);
 
-        if (op0 == CV_REDUCE_AVG)
+        if (op0 == REDUCE_AVG)
             k.args(srcarg, temparg, 1.0f / (dim == 0 ? src.rows : src.cols));
         else
             k.args(srcarg, temparg);
@@ -717,8 +717,8 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
     int ddepth = CV_MAT_DEPTH(dtype);
 
     CV_Assert( cn == CV_MAT_CN(dtype) );
-    CV_Assert( op == CV_REDUCE_SUM || op == CV_REDUCE_MAX ||
-               op == CV_REDUCE_MIN || op == CV_REDUCE_AVG );
+    CV_Assert( op == REDUCE_SUM || op == REDUCE_MAX ||
+               op == REDUCE_MIN || op == REDUCE_AVG );
 
     CV_OCL_RUN(_dst.isUMat(),
                ocl_reduce(_src, _dst, dim, op, op0, stype, dtype))
@@ -732,9 +732,9 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
     _dst.create(dim == 0 ? 1 : src.rows, dim == 0 ? src.cols : 1, dtype);
     Mat dst = _dst.getMat(), temp = dst;
 
-    if( op == CV_REDUCE_AVG )
+    if( op == REDUCE_AVG )
     {
-        op = CV_REDUCE_SUM;
+        op = REDUCE_SUM;
         if( sdepth < CV_32S && ddepth < CV_32S )
         {
             temp.create(dst.rows, dst.cols, CV_32SC(cn));
@@ -745,7 +745,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
     ReduceFunc func = 0;
     if( dim == 0 )
     {
-        if( op == CV_REDUCE_SUM )
+        if( op == REDUCE_SUM )
         {
             if(sdepth == CV_8U && ddepth == CV_32S)
                 func = GET_OPTIMIZED(reduceSumR8u32s);
@@ -768,7 +768,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
             else if(sdepth == CV_64F && ddepth == CV_64F)
                 func = reduceSumR64f64f;
         }
-        else if(op == CV_REDUCE_MAX)
+        else if(op == REDUCE_MAX)
         {
             if(sdepth == CV_8U && ddepth == CV_8U)
                 func = GET_OPTIMIZED(reduceMaxR8u);
@@ -781,7 +781,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
             else if(sdepth == CV_64F && ddepth == CV_64F)
                 func = reduceMaxR64f;
         }
-        else if(op == CV_REDUCE_MIN)
+        else if(op == REDUCE_MIN)
         {
             if(sdepth == CV_8U && ddepth == CV_8U)
                 func = GET_OPTIMIZED(reduceMinR8u);
@@ -797,7 +797,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
     }
     else
     {
-        if(op == CV_REDUCE_SUM)
+        if(op == REDUCE_SUM)
         {
             if(sdepth == CV_8U && ddepth == CV_32S)
                 func = GET_OPTIMIZED(reduceSumC8u32s);
@@ -820,7 +820,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
             else if(sdepth == CV_64F && ddepth == CV_64F)
                 func = reduceSumC64f64f;
         }
-        else if(op == CV_REDUCE_MAX)
+        else if(op == REDUCE_MAX)
         {
             if(sdepth == CV_8U && ddepth == CV_8U)
                 func = GET_OPTIMIZED(reduceMaxC8u);
@@ -833,7 +833,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
             else if(sdepth == CV_64F && ddepth == CV_64F)
                 func = reduceMaxC64f;
         }
-        else if(op == CV_REDUCE_MIN)
+        else if(op == REDUCE_MIN)
         {
             if(sdepth == CV_8U && ddepth == CV_8U)
                 func = GET_OPTIMIZED(reduceMinC8u);
@@ -854,7 +854,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype)
 
     func( src, temp );
 
-    if( op0 == CV_REDUCE_AVG )
+    if( op0 == REDUCE_AVG )
         temp.convertTo(dst, dst.type(), 1./(dim == 0 ? src.rows : src.cols));
 }
 
@@ -940,8 +940,8 @@ static bool ipp_sort(const Mat& src, Mat& dst, int flags)
 {
     CV_INSTRUMENT_REGION_IPP();
 
-    bool        sortRows        = (flags & 1) == CV_SORT_EVERY_ROW;
-    bool        sortDescending  = (flags & CV_SORT_DESCENDING) != 0;
+    bool        sortRows        = (flags & 1) == SORT_EVERY_ROW;
+    bool        sortDescending  = (flags & SORT_DESCENDING) != 0;
     bool        inplace         = (src.data == dst.data);
     int         depth           = src.depth();
     IppDataType type            = ippiGetDataType(depth);
diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp
index 684a1a2ee3..2c66a120aa 100644
--- a/modules/core/src/parallel.cpp
+++ b/modules/core/src/parallel.cpp
@@ -153,6 +153,9 @@
 
 #include "opencv2/core/detail/exception_ptr.hpp"  // CV__EXCEPTION_PTR = 1 if std::exception_ptr is available
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+#include <opencv2/core/utils/fp_control.private.hpp>
+
 using namespace cv;
 
 namespace cv {
@@ -203,6 +206,9 @@ namespace {
 
             // propagate main thread state
             rng = cv::theRNG();
+#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
+            details::saveFPDenormalsState(fp_denormals_base_state);
+#endif
 
 #ifdef OPENCV_TRACE
             traceRootRegion = CV_TRACE_NS::details::getCurrentRegion();
@@ -283,6 +289,11 @@ namespace {
                 }
             }
         }
+
+#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
+        details::FPDenormalsModeState fp_denormals_base_state;
+#endif
+
     private:
         ParallelLoopBodyWrapperContext(const ParallelLoopBodyWrapperContext&); // disabled
         ParallelLoopBodyWrapperContext& operator=(const ParallelLoopBodyWrapperContext&); // disabled
@@ -319,6 +330,9 @@ namespace {
 
             // propagate main thread state
             cv::theRNG() = ctx.rng;
+#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
+            FPDenormalsIgnoreHintScope fp_denormals_scope(ctx.fp_denormals_base_state);
+#endif
 
             cv::Range r;
             cv::Range wholeRange = ctx.wholeRange;
diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp
index ae6a5a04fe..6789c78e9d 100644
--- a/modules/core/src/persistence.cpp
+++ b/modules/core/src/persistence.cpp
@@ -9,6 +9,8 @@
 #include <unordered_map>
 #include <iterator>
 
+#include <opencv2/core/utils/logger.hpp>
+
 namespace cv
 {
 
@@ -499,21 +501,29 @@ bool FileStorage::Impl::open(const char *filename_or_buf, int _flags, const char
         if (!isGZ) {
             file = fopen(filename.c_str(), !write_mode ? "rt" : !append ? "wt" : "a+t");
             if (!file)
+            {
+                CV_LOG_ERROR(NULL, "Can't open file: '" << filename << "' in " << (!write_mode ? "read" : !append ? "write" : "append") << " mode");
                 return false;
+            }
         } else {
 #if USE_ZLIB
             char mode[] = {write_mode ? 'w' : 'r', 'b', compression ? compression : '3', '\0'};
             gzfile = gzopen(filename.c_str(), mode);
             if (!gzfile)
+            {
+                CV_LOG_ERROR(NULL, "Can't open archive: '" << filename << "' mode=" << mode);
                 return false;
+            }
 #else
             CV_Error(cv::Error::StsNotImplemented, "There is no compressed file storage support in this configuration");
 #endif
         }
     }
 
+    // FIXIT release() must do that, use CV_Assert() here instead
     roots.clear();
     fs_data.clear();
+
     wrap_margin = 71;
     fmt = FileStorage::FORMAT_AUTO;
 
@@ -616,14 +626,14 @@ bool FileStorage::Impl::open(const char *filename_or_buf, int _flags, const char
                 puts("\n");
             }
 
-            emitter = createXMLEmitter(this);
+            emitter_do_not_use_direct_dereference = createXMLEmitter(this);
         } else if (fmt == FileStorage::FORMAT_YAML) {
             if (!append)
                 puts("%YAML:1.0\n---\n");
             else
                 puts("...\n---\n");
 
-            emitter = createYAMLEmitter(this);
+            emitter_do_not_use_direct_dereference = createYAMLEmitter(this);
         } else {
             CV_Assert(fmt == FileStorage::FORMAT_JSON);
             if (!append)
@@ -653,7 +663,7 @@ bool FileStorage::Impl::open(const char *filename_or_buf, int _flags, const char
                 }
             }
             write_stack.back().indent = 4;
-            emitter = createJSONEmitter(this);
+            emitter_do_not_use_direct_dereference = createJSONEmitter(this);
         }
         is_opened = true;
     } else {
@@ -701,20 +711,20 @@ bool FileStorage::Impl::open(const char *filename_or_buf, int _flags, const char
 
             switch (fmt) {
                 case FileStorage::FORMAT_XML:
-                    parser = createXMLParser(this);
+                    parser_do_not_use_direct_dereference = createXMLParser(this);
                     break;
                 case FileStorage::FORMAT_YAML:
-                    parser = createYAMLParser(this);
+                    parser_do_not_use_direct_dereference = createYAMLParser(this);
                     break;
                 case FileStorage::FORMAT_JSON:
-                    parser = createJSONParser(this);
+                    parser_do_not_use_direct_dereference = createJSONParser(this);
                     break;
                 default:
-                    parser = Ptr<FileStorageParser>();
+                    parser_do_not_use_direct_dereference = Ptr<FileStorageParser>();
             }
 
-            if (!parser.empty()) {
-                ok = parser->parse(ptr);
+            if (!parser_do_not_use_direct_dereference.empty()) {
+                ok = getParser().parse(ptr);
                 if (ok) {
                     finalizeCollection(root_nodes);
 
@@ -728,7 +738,9 @@ bool FileStorage::Impl::open(const char *filename_or_buf, int _flags, const char
                 }
             }
         }
-        catch (...) {
+        catch (...)
+        {
+            // FIXIT log error message
             is_opened = true;
             release();
             throw;
@@ -926,7 +938,7 @@ void FileStorage::Impl::endWriteStruct() {
     if (fmt == FileStorage::FORMAT_JSON && !FileNode::isFlow(current_struct.flags) && write_stack.size() > 1)
         current_struct.indent = write_stack[write_stack.size() - 2].indent;
 
-    emitter->endWriteStruct(current_struct);
+    getEmitter().endWriteStruct(current_struct);
 
     write_stack.pop_back();
     if (!write_stack.empty())
@@ -945,7 +957,7 @@ void FileStorage::Impl::startWriteStruct_helper(const char *key, int struct_flag
     if (type_name && type_name[0] == '\0')
         type_name = 0;
 
-    FStructData s = emitter->startWriteStruct(write_stack.back(), key, struct_flags, type_name);
+    FStructData s = getEmitter().startWriteStruct(write_stack.back(), key, struct_flags, type_name);
 
     write_stack.push_back(s);
     size_t write_stack_size = write_stack.size();
@@ -956,7 +968,7 @@ void FileStorage::Impl::startWriteStruct_helper(const char *key, int struct_flag
         flush();
 
     if (fmt == FileStorage::FORMAT_JSON && type_name && type_name[0] && FileNode::isMap(struct_flags)) {
-        emitter->write("type_id", type_name, false);
+        getEmitter().write("type_id", type_name, false);
     }
 }
 
@@ -997,7 +1009,7 @@ void FileStorage::Impl::startWriteStruct(const char *key, int struct_flags,
 
 void FileStorage::Impl::writeComment(const char *comment, bool eol_comment) {
     CV_Assert(write_mode);
-    emitter->writeComment(comment, eol_comment);
+    getEmitter().writeComment(comment, eol_comment);
 }
 
 void FileStorage::Impl::startNextStream() {
@@ -1006,7 +1018,7 @@ void FileStorage::Impl::startNextStream() {
         while (!write_stack.empty())
             endWriteStruct();
         flush();
-        emitter->startNextStream();
+        getEmitter().startNextStream();
         empty_stream = true;
         write_stack.push_back(FStructData("", FileNode::EMPTY, 0));
         bufofs = 0;
@@ -1015,17 +1027,17 @@ void FileStorage::Impl::startNextStream() {
 
 void FileStorage::Impl::write(const String &key, int value) {
     CV_Assert(write_mode);
-    emitter->write(key.c_str(), value);
+    getEmitter().write(key.c_str(), value);
 }
 
 void FileStorage::Impl::write(const String &key, double value) {
     CV_Assert(write_mode);
-    emitter->write(key.c_str(), value);
+    getEmitter().write(key.c_str(), value);
 }
 
 void FileStorage::Impl::write(const String &key, const String &value) {
     CV_Assert(write_mode);
-    emitter->write(key.c_str(), value.c_str(), false);
+    getEmitter().write(key.c_str(), value.c_str(), false);
 }
 
 void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, size_t len) {
@@ -1111,7 +1123,7 @@ void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, s
                         return;
                 }
 
-                emitter->writeScalar(0, ptr);
+                getEmitter().writeScalar(0, ptr);
             }
 
             offset = (int) (data - data0);
@@ -1597,8 +1609,8 @@ FileStorage::Impl::Base64Decoder::Base64Decoder() {
     eos = true;
 }
 
-void FileStorage::Impl::Base64Decoder::init(Ptr<FileStorageParser> &_parser, char *_ptr, int _indent) {
-    parser = _parser;
+void FileStorage::Impl::Base64Decoder::init(const Ptr<FileStorageParser> &_parser, char *_ptr, int _indent) {
+    parser_do_not_use_direct_dereference = _parser;
     ptr = _ptr;
     indent = _indent;
     encoded.clear();
@@ -1641,9 +1653,9 @@ bool FileStorage::Impl::Base64Decoder::readMore(int needed) {
     decoded.resize(sz);
     ofs = 0;
 
-    CV_Assert(!parser.empty() && ptr);
+    CV_Assert(ptr);
     char *beg = 0, *end = 0;
-    bool ok = parser->getBase64Row(ptr, indent, beg, end);
+    bool ok = getParser().getBase64Row(ptr, indent, beg, end);
     ptr = end;
     std::copy(beg, end, std::back_inserter(encoded));
     totalchars += end - beg;
@@ -1730,7 +1742,7 @@ char *FileStorage::Impl::Base64Decoder::getPtr() const { return ptr; }
 char *FileStorage::Impl::parseBase64(char *ptr, int indent, FileNode &collection) {
     const int BASE64_HDR_SIZE = 24;
     char dt[BASE64_HDR_SIZE + 1] = {0};
-    base64decoder.init(parser, ptr, indent);
+    base64decoder.init(parser_do_not_use_direct_dereference, ptr, indent);
 
     int i, k;
 
diff --git a/modules/core/src/persistence_impl.hpp b/modules/core/src/persistence_impl.hpp
index 4ea2dc3502..1c261ce772 100644
--- a/modules/core/src/persistence_impl.hpp
+++ b/modules/core/src/persistence_impl.hpp
@@ -139,7 +139,7 @@ public:
     {
     public:
         Base64Decoder();
-        void init(Ptr<FileStorageParser>& _parser, char* _ptr, int _indent);
+        void init(const Ptr<FileStorageParser>& _parser, char* _ptr, int _indent);
 
         bool readMore(int needed);
 
@@ -155,7 +155,13 @@ public:
         char* getPtr() const;
     protected:
 
-        Ptr<FileStorageParser> parser;
+        Ptr<FileStorageParser> parser_do_not_use_direct_dereference;
+        FileStorageParser& getParser() const
+        {
+            if (!parser_do_not_use_direct_dereference)
+                CV_Error(Error::StsNullPtr, "Parser is not available");
+            return *parser_do_not_use_direct_dereference;
+        }
         char* ptr;
         int indent;
         std::vector<char> encoded;
@@ -205,8 +211,20 @@ public:
 
     std::deque<char> outbuf;
 
-    Ptr<FileStorageEmitter> emitter;
-    Ptr<FileStorageParser> parser;
+    Ptr<FileStorageEmitter> emitter_do_not_use_direct_dereference;
+    FileStorageEmitter& getEmitter()
+    {
+        if (!emitter_do_not_use_direct_dereference)
+            CV_Error(Error::StsNullPtr, "Emitter is not available");
+        return *emitter_do_not_use_direct_dereference;
+    }
+    Ptr<FileStorageParser> parser_do_not_use_direct_dereference;
+    FileStorageParser& getParser() const
+    {
+        if (!parser_do_not_use_direct_dereference)
+            CV_Error(Error::StsNullPtr, "Parser is not available");
+        return *parser_do_not_use_direct_dereference;
+    }
     Base64Decoder base64decoder;
     base64::Base64Writer* base64_writer;
 
@@ -228,4 +246,4 @@ public:
 
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index 5682c3d008..84f0039dc9 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -55,6 +55,9 @@
 
 #include <opencv2/core/utils/filesystem.private.hpp>
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+#include <opencv2/core/utils/fp_control.private.hpp>
+
 #ifndef OPENCV_WITH_THREAD_SANITIZER
   #if defined(__clang__) && defined(__has_feature)
   #if __has_feature(thread_sanitizer)
@@ -630,7 +633,7 @@ struct HWFeatures
             }
         }
     #elif (defined __ppc64__ || defined __PPC64__) && defined __FreeBSD__
-        unsigned int hwcap = 0;
+        unsigned long hwcap = 0;
         elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
         if (hwcap & PPC_FEATURE_HAS_VSX) {
             elf_aux_info(AT_HWCAP2, &hwcap, sizeof(hwcap));
@@ -2720,6 +2723,82 @@ void setUseIPP_NotExact(bool flag)
 
 } // namespace ipp
 
+
+namespace details {
+
+#if OPENCV_IMPL_FP_HINTS_X86
+#ifndef _MM_DENORMALS_ZERO_ON  // requires pmmintrin.h (SSE3)
+#define _MM_DENORMALS_ZERO_ON 0x0040
+#endif
+#ifndef _MM_DENORMALS_ZERO_MASK  // requires pmmintrin.h (SSE3)
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#endif
+#endif
+
+void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state)
+{
+#if OPENCV_IMPL_FP_HINTS_X86
+    unsigned mask = _MM_FLUSH_ZERO_MASK;
+    unsigned value = ignore ? _MM_FLUSH_ZERO_ON : 0;
+    if (featuresEnabled.have[CPU_SSE3])
+    {
+        mask |= _MM_DENORMALS_ZERO_MASK;
+        value |= ignore ? _MM_DENORMALS_ZERO_ON : 0;
+    }
+    const unsigned old_flags = _mm_getcsr();
+    const unsigned old_value = old_flags & mask;
+    unsigned flags = (old_flags & ~mask) | value;
+    CV_LOG_DEBUG(NULL, "core: update FP mxcsr flags = " << cv::format("0x%08x", flags));
+    // save state
+    state.reserved[0] = (uint32_t)mask;
+    state.reserved[1] = (uint32_t)old_value;
+    _mm_setcsr(flags);
+#else
+    CV_UNUSED(ignore); CV_UNUSED(state);
+#endif
+}
+
+int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state)
+{
+#if OPENCV_IMPL_FP_HINTS_X86
+    unsigned mask = _MM_FLUSH_ZERO_MASK;
+    if (featuresEnabled.have[CPU_SSE3])
+    {
+        mask |= _MM_DENORMALS_ZERO_MASK;
+    }
+    const unsigned old_flags = _mm_getcsr();
+    const unsigned old_value = old_flags & mask;
+    // save state
+    state.reserved[0] = (uint32_t)mask;
+    state.reserved[1] = (uint32_t)old_value;
+    return 2;
+#else
+    CV_UNUSED(state);
+    return 0;
+#endif
+}
+
+bool restoreFPDenormalsState(const FPDenormalsModeState& state)
+{
+#if OPENCV_IMPL_FP_HINTS_X86
+    const unsigned mask = (unsigned)state.reserved[0];
+    CV_DbgAssert(mask != 0); // invalid state (ensure that state is properly saved earlier)
+    const unsigned value = (unsigned)state.reserved[1];
+    CV_DbgCheck((int)value, value == (value & mask), "invalid SSE FP state");
+    const unsigned old_flags = _mm_getcsr();
+    unsigned flags = (old_flags & ~mask) | value;
+    CV_LOG_DEBUG(NULL, "core: restore FP mxcsr flags = " << cv::format("0x%08x", flags));
+    _mm_setcsr(flags);
+    return true;
+#else
+    CV_UNUSED(state);
+    return false;
+#endif
+}
+
+}  // namespace details
+
+
 } // namespace cv
 
 /* End of file. */
diff --git a/modules/core/src/va_intel.cpp b/modules/core/src/va_intel.cpp
index acc502d0f5..cf974298e5 100644
--- a/modules/core/src/va_intel.cpp
+++ b/modules/core/src/va_intel.cpp
@@ -606,10 +606,36 @@ void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface,
         if (status != VA_STATUS_SUCCESS)
             CV_Error(cv::Error::StsError, "VA-API: vaSyncSurface failed");
 
+        bool indirect_buffer = false;
         VAImage image;
         status = vaDeriveImage(display, surface, &image);
-        if (status != VA_STATUS_SUCCESS)
-            CV_Error(cv::Error::StsError, "VA-API: vaDeriveImage failed");
+        if (status != VA_STATUS_SUCCESS){
+            //try vaCreateImage + vaPutImage
+            //pick a format
+            indirect_buffer = true;
+            int num_formats = vaMaxNumImageFormats(display);
+            if (num_formats <= 0)
+                CV_Error(cv::Error::StsError, "VA-API: vaMaxNumImageFormats failed");
+            std::vector<VAImageFormat> fmt_list(num_formats);
+
+            status = vaQueryImageFormats(display, fmt_list.data(), &num_formats);
+            if (status != VA_STATUS_SUCCESS)
+                CV_Error(cv::Error::StsError, "VA-API: vaQueryImageFormats failed");
+            VAImageFormat *selected_format = nullptr;
+            for (auto &fmt : fmt_list){
+                if (fmt.fourcc == VA_FOURCC_NV12 || fmt.fourcc == VA_FOURCC_YV12){
+                    selected_format = &fmt;
+                    break;
+                }
+            }
+            if (selected_format == nullptr)
+                CV_Error(cv::Error::StsError, "VA-API: vaQueryImageFormats did not return a supported format");
+
+            status = vaCreateImage(display, selected_format, size.width, size.height, &image);
+            if (status != VA_STATUS_SUCCESS)
+                CV_Error(cv::Error::StsError, "VA-API: vaCreateImage failed");
+
+        }
 
         unsigned char* buffer = 0;
         status = vaMapBuffer(display, image.buf, (void **)&buffer);
@@ -627,6 +653,14 @@ void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface,
         if (status != VA_STATUS_SUCCESS)
             CV_Error(cv::Error::StsError, "VA-API: vaUnmapBuffer failed");
 
+        if (indirect_buffer){
+            status = vaPutImage(display, surface, image.image_id, 0, 0, size.width, size.height, 0, 0, size.width, size.height);
+            if (status != VA_STATUS_SUCCESS){
+                vaDestroyImage(display, image.image_id);
+                CV_Error(cv::Error::StsError, "VA-API: vaPutImage failed");
+            }
+        }
+
         status = vaDestroyImage(display, image.image_id);
         if (status != VA_STATUS_SUCCESS)
             CV_Error(cv::Error::StsError, "VA-API: vaDestroyImage failed");
@@ -711,8 +745,37 @@ void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, Out
 
         VAImage image;
         status = vaDeriveImage(display, surface, &image);
-        if (status != VA_STATUS_SUCCESS)
-            CV_Error(cv::Error::StsError, "VA-API: vaDeriveImage failed");
+        if (status != VA_STATUS_SUCCESS){
+            //try vaCreateImage + vaGetImage
+            //pick a format
+            int num_formats = vaMaxNumImageFormats(display);
+            if (num_formats <= 0)
+                CV_Error(cv::Error::StsError, "VA-API: vaMaxNumImageFormats failed");
+            std::vector<VAImageFormat> fmt_list(num_formats);
+
+            status = vaQueryImageFormats(display, fmt_list.data(), &num_formats);
+            if (status != VA_STATUS_SUCCESS)
+                CV_Error(cv::Error::StsError, "VA-API: vaQueryImageFormats failed");
+            VAImageFormat *selected_format = nullptr;
+            for (auto &fmt : fmt_list){
+                if (fmt.fourcc == VA_FOURCC_NV12 || fmt.fourcc == VA_FOURCC_YV12){
+                    selected_format = &fmt;
+                    break;
+                }
+            }
+            if (selected_format == nullptr)
+                CV_Error(cv::Error::StsError, "VA-API: vaQueryImageFormats did not return a supported format");
+
+            status = vaCreateImage(display, selected_format, size.width, size.height, &image);
+            if (status != VA_STATUS_SUCCESS)
+                CV_Error(cv::Error::StsError, "VA-API: vaCreateImage failed");
+
+            status = vaGetImage(display, surface, 0, 0, size.width, size.height, image.image_id);
+            if (status != VA_STATUS_SUCCESS){
+                vaDestroyImage(display, image.image_id);  // release the image created above before raising
+                CV_Error(cv::Error::StsError, "VA-API: vaGetImage failed");
+            }
+        }
 
         unsigned char* buffer = 0;
         status = vaMapBuffer(display, image.buf, (void **)&buffer);
diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp
index e6cb82919a..20e3a17755 100644
--- a/modules/core/test/ocl/test_arithm.cpp
+++ b/modules/core/test/ocl/test_arithm.cpp
@@ -1819,8 +1819,8 @@ OCL_TEST_P(ReduceSum, Mat)
     {
         generateTestData();
 
-        OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_SUM, dtype));
-        OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_SUM, dtype));
+        OCL_OFF(cv::reduce(src_roi, dst_roi, dim, REDUCE_SUM, dtype));
+        OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, REDUCE_SUM, dtype));
 
         double eps = ddepth <= CV_32S ? 1 : 7e-4;
         OCL_EXPECT_MATS_NEAR(dst, eps);
@@ -1835,8 +1835,8 @@ OCL_TEST_P(ReduceMax, Mat)
     {
         generateTestData();
 
-        OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_MAX, dtype));
-        OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_MAX, dtype));
+        OCL_OFF(cv::reduce(src_roi, dst_roi, dim, REDUCE_MAX, dtype));
+        OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, REDUCE_MAX, dtype));
 
         OCL_EXPECT_MATS_NEAR(dst, 0);
     }
@@ -1850,8 +1850,8 @@ OCL_TEST_P(ReduceMin, Mat)
     {
         generateTestData();
 
-        OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_MIN, dtype));
-        OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_MIN, dtype));
+        OCL_OFF(cv::reduce(src_roi, dst_roi, dim, REDUCE_MIN, dtype));
+        OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, REDUCE_MIN, dtype));
 
         OCL_EXPECT_MATS_NEAR(dst, 0);
     }
@@ -1865,8 +1865,8 @@ OCL_TEST_P(ReduceAvg, Mat)
     {
         generateTestData();
 
-        OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_AVG, dtype));
-        OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_AVG, dtype));
+        OCL_OFF(cv::reduce(src_roi, dst_roi, dim, REDUCE_AVG, dtype));
+        OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, REDUCE_AVG, dtype));
 
         double eps = ddepth <= CV_32S ? 1 : 6e-6;
         OCL_EXPECT_MATS_NEAR(dst, eps);
diff --git a/modules/core/test/test_io.cpp b/modules/core/test/test_io.cpp
index 3712be9f2e..4218cb9297 100644
--- a/modules/core/test/test_io.cpp
+++ b/modules/core/test/test_io.cpp
@@ -1918,5 +1918,29 @@ TEST(Core_InputOutput, FileStorage_16F_json)
     test_20279(fs);
 }
 
+TEST(Core_InputOutput, FileStorage_invalid_path_regression_21448_YAML)
+{
+    FileStorage fs("invalid_path/test.yaml", cv::FileStorage::WRITE);
+    EXPECT_FALSE(fs.isOpened());
+    EXPECT_ANY_THROW(fs.write("K", 1));
+    fs.release();
+}
+
+TEST(Core_InputOutput, FileStorage_invalid_path_regression_21448_XML)
+{
+    FileStorage fs("invalid_path/test.xml", cv::FileStorage::WRITE);
+    EXPECT_FALSE(fs.isOpened());
+    EXPECT_ANY_THROW(fs.write("K", 1));
+    fs.release();
+}
+
+TEST(Core_InputOutput, FileStorage_invalid_path_regression_21448_JSON)
+{
+    FileStorage fs("invalid_path/test.json", cv::FileStorage::WRITE);
+    EXPECT_FALSE(fs.isOpened());
+    EXPECT_ANY_THROW(fs.write("K", 1));
+    fs.release();
+}
+
 
 }} // namespace
diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp
index bd0f3897d2..0c54b55ac6 100644
--- a/modules/core/test/test_mat.cpp
+++ b/modules/core/test/test_mat.cpp
@@ -93,7 +93,7 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat
 {
     int srcType = src.type();
     bool support = false;
-    if( opType == CV_REDUCE_SUM || opType == CV_REDUCE_AVG )
+    if( opType == REDUCE_SUM || opType == REDUCE_AVG )
     {
         if( srcType == CV_8U && (dstType == CV_32S || dstType == CV_32F || dstType == CV_64F) )
             support = true;
@@ -106,7 +106,7 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat
         if( srcType == CV_64F && dstType == CV_64F)
             support = true;
     }
-    else if( opType == CV_REDUCE_MAX )
+    else if( opType == REDUCE_MAX )
     {
         if( srcType == CV_8U && dstType == CV_8U )
             support = true;
@@ -115,7 +115,7 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat
         if( srcType == CV_64F && dstType == CV_64F )
             support = true;
     }
-    else if( opType == CV_REDUCE_MIN )
+    else if( opType == REDUCE_MIN )
     {
         if( srcType == CV_8U && dstType == CV_8U)
             support = true;
@@ -128,7 +128,7 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat
         return cvtest::TS::OK;
 
     double eps = 0.0;
-    if ( opType == CV_REDUCE_SUM || opType == CV_REDUCE_AVG )
+    if ( opType == REDUCE_SUM || opType == REDUCE_AVG )
     {
         if ( dstType == CV_32F )
             eps = 1.e-5;
@@ -152,10 +152,10 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat
     if( check )
     {
         char msg[100];
-        const char* opTypeStr = opType == CV_REDUCE_SUM ? "CV_REDUCE_SUM" :
-        opType == CV_REDUCE_AVG ? "CV_REDUCE_AVG" :
-        opType == CV_REDUCE_MAX ? "CV_REDUCE_MAX" :
-        opType == CV_REDUCE_MIN ? "CV_REDUCE_MIN" : "unknown operation type";
+        const char* opTypeStr = opType == REDUCE_SUM ? "REDUCE_SUM" :
+        opType == REDUCE_AVG ? "REDUCE_AVG" :
+        opType == REDUCE_MAX ? "REDUCE_MAX" :
+        opType == REDUCE_MIN ? "REDUCE_MIN" : "unknown operation type";
         string srcTypeStr, dstTypeStr;
         getMatTypeStr( src.type(), srcTypeStr );
         getMatTypeStr( dstType, dstTypeStr );
@@ -195,19 +195,19 @@ int Core_ReduceTest::checkCase( int srcType, int dstType, int dim, Size sz )
         CV_Assert( 0 );
 
     // 1. sum
-    tempCode = checkOp( src, dstType, CV_REDUCE_SUM, sum, dim );
+    tempCode = checkOp( src, dstType, REDUCE_SUM, sum, dim );
     code = tempCode != cvtest::TS::OK ? tempCode : code;
 
     // 2. avg
-    tempCode = checkOp( src, dstType, CV_REDUCE_AVG, avg, dim );
+    tempCode = checkOp( src, dstType, REDUCE_AVG, avg, dim );
     code = tempCode != cvtest::TS::OK ? tempCode : code;
 
     // 3. max
-    tempCode = checkOp( src, dstType, CV_REDUCE_MAX, max, dim );
+    tempCode = checkOp( src, dstType, REDUCE_MAX, max, dim );
     code = tempCode != cvtest::TS::OK ? tempCode : code;
 
     // 4. min
-    tempCode = checkOp( src, dstType, CV_REDUCE_MIN, min, dim );
+    tempCode = checkOp( src, dstType, REDUCE_MIN, min, dim );
     code = tempCode != cvtest::TS::OK ? tempCode : code;
 
     return code;
@@ -315,7 +315,7 @@ TEST(Core_PCA, accuracy)
     Mat rBackPrjTestPoints = rPCA.backProject( rPrjTestPoints );
 
     Mat avg(1, sz.width, CV_32FC1 );
-    cv::reduce( rPoints, avg, 0, CV_REDUCE_AVG );
+    cv::reduce( rPoints, avg, 0, REDUCE_AVG );
     Mat Q = rPoints - repeat( avg, rPoints.rows, 1 ), Qt = Q.t(), eval, evec;
     Q = Qt * Q;
     Q = Q /(float)rPoints.rows;
@@ -1559,10 +1559,10 @@ TEST(Reduce, regression_should_fail_bug_4594)
     cv::Mat src = cv::Mat::eye(4, 4, CV_8U);
     std::vector<int> dst;
 
-    EXPECT_THROW(cv::reduce(src, dst, 0, CV_REDUCE_MIN, CV_32S), cv::Exception);
-    EXPECT_THROW(cv::reduce(src, dst, 0, CV_REDUCE_MAX, CV_32S), cv::Exception);
-    EXPECT_NO_THROW(cv::reduce(src, dst, 0, CV_REDUCE_SUM, CV_32S));
-    EXPECT_NO_THROW(cv::reduce(src, dst, 0, CV_REDUCE_AVG, CV_32S));
+    EXPECT_THROW(cv::reduce(src, dst, 0, REDUCE_MIN, CV_32S), cv::Exception);
+    EXPECT_THROW(cv::reduce(src, dst, 0, REDUCE_MAX, CV_32S), cv::Exception);
+    EXPECT_NO_THROW(cv::reduce(src, dst, 0, REDUCE_SUM, CV_32S));
+    EXPECT_NO_THROW(cv::reduce(src, dst, 0, REDUCE_AVG, CV_32S));
 }
 
 TEST(Mat, push_back_vector)
diff --git a/modules/core/test/test_math.cpp b/modules/core/test/test_math.cpp
index 0b083b3e6d..c23bf5c7eb 100644
--- a/modules/core/test/test_math.cpp
+++ b/modules/core/test/test_math.cpp
@@ -3023,7 +3023,7 @@ TEST(CovariationMatrixVectorOfMatWithMean, accuracy)
     cv::randu(src,cv::Scalar(-128), cv::Scalar(128));
     cv::Mat goldMean;
 
-    cv::reduce(src,goldMean,0 ,CV_REDUCE_AVG, CV_32F);
+    cv::reduce(src,goldMean,0 ,REDUCE_AVG, CV_32F);
 
     cv::calcCovarMatrix(src,gold,goldMean,singleMatFlags,CV_32F);
 
diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp
index d9df475fa6..8ed0afe771 100644
--- a/modules/core/test/test_misc.cpp
+++ b/modules/core/test/test_misc.cpp
@@ -4,6 +4,15 @@
 #include "test_precomp.hpp"
 #include <cmath>
 
+#include "opencv2/core/utils/logger.hpp"
+
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
+#ifdef CV_CXX11
+#include <chrono>
+#include <thread>
+#endif
+
 namespace opencv_test { namespace {
 
 TEST(Core_OutputArrayCreate, _1997)
@@ -243,6 +252,62 @@ TEST(Core_Parallel, propagate_exceptions)
     }, cv::Exception);
 }
 
+class FPDenormalsHintCheckerParallelLoopBody : public cv::ParallelLoopBody  // loop body comparing the FP denormals state seen by each invocation with the state saved at construction
+{
+public:
+    FPDenormalsHintCheckerParallelLoopBody()
+        : isOK(true)
+    {
+        state_values_to_check = cv::details::saveFPDenormalsState(base_state);  // baseline saved by the constructing thread; the return value is later used as the count of reserved[] entries to compare
+    }
+    ~FPDenormalsHintCheckerParallelLoopBody() {}
+    void operator()(const cv::Range& r) const
+    {
+        CV_UNUSED(r);  // the range is ignored; only the current FP state is inspected
+        cv::details::FPDenormalsModeState state;
+        if (cv::details::saveFPDenormalsState(state))  // a zero result skips the comparison (see the else branch)
+        {
+            for (int i = 0; i < state_values_to_check; ++i)
+            {
+                if (base_state.reserved[i] != state.reserved[i])
+                {
+                    CV_LOG_ERROR(NULL, cv::format("FP state[%d] mismatch: base=0x%08x thread=0x%08x", i, base_state.reserved[i], state.reserved[i]));
+                    isOK = false;  // remember the mismatch so the test can assert on it
+                    cv::details::restoreFPDenormalsState(base_state);  // NOTE(review): presumably resets the current thread to the baseline state - confirm
+                }
+            }
+        }
+        else
+        {
+            // FP state is not supported
+            // no checks
+        }
+#ifdef CV_CXX11
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));  // NOTE(review): presumably keeps this invocation busy so the remaining ranges land on other threads - confirm
+#endif
+    }
+
+    cv::details::FPDenormalsModeState base_state;  // state saved in the constructor
+    int state_values_to_check;  // value returned by saveFPDenormalsState() in the constructor; loop bound for the comparison
+
+    mutable bool isOK;  // mutable so the const operator() can clear it; NOTE(review): plain bool written from operator() without synchronization
+};
+
+TEST(Core_Parallel, propagate_fp_denormals_ignore_hint)  // checks that parallel_for_ invocations see the same FP denormals state as the thread that built the job
+{
+    int nThreads = std::max(1, cv::getNumThreads()) * 3;  // used as the range length: three times the thread count, at least 3
+    for (int i = 0; i < 4; ++i)  // odd i enables the hint, even i disables it, so each setting runs twice
+    {
+        SCOPED_TRACE(cv::format("Case=%d: FP denormals ignore hint: %s\n", i, ((i & 1) != 0) ? "enable" : "disable"));
+        FPDenormalsIgnoreHintScope fp_denormals_scope((i & 1) != 0);  // scoped hint; the job below saves its baseline while this scope is alive
+        FPDenormalsHintCheckerParallelLoopBody job;
+        ASSERT_NO_THROW({
+            parallel_for_(cv::Range(0, nThreads), job);
+        });
+        EXPECT_TRUE(job.isOK);  // false when any invocation observed a state different from the baseline
+    }
+}
+
 TEST(Core_Version, consistency)
 {
     // this test verifies that OpenCV version loaded in runtime
diff --git a/modules/core/test/test_precomp.hpp b/modules/core/test/test_precomp.hpp
index a82f5cc12c..81ddf45de9 100644
--- a/modules/core/test/test_precomp.hpp
+++ b/modules/core/test/test_precomp.hpp
@@ -6,9 +6,6 @@
 
 #include "opencv2/ts.hpp"
 #include "opencv2/ts/ocl_test.hpp"
-#include "opencv2/core/core_c.h"
-
-#include "opencv2/core/cvdef.h"
 #include "opencv2/core/private.hpp"
 #include "opencv2/core/hal/hal.hpp"
 
diff --git a/modules/core/test/test_umat.cpp b/modules/core/test/test_umat.cpp
index c323d17c06..a89972762a 100644
--- a/modules/core/test/test_umat.cpp
+++ b/modules/core/test/test_umat.cpp
@@ -1398,8 +1398,8 @@ TEST(UMat, testTempObjects_Mat_issue_8693)
     randu(srcUMat, -1.f, 1.f);
     srcUMat.copyTo(srcMat);
 
-    reduce(srcUMat, srcUMat, 0, CV_REDUCE_SUM);
-    reduce(srcMat, srcMat, 0, CV_REDUCE_SUM);
+    reduce(srcUMat, srcUMat, 0, REDUCE_SUM);
+    reduce(srcMat, srcMat, 0, REDUCE_SUM);
 
     srcUMat.convertTo(srcUMat, CV_64FC1);
     srcMat.convertTo(srcMat, CV_64FC1);
diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt
index dfc08e8b9b..a9540f1088 100644
--- a/modules/dnn/CMakeLists.txt
+++ b/modules/dnn/CMakeLists.txt
@@ -165,24 +165,13 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
 endif()
 
 set(dnn_runtime_libs "")
-if(INF_ENGINE_TARGET)
-  set(use_nn_builder OFF)
-  if(TARGET inference_engine_nn_builder OR # custom imported target
-     TARGET IE::inference_engine_nn_builder OR # default imported target via InferenceEngineConfig.cmake
-     INF_ENGINE_RELEASE VERSION_LESS "2020000000") # compatibility with older versions on IE
-    set(use_nn_builder ON)
+
+ocv_option(OPENCV_DNN_OPENVINO "Build with OpenVINO support (2021.4+)" (TARGET ocv.3rdparty.openvino))
+if(TARGET ocv.3rdparty.openvino AND OPENCV_DNN_OPENVINO)
+  if(NOT HAVE_OPENVINO AND NOT HAVE_NGRAPH)
+    message(FATAL_ERROR "DNN: Inference Engine is not supported without enabled 'nGraph'. Check build configuration.")
   endif()
-  ocv_option(OPENCV_DNN_IE_NN_BUILDER_2019 "Build with Inference Engine NN Builder API support" ${use_nn_builder})  # future: NOT HAVE_NGRAPH
-  if(OPENCV_DNN_IE_NN_BUILDER_2019)
-    message(STATUS "DNN: Enabling Inference Engine NN Builder API support")
-    add_definitions(-DHAVE_DNN_IE_NN_BUILDER_2019=1)
-  endif()
-  list(APPEND dnn_runtime_libs ${INF_ENGINE_TARGET})
-endif()
-if(HAVE_NGRAPH)
-  message(STATUS "DNN: Enabling Inference Engine nGraph API support")
-  add_definitions(-DHAVE_DNN_NGRAPH)
-  list(APPEND dnn_runtime_libs ngraph::ngraph)
+  list(APPEND dnn_runtime_libs ocv.3rdparty.openvino)
 endif()
 
 ocv_glob_module_sources(${sources_options} SOURCES ${fw_srcs} ${webnn_srcs})
@@ -193,7 +182,7 @@ ocv_add_accuracy_tests(${dnn_runtime_libs})
 set(perf_path "${CMAKE_CURRENT_LIST_DIR}/perf")
 file(GLOB_RECURSE perf_srcs "${perf_path}/*.cpp")
 file(GLOB_RECURSE perf_hdrs "${perf_path}/*.hpp" "${perf_path}/*.h")
-ocv_add_perf_tests(${INF_ENGINE_TARGET}
+ocv_add_perf_tests(${dnn_runtime_libs}
     FILES test_common "${CMAKE_CURRENT_LIST_DIR}/test/test_common.hpp" "${CMAKE_CURRENT_LIST_DIR}/test/test_common.impl.hpp"
     FILES Src ${perf_srcs}
     FILES Include ${perf_hdrs}
diff --git a/modules/dnn/include/opencv2/dnn/dict.hpp b/modules/dnn/include/opencv2/dnn/dict.hpp
index 463d314bee..059ce9b28e 100644
--- a/modules/dnn/include/opencv2/dnn/dict.hpp
+++ b/modules/dnn/include/opencv2/dnn/dict.hpp
@@ -60,13 +60,13 @@ CV__DNN_INLINE_NS_BEGIN
 struct CV_EXPORTS_W DictValue
 {
     DictValue(const DictValue &r);
-    DictValue(bool i)           : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i ? 1 : 0; }       //!< Constructs integer scalar
-    DictValue(int64 i = 0)      : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; }       //!< Constructs integer scalar
-    CV_WRAP DictValue(int i)    : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; }       //!< Constructs integer scalar
-    DictValue(unsigned p)       : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = p; }       //!< Constructs integer scalar
-    CV_WRAP DictValue(double p)         : type(Param::REAL), pd(new AutoBuffer<double,1>) { (*pd)[0] = p; }     //!< Constructs floating point scalar
-    CV_WRAP DictValue(const String &s)  : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; }   //!< Constructs string scalar
-    DictValue(const char *s)            : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; }   //!< @overload
+    explicit DictValue(bool i)           : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i ? 1 : 0; }       //!< Constructs integer scalar
+    explicit DictValue(int64 i = 0)      : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; }       //!< Constructs integer scalar
+    CV_WRAP explicit DictValue(int i)    : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; }       //!< Constructs integer scalar
+    explicit DictValue(unsigned p)       : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = p; }       //!< Constructs integer scalar
+    CV_WRAP explicit DictValue(double p)         : type(Param::REAL), pd(new AutoBuffer<double,1>) { (*pd)[0] = p; }     //!< Constructs floating point scalar
+    CV_WRAP explicit DictValue(const String &s)  : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; }   //!< Constructs string scalar
+    explicit DictValue(const char *s)            : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; }   //!< @overload
 
     template<typename TypeIter>
     static DictValue arrayInt(TypeIter begin, int size);    //!< Constructs integer array
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index d6b29cfcf3..97033a313e 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -134,7 +134,7 @@ CV__DNN_INLINE_NS_BEGIN
     class BackendNode
     {
     public:
-        BackendNode(int backendId);
+        explicit BackendNode(int backendId);
 
         virtual ~BackendNode(); //!< Virtual destructor to make polymorphism.
 
@@ -277,18 +277,18 @@ CV__DNN_INLINE_NS_BEGIN
          * Each layer input and output can be labeled to easily identify them using "%<layer_name%>[.output_name]" notation.
          * This method maps label of input blob to its index into input vector.
          */
-        virtual int inputNameToIndex(String inputName);
+        virtual int inputNameToIndex(String inputName);  // FIXIT const
         /** @brief Returns index of output blob in output array.
          *  @see inputNameToIndex()
          */
-        CV_WRAP virtual int outputNameToIndex(const String& outputName);
+        CV_WRAP virtual int outputNameToIndex(const String& outputName);  // FIXIT const
 
         /**
          * @brief Ask layer if it support specific backend for doing computations.
          * @param[in] backendId computation backend identifier.
          * @see Backend
          */
-        virtual bool supportBackend(int backendId);
+        virtual bool supportBackend(int backendId);  // FIXIT const
 
         /**
          * @brief Returns Halide backend node.
@@ -302,8 +302,6 @@ CV__DNN_INLINE_NS_BEGIN
          */
         virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs);
 
-        virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &inputs);
-
         virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs, const std::vector<Ptr<BackendNode> >& nodes);
 
         virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs);
@@ -495,18 +493,29 @@ CV__DNN_INLINE_NS_BEGIN
         /** @brief Converts string name of the layer to the integer identifier.
          *  @returns id of the layer, or -1 if the layer wasn't found.
          */
-        CV_WRAP int getLayerId(const String &layer);
+        CV_WRAP int getLayerId(const String &layer) const;
 
         CV_WRAP std::vector<String> getLayerNames() const;
 
-        /** @brief Container for strings and integers. */
+        /** @brief Container for strings and integers.
+         *
+         * @deprecated Use getLayerId() with int result.
+         */
         typedef DictValue LayerId;
 
         /** @brief Returns pointer to layer with specified id or name which the network use. */
-        CV_WRAP Ptr<Layer> getLayer(LayerId layerId);
+        CV_WRAP Ptr<Layer> getLayer(int layerId) const;
+        /** @overload
+         *  @deprecated Use int getLayerId(const String &layer)
+         */
+        CV_WRAP inline Ptr<Layer> getLayer(const String& layerName) const { return getLayer(getLayerId(layerName)); }
+        /** @overload
+         *  @deprecated to be removed
+         */
+        CV_WRAP Ptr<Layer> getLayer(const LayerId& layerId) const;
 
         /** @brief Returns pointers to input layers of specific layer. */
-        std::vector<Ptr<Layer> > getLayerInputs(LayerId layerId); // FIXIT: CV_WRAP
+        std::vector<Ptr<Layer> > getLayerInputs(int layerId) const; // FIXIT: CV_WRAP
 
         /** @brief Connects output of the first layer to input of the second layer.
          *  @param outPin descriptor of the first layer output.
@@ -531,6 +540,18 @@ CV__DNN_INLINE_NS_BEGIN
          */
         void connect(int outLayerId, int outNum, int inpLayerId, int inpNum);
 
+        /** @brief Registers network output with name
+         *
+         *  Function may create additional 'Identity' layer.
+         *
+         *  @param outputName identifier of the output
+         *  @param layerId identifier of the layer whose output is registered
+         *  @param outputPort output port number of that layer
+         *
+         *  @returns index of bound layer (the same as layerId or newly created)
+         */
+        int registerOutput(const std::string& outputName, int layerId, int outputPort);
+
         /** @brief Sets outputs names of the network input pseudo layer.
          *
          * Each net always has special own the network input pseudo layer with id=0.
@@ -662,20 +683,26 @@ CV__DNN_INLINE_NS_BEGIN
          *  @note If shape of the new blob differs from the previous shape,
          *  then the following forward pass may fail.
         */
-        CV_WRAP void setParam(LayerId layer, int numParam, const Mat &blob);
+        CV_WRAP void setParam(int layer, int numParam, const Mat &blob);
+        CV_WRAP inline void setParam(const String& layerName, int numParam, const Mat &blob) { return setParam(getLayerId(layerName), numParam, blob); }
 
         /** @brief Returns parameter blob of the layer.
          *  @param layer name or id of the layer.
          *  @param numParam index of the layer parameter in the Layer::blobs array.
          *  @see Layer::blobs
          */
-        CV_WRAP Mat getParam(LayerId layer, int numParam = 0);
+        CV_WRAP Mat getParam(int layer, int numParam = 0) const;
+        CV_WRAP inline Mat getParam(const String& layerName, int numParam = 0) const { return getParam(getLayerId(layerName), numParam); }
 
         /** @brief Returns indexes of layers with unconnected outputs.
+         *
+         * FIXIT: Rework API to registerOutput() approach, deprecate this call
          */
         CV_WRAP std::vector<int> getUnconnectedOutLayers() const;
 
         /** @brief Returns names of layers with unconnected outputs.
+         *
+         * FIXIT: Rework API to registerOutput() approach, deprecate this call
          */
         CV_WRAP std::vector<String> getUnconnectedOutLayersNames() const;
 
diff --git a/modules/dnn/include/opencv2/dnn/layer.hpp b/modules/dnn/include/opencv2/dnn/layer.hpp
index 8500599371..a4d167564d 100644
--- a/modules/dnn/include/opencv2/dnn/layer.hpp
+++ b/modules/dnn/include/opencv2/dnn/layer.hpp
@@ -66,6 +66,9 @@ public:
     //! Unregisters registered layer with specified type name. Thread-safe.
     static void unregisterLayer(const String &type);
 
+    //! Check if layer is registered.
+    static bool isLayerRegistered(const std::string& type);
+
     /** @brief Creates instance of registered layer.
      *  @param type type name of creating layer.
      *  @param params parameters which will be used for layer initialization.
diff --git a/modules/dnn/include/opencv2/dnn/shape_utils.hpp b/modules/dnn/include/opencv2/dnn/shape_utils.hpp
index 4c610f6cef..9bbbc806a8 100644
--- a/modules/dnn/include/opencv2/dnn/shape_utils.hpp
+++ b/modules/dnn/include/opencv2/dnn/shape_utils.hpp
@@ -184,7 +184,8 @@ static inline MatShape concat(const MatShape& a, const MatShape& b)
     return c;
 }
 
-static inline std::string toString(const MatShape& shape, const String& name = "")
+template<typename _Tp>
+static inline std::string toString(const std::vector<_Tp>& shape, const String& name = "")
 {
     std::ostringstream ss;
     if (!name.empty())
@@ -195,11 +196,14 @@ static inline std::string toString(const MatShape& shape, const String& name = "
     ss << " ]";
     return ss.str();
 }
-static inline void print(const MatShape& shape, const String& name = "")
+
+template<typename _Tp>
+static inline void print(const std::vector<_Tp>& shape, const String& name = "")
 {
     std::cout << toString(shape, name) << std::endl;
 }
-static inline std::ostream& operator<<(std::ostream &out, const MatShape& shape)
+template<typename _Tp>
+static inline std::ostream& operator<<(std::ostream &out, const std::vector<_Tp>& shape)
 {
     out << toString(shape);
     return out;
diff --git a/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp b/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp
index 333b1bfdd2..b81806ed5a 100644
--- a/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp
+++ b/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp
@@ -15,14 +15,18 @@ CV__DNN_INLINE_NS_BEGIN
 
 
 /* Values for 'OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE' parameter */
+/// @deprecated
 #define CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API     "NN_BUILDER"
+/// @deprecated
 #define CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH             "NGRAPH"
 
 /** @brief Returns Inference Engine internal backend API.
  *
  * See values of `CV_DNN_BACKEND_INFERENCE_ENGINE_*` macros.
  *
- * Default value is controlled through `OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE` runtime parameter (environment variable).
+ * `OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE` runtime parameter (environment variable) is ignored since 4.6.0.
+ *
+ * @deprecated
  */
 CV_EXPORTS_W cv::String getInferenceEngineBackendType();
 
@@ -31,6 +35,8 @@ CV_EXPORTS_W cv::String getInferenceEngineBackendType();
  * See values of `CV_DNN_BACKEND_INFERENCE_ENGINE_*` macros.
  *
  * @returns previous value of internal backend API
+ *
+ * @deprecated
  */
 CV_EXPORTS_W cv::String setInferenceEngineBackendType(const cv::String& newBackendType);
 
diff --git a/modules/dnn/misc/objc/gen_dict.json b/modules/dnn/misc/objc/gen_dict.json
index e6d561fba0..6072bdfc01 100644
--- a/modules/dnn/misc/objc/gen_dict.json
+++ b/modules/dnn/misc/objc/gen_dict.json
@@ -18,8 +18,12 @@
             "(long)getFLOPS:(NSArray<IntVector*>*)netInputShapes" : { "getFLOPS" : {"name" : "getFLOPSWithNetInputShapes"} },
             "(long)getFLOPS:(int)layerId netInputShape:(IntVector*)netInputShape" : { "getFLOPS" : {"name" : "getFLOPSWithLayerId"} },
             "(long)getFLOPS:(int)layerId netInputShapes:(NSArray<IntVector*>*)netInputShapes" : { "getFLOPS" : {"name" : "getFLOPSWithLayerId"} },
+            "(Layer*)getLayer:(NSString*)layerName" : { "getLayer" : {"name" : "getLayerByName"} },
+            "(Layer*)getLayer:(DictValue*)layerId" : { "getLayer" : {"name" : "getLayerByDictValue"} },
             "(void)getLayersShapes:(IntVector*)netInputShape layersIds:(IntVector*)layersIds inLayersShapes:(NSMutableArray<NSMutableArray<IntVector*>*>*)inLayersShapes outLayersShapes:(NSMutableArray<NSMutableArray<IntVector*>*>*)outLayersShapes" : { "getLayersShapes" : {"name" : "getLayersShapesWithNetInputShape"} },
-            "(void)getLayersShapes:(NSArray<IntVector*>*)netInputShapes layersIds:(IntVector*)layersIds inLayersShapes:(NSMutableArray<NSMutableArray<IntVector*>*>*)inLayersShapes outLayersShapes:(NSMutableArray<NSMutableArray<IntVector*>*>*)outLayersShapes" : { "getLayersShapes" : {"name" : "getLayersShapesWithNetInputShapes"} }
+            "(void)getLayersShapes:(NSArray<IntVector*>*)netInputShapes layersIds:(IntVector*)layersIds inLayersShapes:(NSMutableArray<NSMutableArray<IntVector*>*>*)inLayersShapes outLayersShapes:(NSMutableArray<NSMutableArray<IntVector*>*>*)outLayersShapes" : { "getLayersShapes" : {"name" : "getLayersShapesWithNetInputShapes"} },
+            "(Mat*)getParam:(NSString*)layerName numParam:(int)numParam" : { "getParam" : {"name" : "getParamByName"} },
+            "(void)setParam:(NSString*)layerName numParam:(int)numParam blob:(Mat*)blob" : { "setParam" : {"name" : "setParamByName"} }
         }
     },
     "type_dict": {
diff --git a/modules/dnn/src/caffe/caffe_importer.cpp b/modules/dnn/src/caffe/caffe_importer.cpp
index 7fb64c7c0d..a8d2f28ca6 100644
--- a/modules/dnn/src/caffe/caffe_importer.cpp
+++ b/modules/dnn/src/caffe/caffe_importer.cpp
@@ -53,6 +53,8 @@
 #include "caffe_io.hpp"
 #endif
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 namespace cv {
 namespace dnn {
 CV__DNN_INLINE_NS_BEGIN
@@ -88,6 +90,8 @@ MatShape parseBlobShape(const caffe::BlobShape& _input_shape)
 
 class CaffeImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     caffe::NetParameter net;
     caffe::NetParameter netBinary;
 
diff --git a/modules/dnn/src/darknet/darknet_importer.cpp b/modules/dnn/src/darknet/darknet_importer.cpp
index f1269bd979..b5767af405 100644
--- a/modules/dnn/src/darknet/darknet_importer.cpp
+++ b/modules/dnn/src/darknet/darknet_importer.cpp
@@ -51,6 +51,7 @@
 
 #include "darknet_io.hpp"
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
 
 namespace cv {
 namespace dnn {
@@ -61,6 +62,8 @@ namespace
 
 class DarknetImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     darknet::NetParameter net;
 
 public:
diff --git a/modules/dnn/src/debug_utils.cpp b/modules/dnn/src/debug_utils.cpp
index d951205bd8..0e1ba10236 100644
--- a/modules/dnn/src/debug_utils.cpp
+++ b/modules/dnn/src/debug_utils.cpp
@@ -37,11 +37,8 @@ void skipModelImport(bool skip)
 
 void detail::LayerHandler::addMissing(const std::string& name, const std::string& type)
 {
-    cv::AutoLock lock(getLayerFactoryMutex());
-    auto& registeredLayers = getLayerFactoryImpl();
-
     // If we didn't add it, but can create it, it's custom and not missing.
-    if (layers.find(type) == layers.end() && registeredLayers.find(type) != registeredLayers.end())
+    if (!contains(type) && LayerFactory::isLayerRegistered(type))
     {
         return;
     }
@@ -51,17 +48,17 @@ void detail::LayerHandler::addMissing(const std::string& name, const std::string
 
 bool detail::LayerHandler::contains(const std::string& type) const
 {
-    return layers.find(type) != layers.end();
+    return layers.count(type) != 0;
 }
 
-void detail::LayerHandler::printMissing()
+void detail::LayerHandler::printMissing() const
 {
     if (layers.empty())
     {
         return;
     }
 
-    std::stringstream ss;
+    std::ostringstream ss;
     ss << "DNN: Not supported types:\n";
     for (const auto& type_names : layers)
     {
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 67312dba78..954ada50f2 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -66,6 +66,8 @@
 #include <opencv2/imgproc.hpp>
 #include <opencv2/dnn/layer_reg.private.hpp>
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 #include <opencv2/core/utils/configuration.private.hpp>
 #include <opencv2/core/utils/logger.hpp>
 
@@ -175,48 +177,29 @@ private:
 
 #ifdef HAVE_INF_ENGINE
         if (checkIETarget(DNN_TARGET_CPU)) {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_CPU));
-#endif
 #ifdef HAVE_DNN_NGRAPH
             backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU));
 #endif
         }
         if (checkIETarget(DNN_TARGET_MYRIAD)) {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_MYRIAD));
-#endif
 #ifdef HAVE_DNN_NGRAPH
             backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD));
 #endif
         }
         if (checkIETarget(DNN_TARGET_HDDL)) {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_HDDL));
-#endif
 #ifdef HAVE_DNN_NGRAPH
             backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_HDDL));
 #endif
         }
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-        if (checkIETarget(DNN_TARGET_FPGA))
-            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_FPGA));
-#endif
 #ifdef HAVE_OPENCL
         if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel())
         {
             if (checkIETarget(DNN_TARGET_OPENCL)) {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL));
-#endif
 #ifdef HAVE_DNN_NGRAPH
                 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL));
 #endif
             }
             if (checkIETarget(DNN_TARGET_OPENCL_FP16)) {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL_FP16));
-#endif
 #ifdef HAVE_DNN_NGRAPH
                 backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16));
 #endif
@@ -271,7 +254,7 @@ std::vector<Target> getAvailableTargets(Backend be)
         be = (Backend)PARAM_DNN_BACKEND_DEFAULT;
 #ifdef HAVE_INF_ENGINE
     if (be == DNN_BACKEND_INFERENCE_ENGINE)
-        be = getInferenceEngineBackendTypeParam();
+        be = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
 #endif
 
     std::vector<Target> result;
@@ -635,8 +618,7 @@ struct DataLayer : public Layer
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-        return backendId == DNN_BACKEND_OPENCV ||
-               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && inputsData.size() == 1);
+        return backendId == DNN_BACKEND_OPENCV;
     }
 
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
@@ -827,39 +809,6 @@ struct DataLayer : public Layer
         }
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        CV_CheckEQ(inputsData.size(), (size_t)1, "");
-        CV_CheckEQ(inputsData[0].dims, 4, "");
-        const size_t numChannels = inputsData[0].size[1];
-        CV_Assert(numChannels <= 4);
-
-        // Scale
-        InferenceEngine::TensorDesc td(InferenceEngine::Precision::FP32, {numChannels},
-                                       InferenceEngine::Layout::C);
-        auto weights = InferenceEngine::make_shared_blob<float>(td);
-        weights->allocate();
-
-        float* weight_buf = weights->buffer().as<float*>();
-        std::fill(weight_buf, weight_buf + numChannels, scaleFactors[0]);
-
-        // Mean subtraction
-        auto biases = InferenceEngine::make_shared_blob<float>(td);
-        biases->allocate();
-        float* bias_buf = biases->buffer().as<float*>();
-
-        for (int i = 0; i < numChannels; ++i)
-        {
-            bias_buf[i] = -means[0][i] * scaleFactors[0];
-        }
-
-        InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name);
-        addConstantData("weights", weights, ieLayer);
-        addConstantData("biases", biases, ieLayer);
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
     std::vector<String> outNames;
     std::vector<MatShape> shapes;
@@ -895,11 +844,11 @@ public:
     // layer blob.
     int numReferences(const LayerPin& lp)
     {
-        std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
+        std::map<LayerPin, LayerPin>::const_iterator mapIt = reuseMap.find(lp);
         CV_Assert(mapIt != reuseMap.end());
         LayerPin memHost = mapIt->second;
 
-        std::map<LayerPin, int>::iterator refIt = refCounter.find(memHost);
+        std::map<LayerPin, int>::const_iterator refIt = refCounter.find(memHost);
         CV_Assert(refIt != refCounter.end());
         return refIt->second;
     }
@@ -927,7 +876,7 @@ public:
     // Decrease references counter to allocated memory inside specific blob.
     void releaseReference(const LayerPin& lp)
     {
-        std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
+        std::map<LayerPin, LayerPin>::const_iterator mapIt = reuseMap.find(lp);
         CV_Assert(mapIt != reuseMap.end());
 
         std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
@@ -951,8 +900,8 @@ public:
             Mat bestBlob;
             LayerPin bestBlobPin;
 
-            std::map<LayerPin, Mat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
+            std::map<LayerPin, Mat>::const_iterator hostIt;
+            std::map<LayerPin, int>::const_iterator refIt;
 
             const int targetTotal = total(shape);
             int bestBlobTotal = INT_MAX;
@@ -964,7 +913,7 @@ public:
                 // it might be used as output.
                 if (refIt != refCounter.end() && refIt->second == 0)
                 {
-                    Mat& unusedBlob = hostIt->second;
+                    const Mat& unusedBlob = hostIt->second;
                     if (unusedBlob.total() >= targetTotal &&
                         unusedBlob.total() < bestBlobTotal &&
                         unusedBlob.type() == dtype)
@@ -1117,18 +1066,14 @@ static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
     }
     else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
     {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-        return Ptr<BackendWrapper>(new InfEngineBackendWrapper(targetId, m));
-#else
-        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
-#endif
+        CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019;
     }
     else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
 #ifdef HAVE_DNN_NGRAPH
         return Ptr<BackendWrapper>(new NgraphBackendWrapper(targetId, m));
 #else
-        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
+        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO / Inference Engine + nGraph");
 #endif
     }
     else if (backendId == DNN_BACKEND_WEBNN)
@@ -1177,7 +1122,7 @@ detail::NetImplBase::NetImplBase()
     // nothing
 }
 
-std::string detail::NetImplBase::getDumpFileNameBase()
+std::string detail::NetImplBase::getDumpFileNameBase() const
 {
     std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++);
     return dumpFileNameBase;
@@ -1214,6 +1159,7 @@ struct Net::Impl : public detail::NetImplBase
     std::vector<LayerPin> blobsToKeep;
     MapIdToLayerData layers;
     std::map<String, int> layerNameToId;
+    std::map<std::string, int> outputNameToId;  // use registerOutput() to populate outputs
     BlobManager blobManager;
     int preferableBackend;
     int preferableTarget;
@@ -1230,7 +1176,6 @@ struct Net::Impl : public detail::NetImplBase
     bool fusion;
     bool isAsync;
     std::vector<int64> layersTimings;
-    Mat output_blob;
 
 #ifdef HAVE_CUDA
     struct CudaInfo_t
@@ -1276,7 +1221,7 @@ struct Net::Impl : public detail::NetImplBase
             }
             else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
             {
-                return wrapMat(preferableBackend, preferableTarget, host);
+                CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019;
             }
             else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
             {
@@ -1329,7 +1274,7 @@ struct Net::Impl : public detail::NetImplBase
         std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
         for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
         {
-            LayerData &ld = it->second;
+            LayerData& ld = it->second;
             Ptr<Layer> layer = ld.layerInstance;
             if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip)
             {
@@ -1405,7 +1350,7 @@ struct Net::Impl : public detail::NetImplBase
             preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT;
 #ifdef HAVE_INF_ENGINE
         if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
-            preferableBackend = getInferenceEngineBackendTypeParam();
+            preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  // = getInferenceEngineBackendTypeParam();
 #endif
 
         CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
@@ -1416,8 +1361,7 @@ struct Net::Impl : public detail::NetImplBase
                   preferableTarget == DNN_TARGET_CPU ||
                   preferableTarget == DNN_TARGET_OPENCL);
 #ifdef HAVE_INF_ENGINE
-        if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
-            preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         {
             CV_Assert(
                   (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) ||
@@ -1522,19 +1466,19 @@ struct Net::Impl : public detail::NetImplBase
         }
     }
 
-    int getLayerId(const String &layerName)
+    int getLayerId(const String &layerName) const
     {
-        std::map<String, int>::iterator it = layerNameToId.find(layerName);
+        std::map<String, int>::const_iterator it = layerNameToId.find(layerName);
         return (it != layerNameToId.end()) ? it->second : -1;
     }
 
-    int getLayerId(int id)
+    int getLayerId(int id) const
     {
-        MapIdToLayerData::iterator it = layers.find(id);
+        MapIdToLayerData::const_iterator it = layers.find(id);
         return (it != layers.end()) ? id : -1;
     }
 
-    int getLayerId(DictValue &layerDesc)
+    int getLayerId(DictValue &layerDesc) const
     {
         if (layerDesc.isInt())
             return getLayerId(layerDesc.get<int>());
@@ -1545,23 +1489,23 @@ struct Net::Impl : public detail::NetImplBase
         return -1;
     }
 
-    String getLayerName(int id)
+    String getLayerName(int id) const
     {
-        MapIdToLayerData::iterator it = layers.find(id);
+        MapIdToLayerData::const_iterator it = layers.find(id);
         return (it != layers.end()) ? it->second.name : "(unknown layer)";
     }
 
-    LayerData& getLayerData(int id)
+    LayerData& getLayerData(int id) const
     {
-        MapIdToLayerData::iterator it = layers.find(id);
+        MapIdToLayerData::const_iterator it = layers.find(id);
 
         if (it == layers.end())
             CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));
 
-        return it->second;
+        return const_cast<LayerData&>(it->second);
     }
 
-    LayerData& getLayerData(const String &layerName)
+    LayerData& getLayerData(const String &layerName) const
     {
         int id = getLayerId(layerName);
 
@@ -1571,7 +1515,7 @@ struct Net::Impl : public detail::NetImplBase
         return getLayerData(id);
     }
 
-    LayerData& getLayerData(const DictValue &layerDesc)
+    LayerData& getLayerData(const DictValue &layerDesc) const
     {
         CV_Assert(layerDesc.isInt() || layerDesc.isString());
         if (layerDesc.isInt())
@@ -1597,14 +1541,14 @@ struct Net::Impl : public detail::NetImplBase
         ld.inputBlobsId[inNum] = from;
     }
 
-    int resolvePinOutputName(LayerData &ld, const String &outName)
+    int resolvePinOutputName(LayerData &ld, const String &outName) const
     {
         if (outName.empty())
             return 0;
         return ld.getLayerInstance()->outputNameToIndex(outName);
     }
 
-    LayerPin getPinByAlias(const String &layerName)
+    LayerPin getPinByAlias(const String &layerName) const
     {
         LayerPin pin;
         pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);
@@ -1615,13 +1559,17 @@ struct Net::Impl : public detail::NetImplBase
         return pin;
     }
 
-    std::vector<LayerPin> getLayerOutPins(const String &layerName)
+    std::vector<LayerPin> getLayerOutPins(const String &layerName) const
     {
         int lid = (layerName.empty()) ? 0 : getLayerId(layerName);
 
-        std::vector<LayerPin> pins;
+        MapIdToLayerData::const_iterator it = layers.find(lid);
+        if (it == layers.end())
+            CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid", lid));
+        const size_t nOutputs = it->second.outputBlobs.size();
 
-        for (int i = 0; i < layers[lid].outputBlobs.size(); i++)
+        std::vector<LayerPin> pins;
+        for (int i = 0; i < nOutputs; i++)
         {
             pins.push_back(LayerPin(lid, i));
         }
@@ -1629,6 +1577,38 @@ struct Net::Impl : public detail::NetImplBase
         return pins;
     }
 
+    // Adds a layer under a unique name and returns its new id. FIXIT remove dtype
+    int addLayer(const String &name, const String &type, const int &dtype, LayerParams &params)
+    {
+        const int existingId = getLayerId(name);
+        if (existingId >= 0)
+        {
+            // A name clash is accepted only for "NotImplemented" layers in a diagnostic run.
+            const bool overwriteExisting = DNN_DIAGNOSTICS_RUN && type == "NotImplemented";
+            if (overwriteExisting)
+            {
+                // Overwrite type/params of the stored entry; no new id is issued.
+                LayerData& existing = layers.find(existingId)->second;
+                existing.type = type;
+                existing.params = params;
+                return -1;
+            }
+            CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net");
+            return -1;
+        }
+
+        const int newId = ++lastLayerId;
+        layerNameToId.insert(std::make_pair(name, newId));
+        layers.insert(std::make_pair(newId, LayerData(newId, name, type, dtype, params)));
+
+        // Network-wide flags are only ever raised here, never cleared.
+        if (params.get<bool>("has_dynamic_shapes", false))
+            hasDynamicShapes = true;
+        if (dtype == CV_8S)
+            netWasQuantized = true;
+        return newId;
+    }
+
     void connect(int outLayerId, int outNum, int inLayerId, int inNum)
     {
         CV_Assert(outLayerId < inLayerId);
@@ -1638,6 +1618,40 @@ struct Net::Impl : public detail::NetImplBase
         addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
         ldOut.requiredOutputs.insert(outNum);
         ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
+
+        CV_LOG_VERBOSE(NULL, 0, "DNN: connect(" << outLayerId << ":" << outNum << " ==> " << inLayerId << ":" << inNum << ")");
+    }
+
+    int registerOutput(const std::string& outputName, int layerId, int outputPort)
+    {
+        // Binds 'outputName' to pin layerId:outputPort and returns the id of the layer carrying that name.
+        const int sameNameLayerId = getLayerId(outputName);
+        if (sameNameLayerId >= 0)
+        {
+            const bool isOwnFirstPort = (sameNameLayerId == layerId) && (outputPort == 0);
+            if (isOwnFirstPort)
+            {
+                // layer name correlates with its output name
+                CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "': reuse layer with the same name and id=" << layerId << " to be linked");
+                outputNameToId.insert(std::make_pair(outputName, layerId));
+                return sameNameLayerId;
+            }
+            // The name is taken and cannot be reused for this pin.
+            CV_Error_(Error::StsBadArg, ("Layer with name='%s' already exists id=%d (to be linked with %d:%d)", outputName.c_str(), sameNameLayerId, layerId, outputPort));
+        }
+
+        // TODO: for outputPort == 0 make alias only, need to adopt getUnconnectedOutLayers() call
+        // Expose the pin through a dedicated "Identity" layer named after the output.
+        LayerParams identityParams;
+        identityParams.name = outputName;
+        identityParams.type = "Identity";
+        const int dtype = CV_32F;  // FIXIT remove
+        const int identityId = addLayer(identityParams.name, identityParams.type, dtype, identityParams);
+        connect(layerId, outputPort, identityId, 0);
+
+        CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "' id=" << identityId << " defined as " << layerId << ":" << outputPort);
+        outputNameToId.insert(std::make_pair(outputName, identityId));
+        return identityId;
     }
 
     void initBackend(const std::vector<LayerPin>& blobsToKeep_)
@@ -1649,14 +1663,6 @@ struct Net::Impl : public detail::NetImplBase
         }
         else if (preferableBackend == DNN_BACKEND_HALIDE)
             initHalideBackend();
-        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-            initInfEngineBackend(blobsToKeep_);
-#else
-            CV_Assert(false && "This OpenCV version is built without Inference Engine NN Builder API support");
-#endif
-        }
         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         {
 #ifdef HAVE_DNN_NGRAPH
@@ -1678,7 +1684,7 @@ struct Net::Impl : public detail::NetImplBase
         else if (preferableBackend == DNN_BACKEND_CUDA)
             initCUDABackend(blobsToKeep_);
         else
-            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
+            CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend));
     }
 
     void initHalideBackend()
@@ -1736,322 +1742,17 @@ struct Net::Impl : public detail::NetImplBase
         }
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    // Before launching Inference Engine graph we need to specify output blobs.
-    // This function requests output blobs based on inputs references of
-    // layers from default backend or layers from different graphs.
-    void addInfEngineNetOutputs(LayerData &ld)
-    {
-        CV_TRACE_FUNCTION();
-        Ptr<InfEngineBackendNet> layerNet;
-        if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end())
-        {
-            Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
-            if (!node.empty())
-            {
-                Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
-                CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
-                layerNet = ieNode->net;
-            }
-        }
-        // For an every input reference we check that it belongs to one of
-        // the Inference Engine backend graphs. Request an output blob if it is.
-        // Do nothing if layer's input is from the same graph.
-        for (int i = 0; i < ld.inputBlobsId.size(); ++i)
-        {
-            LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
-            Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
-            if (!inpNode.empty())
-            {
-                Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
-                CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
-                if (layerNet != ieInpNode->net)
-                {
-                    // layerNet is empty or nodes are from different graphs.
-                    ieInpNode->net->addOutput(ieInpNode->layer.getName());
-                }
-            }
-        }
-    }
-
-    void initInfEngineBackend(const std::vector<LayerPin>& blobsToKeep_)
-    {
-        CV_TRACE_FUNCTION();
-        CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, haveInfEngine());
-        MapIdToLayerData::iterator it;
-        Ptr<InfEngineBackendNet> net;
-
-        for (it = layers.begin(); it != layers.end(); ++it)
-        {
-            LayerData &ld = it->second;
-            if (ld.id == 0)
-            {
-                CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
-                          (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
-                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
-                {
-                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
-                    dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
-#else
-                    dataPtr->setName(netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]);
-#endif
-                }
-            }
-            else
-            {
-                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
-                {
-                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
-                    dataPtr->name = ld.name;
-#else
-                    dataPtr->setName(ld.name);
-#endif
-                }
-            }
-        }
-
-        if (skipInfEngineInit)
-        {
-            Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
-            CV_Assert(!node.empty());
-
-            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
-            CV_Assert(!ieNode.empty());
-            ieNode->net->reset();
-
-            for (it = layers.begin(); it != layers.end(); ++it)
-            {
-                LayerData &ld = it->second;
-                if (ld.id == 0)
-                {
-                    for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
-                    {
-                        InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
-                        dataPtr->name = netInputLayer->outNames[i];
-#else
-                        dataPtr->setName(netInputLayer->outNames[i]);
-#endif
-                    }
-                }
-                else
-                {
-                    for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
-                    {
-                        InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
-                        dataPtr->name = ld.name;
-#else
-                        dataPtr->setName(ld.name);
-#endif
-                    }
-                }
-                ieNode->net->addBlobs(ld.inputBlobsWrappers);
-                ieNode->net->addBlobs(ld.outputBlobsWrappers);
-                ld.skip = true;
-            }
-            layers[lastLayerId].skip = false;
-            ieNode->net->init((Target)preferableTarget);
-            return;
-        }
-
-        // Build Inference Engine networks from sets of layers that support this
-        // backend. Split a whole model on several Inference Engine networks if
-        // some of layers are not implemented.
-
-        bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU ||
-                                   BackendRegistry::checkIETarget(DNN_TARGET_CPU);
-
-        // Set of all input and output blobs wrappers for current network.
-        std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
-        for (it = layers.begin(); it != layers.end(); ++it)
-        {
-            LayerData &ld = it->second;
-            if (ld.id == 0 && ld.skip)
-                continue;
-            bool fused = ld.skip;
-
-            Ptr<Layer> layer = ld.layerInstance;
-            if (!fused && !layer->supportBackend(preferableBackend))
-            {
-                bool customizable = ld.id != 0 &&
-                                    INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2) &&
-                                    supportsCPUFallback;
-                // TODO: there is a bug in Myriad plugin with custom layers shape infer.
-                if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL)
-                {
-                    for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
-                    {
-                        customizable = ld.inputBlobs[i]->size[0] == 1;
-                    }
-                }
-
-                // TODO: fix these workarounds
-                if (preferableTarget == DNN_TARGET_MYRIAD ||
-                    preferableTarget == DNN_TARGET_HDDL ||
-                    preferableTarget == DNN_TARGET_OPENCL ||
-                    preferableTarget == DNN_TARGET_OPENCL_FP16)
-                    customizable &= ld.type != "Concat";
-
-                if (preferableTarget == DNN_TARGET_OPENCL ||
-                    preferableTarget == DNN_TARGET_OPENCL_FP16)
-                    customizable &= ld.type != "Power";
-
-                if (preferableTarget == DNN_TARGET_OPENCL)
-                    customizable &= ld.type != "Eltwise";
-
-                if (!customizable)
-                {
-                    addInfEngineNetOutputs(ld);
-                    net = Ptr<InfEngineBackendNet>();
-                    netBlobsWrappers.clear();  // Is not used for R5 release but we don't wrap it to #ifdef.
-                    layer->preferableTarget = DNN_TARGET_CPU;
-                    continue;
-                }
-            }
-            ld.skip = true;  // Initially skip all Inference Engine supported layers.
-
-            // Create a new network if one of inputs from different Inference Engine graph.
-            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
-            {
-                LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
-                Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
-                if (!inpNode.empty())
-                {
-                    Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
-                    CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
-                    if (ieInpNode->net != net)
-                    {
-                        net = Ptr<InfEngineBackendNet>();
-                        netBlobsWrappers.clear();  // Is not used for R5 release but we don't wrap it to #ifdef.
-                        break;
-                    }
-                }
-            }
-
-            Ptr<BackendNode> node;
-            if (!net.empty())
-            {
-                if (fused)
-                {
-                    bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
-                                   ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
-                    CV_Assert(inPlace);
-                    node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
-                    ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
-                }
-            }
-            else
-                net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet());
-
-            if (!fused)
-            {
-                if (layer->supportBackend(preferableBackend))
-                    node = layer->initInfEngine(ld.inputBlobsWrappers);
-                else
-                {
-                    node = Ptr<BackendNode>(new InfEngineBackendNode(
-                        ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
-                }
-            }
-            else if (node.empty())
-                continue;
-
-            CV_Assert(!node.empty());
-            ld.backendNodes[preferableBackend] = node;
-
-            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
-            CV_Assert(!ieNode.empty());
-            ieNode->net = net;
-
-            for (const auto& pin : blobsToKeep_)
-            {
-                if (pin.lid == ld.id)
-                {
-                    ieNode->net->addOutput(ieNode->layer.getName());
-                    break;
-                }
-            }
-
-            // Convert weights in FP16 for specific targets.
-            if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
-                 preferableTarget == DNN_TARGET_MYRIAD ||
-                 preferableTarget == DNN_TARGET_HDDL ||
-                 preferableTarget == DNN_TARGET_FPGA) && !fused)
-            {
-#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
-                for (const std::string& name : {"weights", "biases"})
-                {
-                    auto it = ieNode->layer.getParameters().find(name);
-                    if (it != ieNode->layer.getParameters().end())
-                    {
-                        InferenceEngine::Blob::Ptr bp = it->second.as<InferenceEngine::Blob::Ptr>();
-                        it->second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(bp));
-                    }
-                }
-#else
-                auto& blobs = ieNode->layer.getConstantData();
-                if (blobs.empty())
-                {
-                    // In case of non weightable layer we have to specify
-                    // it's precision adding dummy blob.
-                    auto blob = InferenceEngine::make_shared_blob<int16_t>(
-                                    InferenceEngine::Precision::FP16,
-                                    InferenceEngine::Layout::C, {1});
-                    blob->allocate();
-                    blobs[""] = blob;
-                }
-                else
-                {
-                    for (auto& it : blobs)
-                        it.second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(it.second));
-                }
-#endif
-            }
-
-            if (!fused)
-                net->addLayer(ieNode->layer);
-
-            net->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers, ieNode->layer.getName());
-            net->addBlobs(ld.inputBlobsWrappers);
-            net->addBlobs(ld.outputBlobsWrappers);
-            addInfEngineNetOutputs(ld);
-        }
-
-        // Initialize all networks.
-        for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
-        {
-            LayerData &ld = it->second;
-            if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end())
-                continue;
-
-            Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
-            if (node.empty())
-                continue;
-
-            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
-            if (ieNode.empty())
-                continue;
-
-            CV_Assert(!ieNode->net.empty());
-
-            if (!ieNode->net->isInitialized())
-            {
-                ieNode->net->init((Target)preferableTarget);
-                ld.skip = false;
-            }
-        }
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 
 #ifdef HAVE_DNN_NGRAPH
+    /** mark input pins as outputs from other subnetworks
+     * FIXIT must be done by DNN engine not ngraph.
+     */
     void addNgraphOutputs(LayerData &ld)
     {
         CV_TRACE_FUNCTION();
 
+        CV_LOG_DEBUG(NULL, "DNN/IE: layer of new subnet: " << ld.name << "@" << ld.type);
+
         Ptr<InfEngineNgraphNet> layerNet;
         auto it = ld.backendNodes.find(preferableBackend);
         if (it != ld.backendNodes.end())
@@ -2075,8 +1776,8 @@ struct Net::Impl : public detail::NetImplBase
                 CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
                 if (layerNet != ieInpNode->net)
                 {
-                    ieInpNode->net->addOutput(ieInpNode->node->get_friendly_name());
-                    ieInpNode->net->setUnconnectedNodes(ieInpNode);
+                    CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node->get_friendly_name());
+                    ieInpNode->net->addOutput(ieInpNode);
                 }
             }
         }
@@ -2085,14 +1786,13 @@ struct Net::Impl : public detail::NetImplBase
     void initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
     {
         CV_TRACE_FUNCTION();
-        CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, haveInfEngine());
+        CV_CheckEQ(preferableBackend, DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, "");
 
-        MapIdToLayerData::iterator it;
         Ptr<InfEngineNgraphNet> net;
 
-        for (it = layers.begin(); it != layers.end(); ++it)
+        for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it)
         {
-            LayerData &ld = it->second;
+            const LayerData& ld = it->second;
             if (ld.id == 0)
             {
                 CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
@@ -2128,9 +1828,9 @@ struct Net::Impl : public detail::NetImplBase
             InfEngineNgraphNet& ienet = *ieNode->net;
             ienet.reset();
 
-            for (it = layers.begin(); it != layers.end(); ++it)
+            for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
             {
-                LayerData &ld = it->second;
+                LayerData& ld = it->second;
                 if (ld.id == 0)
                 {
                     for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
@@ -2172,17 +1872,23 @@ struct Net::Impl : public detail::NetImplBase
         // Build Inference Engine networks from sets of layers that support this
         // backend. Split a whole model on several Inference Engine networks if
         // some of layers are not implemented.
-        for (it = layers.begin(); it != layers.end(); ++it)
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
         {
-            LayerData &ld = it->second;
+            LayerData& ld = it->second;
+
+            CV_LOG_DEBUG(NULL, "DNN/IE: processing layer " << ld.name << "@" << ld.type << " (" << ld.id << ") ...");
 
             if (ld.id == 0 && ld.skip)
+            {
+                CV_LOG_DEBUG(NULL, "DNN/IE:    SKIP!");
                 continue;
+            }
 
             bool fused = ld.skip;
             Ptr<Layer> layer = ld.layerInstance;
             if (!fused && !layer->supportBackend(preferableBackend))
             {
+                CV_LOG_DEBUG(NULL, "DNN/IE:    NOT supported!");
                 bool customizable = ld.id != 0 && supportsCPUFallback;
 
                 // TODO: there is a bug in Myriad plugin with custom layers shape infer.
@@ -2210,6 +1916,7 @@ struct Net::Impl : public detail::NetImplBase
 
                 if (!customizable)
                 {
+                    CV_LOG_DEBUG(NULL, "DNN/IE:    NOT customizable!");
                     addNgraphOutputs(ld);
                     net = Ptr<InfEngineNgraphNet>();
                     layer->preferableTarget = DNN_TARGET_CPU;
@@ -2221,7 +1928,7 @@ struct Net::Impl : public detail::NetImplBase
                         if (!inpNode.empty()) {
                             Ptr<InfEngineNgraphNode> ieNode = inpNode.dynamicCast<InfEngineNgraphNode>();
                             CV_Assert(!ieNode.empty());
-                            ieNode->net->setUnconnectedNodes(ieNode);
+                            ieNode->net->addOutput(ieNode);
                         }
                     }
                     continue;
@@ -2309,9 +2016,22 @@ struct Net::Impl : public detail::NetImplBase
                         continue;
 
                     auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
-                    CV_Assert(oid < ieInpNode->node->get_output_size());
+                    const auto& ngraph_input_node = ieInpNode->node;
+                    CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")");
+
+                    // Handle parameters from other subnets. Output port is not used in this case
+                    if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) &&
+                            ngraph_input_node->get_output_size() == 1)
+                    {
+                        inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ngraph_input_node));
+                        continue;
+                    }
+                    CV_CheckLT((size_t)oid, ngraph_input_node->get_output_size(), "");
 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
-                    inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node));
+                    // FIXIT refactor ".initNgraph()" API to use Output<Node>
+                    // WA: use Concat to emulate Identity operation with requested output port
+                    auto oid_node = std::make_shared<ngraph::op::Concat>(ngraph::OutputVector {ngraph_input_node->output(oid)}, 0);
+                    inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(oid_node));
 #elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3)
                     inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid)));
 #else
@@ -2321,21 +2041,30 @@ struct Net::Impl : public detail::NetImplBase
 
                 if (layer->supportBackend(preferableBackend))
                 {
+                    CV_LOG_DEBUG(NULL, "DNN/IE: wrap layer " << ld.name << "@" << ld.type << " - outputs: " << ld.outputBlobsWrappers.size());
                     node = layer->initNgraph(ld.inputBlobsWrappers, inputNodes);
+#if 0  // FIXIT doesn't work with multiple outputs (set name is applied to the same node)
                     for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                     {
                         InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
                         node.dynamicCast<InfEngineNgraphNode>()->setName(dataPtr->getName());
                     }
+#else
+                    node.dynamicCast<InfEngineNgraphNode>()->setName(layer->name);
+#endif
                 }
                 else
                 {
+                    CV_LOG_DEBUG(NULL, "DNN/IE: layer is not supported: " << ld.name << "@" << ld.type);
                     node = Ptr<BackendNode>(new InfEngineNgraphNode(inputNodes,
                         ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
                 }
             }
             else if (node.empty())
+            {
+                CV_LOG_DEBUG(NULL, "DNN/IE: node.empty() bypass...");
                 continue;
+            }
 
             ld.backendNodes[preferableBackend] = node;
 
@@ -2343,15 +2072,11 @@ struct Net::Impl : public detail::NetImplBase
             CV_Assert(!ieNode.empty());
             ieNode->net = net;
 
-            if (ld.consumers.empty()) {
-                // TF EAST_text_detection
-                ieNode->net->setUnconnectedNodes(ieNode);
-            }
             for (const auto& pin : blobsToKeep_)
             {
                 if (pin.lid == ld.id)
                 {
-                    ieNode->net->addOutput(ieNode->node->get_friendly_name());
+                    ieNode->net->addOutput(ieNode);
                     break;
                 }
             }
@@ -2382,7 +2107,7 @@ struct Net::Impl : public detail::NetImplBase
 
             if (!ieNode->net->isInitialized())
             {
-                ieNode->net->setUnconnectedNodes(ieNode);
+                ieNode->net->addOutput(ieNode);
                 ieNode->net->createNet((Target)preferableTarget);
                 ld.skip = false;
             }
@@ -2430,10 +2155,9 @@ struct Net::Impl : public detail::NetImplBase
         CV_TRACE_FUNCTION();
         CV_Assert_N(preferableBackend == DNN_BACKEND_WEBNN, haveWebnn());
 
-        MapIdToLayerData::iterator it;
         Ptr<WebnnNet> net;
 
-        for (it = layers.begin(); it != layers.end(); ++it)
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
         {
             LayerData &ld = it->second;
             if (ld.id == 0)
@@ -2462,7 +2186,7 @@ struct Net::Impl : public detail::NetImplBase
         // Build WebNN networks from sets of layers that support this
         // backend. Split a whole model on several WebNN networks if
         // some of layers are not implemented.
-        for (it = layers.begin(); it != layers.end(); ++it)
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
         {
             LayerData &ld = it->second;
 
@@ -2662,8 +2386,7 @@ struct Net::Impl : public detail::NetImplBase
         if (!haveVulkan())
             return;
 
-        MapIdToLayerData::iterator it = layers.begin();
-        for (; it != layers.end(); it++)
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
         {
             LayerData &ld = it->second;
             Ptr<Layer> layer = ld.layerInstance;
@@ -2812,7 +2535,7 @@ struct Net::Impl : public detail::NetImplBase
             ld.inputLayersId.insert(ld.inputBlobsId[i].lid);
 
         //allocate parents
-        for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
+        for (set<int>::const_iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
             allocateLayer(*i, layersShapes);
 
         //bind inputs
@@ -2894,16 +2617,21 @@ struct Net::Impl : public detail::NetImplBase
 
         if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV &&
                         preferableBackend != DNN_BACKEND_CUDA &&
-                        preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 &&
                         preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
            return;
 
+#if 0  // FIXIT mode without fusion is broken due to unsupported layers and handling of "custom" nodes
+        if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return;
+#endif
+
         // scan through all the layers. If there is convolution layer followed by the activation layer,
         // we try to embed this activation into the convolution and disable separate execution of the activation
+
+        // FIXIT replace by layersToKeep to avoid hacks like "LayerPin(lid, 0)"
         std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
                                       blobsToKeep_.end());
-        MapIdToLayerData::iterator it;
-        for (it = layers.begin(); it != layers.end(); it++)
+        for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
         {
             int lid = it->first;
             LayerData& ld = layers[lid];
@@ -2925,6 +2653,13 @@ struct Net::Impl : public detail::NetImplBase
                 LayerPin lpNext(ld.consumers[0].lid, 0);
                 while (nextData)
                 {
+#ifdef HAVE_INF_ENGINE
+                    if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && pinsToKeep.count(lpNext) != 0)
+                    {
+                        CV_LOG_DEBUG(NULL, "DNN/IE: skip fusing with 'output' node: " << nextData->name << "@" << nextData->type);
+                        break;
+                    }
+#endif
                     /* we use `tryFuse` member of convolution layer to fuse eltwise later
                      * it's not intended to be fused here; hence, we stop when we encounter eltwise
                      */
@@ -3450,8 +3185,7 @@ struct Net::Impl : public detail::NetImplBase
     {
         CV_TRACE_FUNCTION();
 
-        MapIdToLayerData::iterator it;
-        for (it = layers.begin(); it != layers.end(); it++)
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
             it->second.flag = 0;
 
         CV_Assert(!layers[0].outputBlobs.empty());
@@ -3485,7 +3219,7 @@ struct Net::Impl : public detail::NetImplBase
         // Fake references to input blobs.
         for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
             blobManager.addReference(LayerPin(0, i));
-        for (it = layers.begin(); it != layers.end(); ++it)
+        for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it)
         {
             const LayerData& ld = it->second;
             blobManager.addReferences(ld.inputBlobsId);
@@ -3496,7 +3230,7 @@ struct Net::Impl : public detail::NetImplBase
             blobManager.addReference(blobsToKeep_[i]);
         }
 
-        for (it = layers.begin(); it != layers.end(); it++)
+        for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
         {
             int lid = it->first;
             allocateLayer(lid, layersShapes);
@@ -3517,7 +3251,11 @@ struct Net::Impl : public detail::NetImplBase
             TickMeter tm;
             tm.start();
 
+#ifndef HAVE_VULKAN
+            std::map<int, Ptr<BackendNode> >::const_iterator it = ld.backendNodes.find(preferableBackend);
+#else
             std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
+#endif
             if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
             {
                 if (isAsync)
@@ -3699,18 +3437,17 @@ struct Net::Impl : public detail::NetImplBase
                 {
                     forwardHalide(ld.outputBlobsWrappers, node);
                 }
-                else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-                {
-                    forwardInfEngine(ld.outputBlobsWrappers, node, isAsync);
-                }
+#ifdef HAVE_INF_ENGINE
                 else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
                 {
                     forwardNgraph(ld.outputBlobsWrappers, node, isAsync);
                 }
-                 else if (preferableBackend == DNN_BACKEND_WEBNN)
+#endif
+                else if (preferableBackend == DNN_BACKEND_WEBNN)
                 {
                     forwardWebnn(ld.outputBlobsWrappers, node, isAsync);
                 }
+#ifdef HAVE_VULKAN
                 else if (preferableBackend == DNN_BACKEND_VKCOM)
                 {
                     try
@@ -3724,6 +3461,7 @@ struct Net::Impl : public detail::NetImplBase
                         forwardLayer(ld);
                     }
                 }
+#endif
                 else
                 {
                     CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
@@ -3748,8 +3486,7 @@ struct Net::Impl : public detail::NetImplBase
 
         if (clearFlags)
         {
-            MapIdToLayerData::iterator it;
-            for (it = layers.begin(); it != layers.end(); it++)
+            for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
                 it->second.flag = 0;
         }
 
@@ -3758,8 +3495,7 @@ struct Net::Impl : public detail::NetImplBase
             return;
 
         //forward parents
-        MapIdToLayerData::iterator it;
-        for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
         {
             LayerData &ld = it->second;
             if (ld.flag)
@@ -3845,7 +3581,7 @@ struct Net::Impl : public detail::NetImplBase
             for(int i = 0; i < inputLayerIds.size(); i++)
             {
                 int layerId = inputLayerIds[i].lid;
-                LayersShapesMap::iterator it =
+                LayersShapesMap::const_iterator it =
                         inOutShapes.find(layerId);
                 if(it == inOutShapes.end() ||
                         it->second.out.empty())
@@ -3928,7 +3664,7 @@ struct Net::Impl : public detail::NetImplBase
         inOutShapes.clear();
 
         inOutShapes[0].in = netInputShapes; //insert shape for first input layer
-        for (MapIdToLayerData::iterator it = layers.begin();
+        for (MapIdToLayerData::const_iterator it = layers.begin();
              it != layers.end(); it++)
         {
             getLayerShapesRecursively(it->first, inOutShapes);
@@ -3969,12 +3705,11 @@ struct Net::Impl : public detail::NetImplBase
         CV_LOG_DEBUG(NULL, toString(inputShapes, "Network input shapes"));
         LayersShapesMap layersShapes;
         layersShapes[0].in = inputShapes;
-        for (MapIdToLayerData::iterator it = layers.begin();
-             it != layers.end(); it++)
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
         {
             int layerId = it->first;
             LayerData& layerData = it->second;
-            std::vector<LayerPin>& inputLayerIds = layerData.inputBlobsId;
+            const std::vector<LayerPin>& inputLayerIds = layerData.inputBlobsId;
             LayerShapes& layerShapes = layersShapes[layerId];
             CV_LOG_DEBUG(NULL, "layer " << layerId << ": [" << layerData.type << "]:(" << layerData.name << ") with inputs.size=" << inputLayerIds.size());
             if (layerShapes.in.empty())
@@ -3984,7 +3719,7 @@ struct Net::Impl : public detail::NetImplBase
                     const LayerPin& inputPin = inputLayerIds[i];
                     int inputLayerId = inputPin.lid;
                     CV_LOG_DEBUG(NULL, "    input[" << i << "] " << inputLayerId << ":" << inputPin.oid << " as [" << layers[inputLayerId].type << "]:(" << layers[inputLayerId].name << ")");
-                    LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId);
+                    LayersShapesMap::const_iterator inputIt = layersShapes.find(inputLayerId);
                     if (inputIt == layersShapes.end() || inputIt->second.out.empty())
                     {
                         getLayerShapesRecursively(inputLayerId, layersShapes);
@@ -4001,19 +3736,23 @@ struct Net::Impl : public detail::NetImplBase
         CV_LOG_DEBUG(NULL, "updateLayersShapes() - DONE");
     }
 
-    LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
+    LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins) const
     {
         return *std::max_element(pins.begin(), pins.end());
     }
 
-    Mat getBlob(const LayerPin& pin)
+    Mat getBlob(const LayerPin& pin) const
     {
         CV_TRACE_FUNCTION();
 
         if (!pin.valid())
             CV_Error(Error::StsObjectNotFound, "Requested blob not found");
 
-        LayerData &ld = layers[pin.lid];
+        MapIdToLayerData::const_iterator it = layers.find(pin.lid);
+        if (it == layers.end())
+            CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid (output #%d requested)", pin.lid, pin.oid));
+
+        const LayerData &ld = it->second;
         if ((size_t)pin.oid >= ld.outputBlobs.size())
         {
             CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %zu outputs, "
@@ -4029,6 +3768,7 @@ struct Net::Impl : public detail::NetImplBase
 
         if (ld.outputBlobs[pin.oid].depth() == CV_16S)
         {
+            Mat output_blob;
             convertFp16(ld.outputBlobs[pin.oid], output_blob);
             return output_blob;
         }
@@ -4036,7 +3776,7 @@ struct Net::Impl : public detail::NetImplBase
             return ld.outputBlobs[pin.oid];
     }
 
-    Mat getBlob(String outputName)
+    Mat getBlob(String outputName) const
     {
         return getBlob(getPinByAlias(outputName));
     }
@@ -4062,27 +3802,13 @@ struct Net::Impl : public detail::NetImplBase
             // Transfer data to CPU if it's require.
             ld.outputBlobsWrappers[pin.oid]->copyToHost();
         }
-        CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
+        CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
 
-        if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-            Ptr<InfEngineBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<InfEngineBackendWrapper>();
-            return std::move(wrapper->futureMat);
+        Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
+        return std::move(wrapper->futureMat);
 #else
-            CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
-#endif
-        }
-        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        {
-#ifdef HAVE_DNN_NGRAPH
-            Ptr<NgraphBackendWrapper> wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast<NgraphBackendWrapper>();
-            return std::move(wrapper->futureMat);
-#else
-            CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph");
-#endif
-        }
+        CV_Error(Error::StsNotImplemented, "DNN: OpenVINO/nGraph backend is required");
 #endif  // HAVE_INF_ENGINE
-        CV_Error(Error::StsNotImplemented, "DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 backend is required");
     }
 
     AsyncArray getBlobAsync(String outputName)
@@ -4096,9 +3822,9 @@ struct Net::Impl : public detail::NetImplBase
     Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
 #endif
 
-    string dump();
+    string dump() const;
 
-    void dumpNetworkToFile()
+    void dumpNetworkToFile() const
     {
 #ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
         string dumpFileNameBase = getDumpFileNameBase();
@@ -4156,40 +3882,18 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
     CV_TRACE_REGION_NEXT("backendNode");
 
     Ptr<BackendNode> backendNode;
-#ifdef HAVE_DNN_NGRAPH
-    if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
     {
         auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{});
         Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
         backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
         backendNode = backendNodeNGraph;
     }
-    else
-#endif
-    {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-        Ptr<InfEngineBackendNode> backendNodeNN(new InfEngineBackendNode(InferenceEngine::Builder::Layer("")));
-        backendNodeNN->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
-        backendNode = backendNodeNN;
-#else
-        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
-#endif
-    }
 
     CV_TRACE_REGION_NEXT("register_outputs");
 
-#ifdef HAVE_DNN_NGRAPH
     auto ngraphFunction = ieNet.getFunction();
-#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2)
-    std::list< std::shared_ptr<ngraph::Node> > ngraphOperations;
-#else
-    std::vector< std::shared_ptr<ngraph::Node> > ngraphOperations;
-#endif
-    if (ngraphFunction)
-    {
-        ngraphOperations = ngraphFunction->get_ops();
-    }
-#endif
+    CV_Assert(ngraphFunction);
+    std::vector< std::shared_ptr<ngraph::Node> > ngraphOperations = ngraphFunction->get_ops();
 
     for (auto& it : ieNet.getOutputsInfo())
     {
@@ -4201,8 +3905,6 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
 
         LayerData& ld = cvNet.impl->layers[lid];
 
-#ifdef HAVE_DNN_NGRAPH
-        if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
         {
             Ptr<Layer> cvLayer(new NgraphBackendLayer(ieNet));
             cvLayer->name = outputName;
@@ -4210,44 +3912,18 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
 
             auto process_layer = [&](const std::string& name) -> bool
             {
-                if (ngraphFunction)
+                CV_TRACE_REGION("ngraph_function");
+                for (const auto& op : ngraphOperations)
                 {
-                    CV_TRACE_REGION("ngraph_function");
-                    for (const auto& op : ngraphOperations)
+                    CV_Assert(op);
+                    if (op->get_friendly_name() == name)
                     {
-                        CV_Assert(op);
-                        if (op->get_friendly_name() == name)
-                        {
-                            const std::string typeName = op->get_type_info().name;
-                            cvLayer->type = typeName;
-                            return true;
-                        }
-                    }
-                    return false;
-                }
-                else
-                {
-#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
-                    CV_Error(Error::StsNotImplemented, "This OpenCV version is built with Inference Engine which has dropped IR v7 support");
-#else
-                    CV_TRACE_REGION("legacy_cnn_layer");
-                    try
-                    {
-                        InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(name.c_str());
-                        CV_Assert(ieLayer);
-
-                        cvLayer->type = ieLayer->type;
+                        const std::string typeName = op->get_type_info().name;
+                        cvLayer->type = typeName;
                         return true;
                     }
-                    catch (const std::exception& e)
-                    {
-                        CV_UNUSED(e);
-                        CV_LOG_DEBUG(NULL, "IE layer extraction failure: '" << name << "' - " << e.what());
-                        return false;
-                    }
-#endif
-
                 }
+                return false;
             };
 
             bool found = process_layer(outputName);
@@ -4266,37 +3942,6 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
             ld.layerInstance = cvLayer;
             ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode;
         }
-        else
-#endif
-        {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-            Ptr<Layer> cvLayer(new InfEngineBackendLayer(ieNet));
-
-            InferenceEngine::CNNLayerPtr ieLayer;
-            try
-            {
-                ieLayer = ieNet.getLayerByName(outputName.c_str());
-            }
-            catch (...)
-            {
-                auto pos = outputName.rfind('.');  // cut port number: ".0"
-                if (pos != std::string::npos)
-                {
-                    std::string layerName = outputName.substr(0, pos);
-                    ieLayer = ieNet.getLayerByName(layerName.c_str());
-                }
-            }
-            CV_Assert(ieLayer);
-
-            cvLayer->name = outputName;
-            cvLayer->type = ieLayer->type;
-            ld.layerInstance = cvLayer;
-
-            ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019] = backendNode;
-#else
-            CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
-#endif
-        }
 
         for (int i = 0; i < inputsNames.size(); ++i)
             cvNet.connect(0, i, lid, i);
@@ -4304,7 +3949,7 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
 
     CV_TRACE_REGION_NEXT("finalize");
 
-    cvNet.setPreferableBackend(getInferenceEngineBackendTypeParam());
+    cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
 
     cvNet.impl->skipInfEngineInit = true;
     return cvNet;
@@ -4318,16 +3963,11 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
     CV_UNUSED(xml); CV_UNUSED(bin);
     CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
 #else
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
-    InferenceEngine::CNNNetReader reader;
-    reader.ReadNetwork(xml);
-    reader.ReadWeights(bin);
 
-    InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
-#else
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     InferenceEngine::Core& ie = getCore("");
     InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin);
-#endif
 
     return Impl::createNetworkFromModelOptimizer(ieNet);
 #endif  // HAVE_INF_ENGINE
@@ -4354,26 +3994,8 @@ Net Net::readFromModelOptimizer(
     CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
 #else
 
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
-    InferenceEngine::CNNNetReader reader;
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
-    try
-    {
-        reader.ReadNetwork(bufferModelConfigPtr, bufferModelConfigSize);
-
-        InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
-        InferenceEngine::TBlob<uint8_t>::Ptr weightsBlobPtr(new InferenceEngine::TBlob<uint8_t>(tensorDesc));
-        weightsBlobPtr->allocate();
-        std::memcpy(weightsBlobPtr->buffer(), (uchar*)bufferWeightsPtr, bufferWeightsSize);
-        reader.SetWeights(weightsBlobPtr);
-    }
-    catch (const std::exception& e)
-    {
-        CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
-    }
-
-    InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
-#else
     InferenceEngine::Core& ie = getCore("");
 
     std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize);
@@ -4390,7 +4012,6 @@ Net Net::readFromModelOptimizer(
     {
         CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
     }
-#endif
 
     return Impl::createNetworkFromModelOptimizer(ieNet);
 #endif  // HAVE_INF_ENGINE
@@ -4404,34 +4025,8 @@ Net::~Net()
 int Net::addLayer(const String &name, const String &type, const int &dtype, LayerParams &params)
 {
     CV_TRACE_FUNCTION();
-
-    int id = impl->getLayerId(name);
-    if (id >= 0)
-    {
-        if (!DNN_DIAGNOSTICS_RUN || type != "NotImplemented")
-        {
-            CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net");
-            return -1;
-        }
-        else
-        {
-            LayerData& ld = impl->layers.find(id)->second;
-            ld.type = type;
-            ld.params = params;
-            return -1;
-        }
-    }
-
-    id = ++impl->lastLayerId;
-    impl->layerNameToId.insert(std::make_pair(name, id));
-    impl->layers.insert(std::make_pair(id, LayerData(id, name, type, dtype, params)));
-    if (params.get<bool>("has_dynamic_shapes", false))
-        impl->hasDynamicShapes = true;
-
-    if (dtype == CV_8S)
-        impl->netWasQuantized = true;
-
-    return id;
+    CV_Assert(impl);
+    return impl->addLayer(name, type, dtype, params);
 }
 
 int Net::addLayer(const String &name, const String &type, LayerParams &params)
@@ -4475,10 +4070,18 @@ void Net::connect(String _outPin, String _inPin)
     impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
 }
 
+int Net::registerOutput(const std::string& outputName, int layerId, int outputPort)
+{
+    CV_TRACE_FUNCTION();
+    CV_Assert(impl);
+    return impl->registerOutput(outputName, layerId, outputPort);
+}
+
 Mat Net::forward(const String& outputName)
 {
     CV_TRACE_FUNCTION();
     CV_Assert(!empty());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     String layerName = outputName;
 
@@ -4500,6 +4103,7 @@ AsyncArray Net::forwardAsync(const String& outputName)
 {
     CV_TRACE_FUNCTION();
     CV_Assert(!empty());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
 #ifdef CV_CXX11
     String layerName = outputName;
@@ -4514,8 +4118,8 @@ AsyncArray Net::forwardAsync(const String& outputName)
     std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
     impl->setUpNet(pins);
 
-    if (!(impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
-        CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backends only");
+    if (impl->preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backend only");
 
     impl->isAsync = true;
     impl->forwardToLayer(impl->getLayerData(layerName));
@@ -4531,6 +4135,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
 {
     CV_TRACE_FUNCTION();
     CV_Assert(!empty());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     String layerName = outputName;
 
@@ -4612,6 +4217,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs,
                   const std::vector<String>& outBlobNames)
 {
     CV_TRACE_FUNCTION();
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     std::vector<LayerPin> pins;
     for (int i = 0; i < outBlobNames.size(); i++)
@@ -4639,6 +4245,7 @@ void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
                      const std::vector<String>& outBlobNames)
 {
     CV_TRACE_FUNCTION();
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     std::vector<LayerPin> pins;
     for (int i = 0; i < outBlobNames.size(); i++)
@@ -4937,7 +4544,7 @@ void Net::setPreferableBackend(int backendId)
 
 #ifdef HAVE_INF_ENGINE
     if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
-        backendId = getInferenceEngineBackendTypeParam();
+        backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
 #endif
 
     if( impl->preferableBackend != backendId )
@@ -5000,6 +4607,7 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
 {
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     LayerPin pin;
     pin.lid = 0;
@@ -5059,7 +4667,7 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
     impl->netWasAllocated = impl->netWasAllocated && oldShape;
 }
 
-Mat Net::getParam(LayerId layer, int numParam)
+Mat Net::getParam(int layer, int numParam) const
 {
     LayerData &ld = impl->getLayerData(layer);
     std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
@@ -5067,7 +4675,7 @@ Mat Net::getParam(LayerId layer, int numParam)
     return layerBlobs[numParam];
 }
 
-void Net::setParam(LayerId layer, int numParam, const Mat &blob)
+void Net::setParam(int layer, int numParam, const Mat &blob)
 {
     LayerData &ld = impl->getLayerData(layer);
 
@@ -5077,7 +4685,7 @@ void Net::setParam(LayerId layer, int numParam, const Mat &blob)
     layerBlobs[numParam] = blob;
 }
 
-int Net::getLayerId(const String &layer)
+int Net::getLayerId(const String &layer) const
 {
     return impl->getLayerId(layer);
 }
@@ -5120,7 +4728,7 @@ String Net::dump()
     return impl->dump();
 }
 
-string Net::Impl::dump()
+string Net::Impl::dump() const
 {
     bool hasInput = !netInputLayer->inputsData.empty();
 
@@ -5181,8 +4789,8 @@ string Net::Impl::dump()
         case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break;
         case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break;
         case DNN_BACKEND_INFERENCE_ENGINE: // fallthru
-        case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: backend = "DLIE/"; break;
-        case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "NGRAPH/"; break;
+        case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: // fallthru
+        case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "OpenVINO/"; break;
         case DNN_BACKEND_OPENCV: backend = "OCV/"; break;
         case DNN_BACKEND_VKCOM: backend = "VULKAN/"; break;
         case DNN_BACKEND_CUDA: backend = "CUDA/"; break;
@@ -5388,13 +4996,18 @@ void Net::dumpToFile(const String& path) {
     file.close();
 }
 
-Ptr<Layer> Net::getLayer(LayerId layerId)
+Ptr<Layer> Net::getLayer(int layerId) const
+{
+    LayerData &ld = impl->getLayerData(layerId);
+    return ld.getLayerInstance();
+}
+Ptr<Layer> Net::getLayer(const LayerId& layerId) const
 {
     LayerData &ld = impl->getLayerData(layerId);
     return ld.getLayerInstance();
 }
 
-std::vector<Ptr<Layer> > Net::getLayerInputs(LayerId layerId)
+std::vector<Ptr<Layer> > Net::getLayerInputs(int layerId) const
 {
     LayerData &ld = impl->getLayerData(layerId);
 
@@ -5413,7 +5026,7 @@ std::vector<String> Net::getLayerNames() const
     std::vector<String> res;
     res.reserve(impl->layers.size());
 
-    Impl::MapIdToLayerData::iterator it;
+    Impl::MapIdToLayerData::const_iterator it;
     for (it = impl->layers.begin(); it != impl->layers.end(); it++)
     {
         if (it->second.id) //skip Data layer
@@ -5430,13 +5043,27 @@ bool Net::empty() const
 
 std::vector<int> Net::getUnconnectedOutLayers() const
 {
+    CV_TRACE_FUNCTION();
+    CV_Assert(impl);
+
     std::vector<int> layersIds;
 
-    Impl::MapIdToLayerData::iterator it;
+    // registerOutput() flow: if outputNameToId is non-empty, return exactly those layer ids and skip the scan below
+    const std::map<std::string, int>& outputNameToId = impl->outputNameToId;
+    if (!outputNameToId.empty())
+    {
+        for (std::map<std::string, int>::const_iterator it = outputNameToId.begin(); it != outputNameToId.end(); ++it)
+        {
+            layersIds.push_back(it->second);
+        }
+        return layersIds;
+    }
+
+    Impl::MapIdToLayerData::const_iterator it;
     for (it = impl->layers.begin(); it != impl->layers.end(); it++)
     {
         int lid = it->first;
-        LayerData &ld = it->second;
+        const LayerData &ld = it->second;
 
         if (ld.requiredOutputs.size() == 0)
             layersIds.push_back(lid);
@@ -5536,13 +5163,13 @@ int64 Net::getFLOPS(const MatShape& netInputShape) const
 int64 Net::getFLOPS(const int layerId,
               const std::vector<MatShape>& netInputShapes) const
 {
-    Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
+    Impl::MapIdToLayerData::const_iterator layer = impl->layers.find(layerId);
     CV_Assert(layer != impl->layers.end());
 
     LayerShapes shapes;
     impl->getLayerShapes(netInputShapes, layerId, shapes);
 
-    return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
+    return const_cast<LayerData&>(layer->second).getLayerInstance()->getFLOPS(shapes.in, shapes.out);
 }
 
 int64 Net::getFLOPS(const int layerId,
@@ -5556,7 +5183,7 @@ void Net::getLayerTypes(std::vector<String>& layersTypes) const
     layersTypes.clear();
 
     std::map<String, int> layers;
-    for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
+    for (Impl::MapIdToLayerData::const_iterator it = impl->layers.begin();
          it != impl->layers.end(); it++)
     {
         if (layers.find(it->second.type) == layers.end())
@@ -5564,7 +5191,7 @@ void Net::getLayerTypes(std::vector<String>& layersTypes) const
         layers[it->second.type]++;
     }
 
-    for (std::map<String, int>::iterator it = layers.begin();
+    for (std::map<String, int>::const_iterator it = layers.begin();
          it != layers.end(); it++)
     {
         layersTypes.push_back(it->first);
@@ -5574,7 +5201,7 @@ void Net::getLayerTypes(std::vector<String>& layersTypes) const
 int Net::getLayersCount(const String& layerType) const
 {
     int count = 0;
-    for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
+    for (Impl::MapIdToLayerData::const_iterator it = impl->layers.begin();
          it != impl->layers.end(); it++)
     {
         if (it->second.type == layerType)
@@ -5589,7 +5216,7 @@ void Net::getMemoryConsumption(const int layerId,
 {
     CV_TRACE_FUNCTION();
 
-    Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
+    Impl::MapIdToLayerData::const_iterator layer = impl->layers.find(layerId);
     CV_Assert(layer != impl->layers.end());
 
     weights = blobs = 0;
@@ -5658,7 +5285,7 @@ void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
     for(int i = 0; i < layerIds.size(); i++)
     {
         int w = 0, b = 0;
-        Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
+        Impl::MapIdToLayerData::const_iterator layer = impl->layers.find(layerIds[i]);
         CV_Assert(layer != impl->layers.end());
 
         for(int j = 0; j < layer->second.params.blobs.size(); j++)
@@ -5764,13 +5391,6 @@ Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
     return Ptr<BackendNode>();
 }
 
-Ptr<BackendNode> Layer::initInfEngine(const std::vector<Ptr<BackendWrapper> > &)
-{
-    CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
-                                       " layers is not defined.");
-    return Ptr<BackendNode>();
-}
-
 Ptr<BackendNode> Layer::initNgraph(const std::vector<Ptr<BackendWrapper> > & inputs, const std::vector<Ptr<BackendNode> >& nodes)
 {
     CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
@@ -6092,6 +5712,13 @@ void LayerFactory::unregisterLayer(const String &type)
     }
 }
 
+bool LayerFactory::isLayerRegistered(const std::string& type)
+{
+    cv::AutoLock lock(getLayerFactoryMutex());
+    auto& registeredLayers = getLayerFactoryImpl();
+    return registeredLayers.find(type) != registeredLayers.end();
+}
+
 Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
 {
     CV_TRACE_FUNCTION();
diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp
index ffeb3bfda1..6350a4e168 100644
--- a/modules/dnn/src/dnn_common.hpp
+++ b/modules/dnn/src/dnn_common.hpp
@@ -5,8 +5,8 @@
 #ifndef __OPENCV_DNN_COMMON_HPP__
 #define __OPENCV_DNN_COMMON_HPP__
 
-#include <unordered_set>
 #include <unordered_map>
+#include <unordered_set>
 
 #include <opencv2/dnn.hpp>
 
@@ -59,7 +59,7 @@ class LayerHandler
 public:
     void addMissing(const std::string& name, const std::string& type);
     bool contains(const std::string& type) const;
-    void printMissing();
+    void printMissing() const;
 
 protected:
     LayerParams getNotImplementedParams(const std::string& name, const std::string& op);
@@ -71,12 +71,12 @@ private:
 struct NetImplBase
 {
     const int networkId;  // network global identifier
-    int networkDumpCounter;  // dump counter
+    mutable int networkDumpCounter;  // dump counter; mutable so that const member functions may still modify it
     int dumpLevel;  // level of information dumps (initialized through OPENCV_DNN_NETWORK_DUMP parameter)
 
     NetImplBase();
 
-    std::string getDumpFileNameBase();
+    std::string getDumpFileNameBase() const;
 };
 
 }  // namespace detail
diff --git a/modules/dnn/src/graph_simplifier.cpp b/modules/dnn/src/graph_simplifier.cpp
index a23fce30f5..e58e0e38e8 100644
--- a/modules/dnn/src/graph_simplifier.cpp
+++ b/modules/dnn/src/graph_simplifier.cpp
@@ -108,7 +108,7 @@ bool Subgraph::match(const Ptr<ImportGraphWrapper>& net, int nodeId,
 
         for (int j = 0; j < inputNodes.size(); ++j)
         {
-            if (nodes[inputNodes[j]].empty())  // Unknown input node type.
+            if (nodes[inputNodes[j]].empty() || node->getInputName(j).empty())  // Unknown input node type.
                 continue;
             nodeId = getInputNodeId(net, node, j);
             const Ptr<ImportNodeWrapper> inpNode = net->getNode(nodeId);
diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp
index a61766337e..44cd074de4 100644
--- a/modules/dnn/src/ie_ngraph.cpp
+++ b/modules/dnn/src/ie_ngraph.cpp
@@ -330,7 +330,7 @@ public:
 InfEngineNgraphNode::InfEngineNgraphNode(std::shared_ptr<ngraph::Node>&& _node)
     : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(std::move(_node)) {}
 
-InfEngineNgraphNode::InfEngineNgraphNode(std::shared_ptr<ngraph::Node>& _node)
+InfEngineNgraphNode::InfEngineNgraphNode(const std::shared_ptr<ngraph::Node>& _node)
     : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(_node) {}
 
 InfEngineNgraphNode::InfEngineNgraphNode(const std::vector<Ptr<BackendNode> >& nodes,
@@ -379,16 +379,21 @@ InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEn
     device_name = "CPU";
 }
 
-void InfEngineNgraphNet::addOutput(const std::string& name)
+void InfEngineNgraphNet::addOutput(const Ptr<InfEngineNgraphNode>& node)
 {
-    requestedOutputs.push_back(name);
+    CV_Assert(node);
+    CV_Assert(node->node);
+    const std::string& name = node->node->get_friendly_name();
+    requestedOutputs.insert({name, node});
 }
 
 void InfEngineNgraphNet::setNodePtr(std::shared_ptr<ngraph::Node>* ptr) {
     all_nodes.emplace((*ptr)->get_friendly_name(), ptr);
 }
 
- void InfEngineNgraphNet::release() {
+ void InfEngineNgraphNet::release()
+ {
+     // FIXIT release should not be conditional, release ALL
      for (auto& node : components.back()) {
 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
          if (!(ngraph::op::is_parameter(node) || ngraph::op::is_output(node) || ngraph::op::is_constant(node)) ) {
@@ -397,7 +402,6 @@ void InfEngineNgraphNet::setNodePtr(std::shared_ptr<ngraph::Node>* ptr) {
 #endif
              auto it = all_nodes.find(node->get_friendly_name());
              if (it != all_nodes.end()) {
-                 unconnectedNodes.erase(*(it->second));
                  it->second->reset();
                  all_nodes.erase(it);
              }
@@ -422,7 +426,8 @@ void InfEngineNgraphNet::dfs(std::shared_ptr<ngraph::Node>& node,
     }
 }
 
-int InfEngineNgraphNet::getNumComponents() {
+int InfEngineNgraphNet::getNumComponents()
+{
     if (!components.empty()) {
         return components.size();
     }
@@ -445,17 +450,21 @@ int InfEngineNgraphNet::getNumComponents() {
 void InfEngineNgraphNet::createNet(Target targetId) {
     if (!hasNetOwner)
     {
-        CV_Assert(!unconnectedNodes.empty());
+        CV_Assert(!requestedOutputs.empty());
         ngraph::ResultVector outs;
-        for (auto& node : unconnectedNodes)
+
+        for (auto output_node_it = requestedOutputs.begin(); output_node_it != requestedOutputs.end(); ++output_node_it)
         {
-            auto out = std::make_shared<ngraph::op::Result>(node);
+            CV_LOG_DEBUG(NULL, "DNN/NGRAPH: Add 'Result' output: " << output_node_it->first);
+            CV_Assert(output_node_it->second);
+            auto out = std::make_shared<ngraph::op::Result>(output_node_it->second->node);
             outs.push_back(out);
         }
         CV_Assert_N(!inputs_vec.empty(), !outs.empty());
         ngraph_function = std::make_shared<ngraph::Function>(outs, inputs_vec);
 
         int num_comp = getNumComponents();
+        CV_LOG_DEBUG(NULL, "DNN/IE: number of subgraphs: " << num_comp);
         if (num_comp > 1) {
             for (int i = num_comp - 1; i >= 0; --i) {
                 ngraph::ResultVector outputs;
@@ -466,6 +475,7 @@ void InfEngineNgraphNet::createNet(Target targetId) {
 #else
                     if (node->is_parameter()) {
 #endif
+                        CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +input[" << inps.size() << "] = '" << node->get_friendly_name() << "'");
                         auto parameter = std::dynamic_pointer_cast<ngraph::op::Parameter>(node);
                         inps.push_back(parameter);
                     }
@@ -474,10 +484,12 @@ void InfEngineNgraphNet::createNet(Target targetId) {
 #else
                     else if (node->is_output()) {
 #endif
+                        CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +output[" << outputs.size() << "] = '" << node->get_friendly_name() << "'");
                         auto result = std::dynamic_pointer_cast<ngraph::op::Result>(node);
                         outputs.push_back(result);
                     }
                 }
+                CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: nodes=" << components.back().size() << " inputs=" << inps.size() << " outputs=" << outputs.size());
                 isInit = false;
                 CV_Assert_N(!inps.empty(), !outputs.empty());
                 ngraph_function = std::make_shared<ngraph::Function>(outputs, inps);
@@ -574,7 +586,7 @@ void InfEngineNgraphNet::init(Target targetId)
             auto node = ngraph_function->output(i).get_node();
             for (size_t j = 0; j < node->get_input_size(); ++j) {
                 std::string name = node->input_value(j).get_node()->get_friendly_name();
-                auto iter = std::find(requestedOutputs.begin(), requestedOutputs.end(), name);
+                auto iter = requestedOutputs.find(name);
                 if (iter != requestedOutputs.end()) {
                     requestedOutputs.erase(iter);
                     cnn.addOutput(name);
@@ -582,10 +594,6 @@ void InfEngineNgraphNet::init(Target targetId)
             }
         }
     }
-    for (const auto& name : requestedOutputs)
-    {
-        cnn.addOutput(name);
-    }
 
     for (const auto& it : cnn.getInputsInfo())
     {
@@ -630,9 +638,6 @@ ngraph::ParameterVector InfEngineNgraphNet::setInputs(const std::vector<cv::Mat>
     return current_inp;
 }
 
-void InfEngineNgraphNet::setUnconnectedNodes(Ptr<InfEngineNgraphNode>& node) {
-    unconnectedNodes.insert(node->node);
-}
 
 void InfEngineNgraphNet::initPlugin(InferenceEngine::CNNNetwork& net)
 {
@@ -732,10 +737,10 @@ void InfEngineNgraphNet::initPlugin(InferenceEngine::CNNNetwork& net)
                 }
             }
         }
-        if (isHetero)
-            netExec = ie.LoadNetwork(net, "HETERO:" + device_name + ",CPU", config);
-        else
-            netExec = ie.LoadNetwork(net, device_name, config);
+
+        std::string ieDevice = isHetero ? ("HETERO:" + device_name + ",CPU") : device_name;
+        CV_LOG_INFO(NULL, "DNN/IE: Calling LoadNetwork(device=" << ieDevice << ")...");
+        netExec = ie.LoadNetwork(net, ieDevice, config);
     }
     catch (const std::exception& ex)
     {
diff --git a/modules/dnn/src/ie_ngraph.hpp b/modules/dnn/src/ie_ngraph.hpp
index 617f1d4542..0d287a22a5 100644
--- a/modules/dnn/src/ie_ngraph.hpp
+++ b/modules/dnn/src/ie_ngraph.hpp
@@ -37,7 +37,7 @@ public:
     InfEngineNgraphNet(detail::NetImplBase& netImpl);
     InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEngine::CNNNetwork& net);
 
-    void addOutput(const std::string& name);
+    void addOutput(const Ptr<InfEngineNgraphNode>& node);
 
     bool isInitialized();
     void init(Target targetId);
@@ -47,7 +47,6 @@ public:
     void initPlugin(InferenceEngine::CNNNetwork& net);
     ngraph::ParameterVector setInputs(const std::vector<cv::Mat>& inputs, const std::vector<std::string>& names);
 
-    void setUnconnectedNodes(Ptr<InfEngineNgraphNode>& node);
     void addBlobs(const std::vector<cv::Ptr<BackendWrapper> >& ptrs);
 
     void createNet(Target targetId);
@@ -88,8 +87,7 @@ public:
 
     InferenceEngine::CNNNetwork cnn;
     bool hasNetOwner;
-    std::vector<std::string> requestedOutputs;
-    std::unordered_set<std::shared_ptr<ngraph::Node>> unconnectedNodes;
+    std::unordered_map<std::string, Ptr<InfEngineNgraphNode> > requestedOutputs;
 
     std::map<std::string, InferenceEngine::TensorDesc> outputsDesc;
 };
@@ -102,7 +100,7 @@ public:
                         std::vector<Mat>& internals);
 
     InfEngineNgraphNode(std::shared_ptr<ngraph::Node>&& _node);
-    InfEngineNgraphNode(std::shared_ptr<ngraph::Node>& _node);
+    InfEngineNgraphNode(const std::shared_ptr<ngraph::Node>& _node);
 
     void setName(const std::string& name);
 
diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp
index d22a070805..bb81f14425 100644
--- a/modules/dnn/src/layers/batch_norm_layer.cpp
+++ b/modules/dnn/src/layers/batch_norm_layer.cpp
@@ -170,11 +170,14 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return preferableTarget == DNN_TARGET_CPU || dims == 4;
+#endif
         return (backendId == DNN_BACKEND_OPENCV) ||
                backendId == DNN_BACKEND_CUDA ||
                (backendId == DNN_BACKEND_HALIDE && haveHalide()) ||
-               backendId == DNN_BACKEND_WEBNN ||
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && (preferableTarget == DNN_TARGET_CPU || dims == 4));
+               backendId == DNN_BACKEND_WEBNN;
     }
 
 #ifdef HAVE_OPENCL
@@ -382,16 +385,6 @@ public:
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name);
-        const size_t numChannels = weights_.total();
-        addConstantData("weights", wrapToInfEngineBlob(weights_, {numChannels}, InferenceEngine::Layout::C), ieLayer);
-        addConstantData("biases", wrapToInfEngineBlob(bias_, {numChannels}, InferenceEngine::Layout::C), ieLayer);
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp
index 59548a9c0c..0d6ab19e4d 100644
--- a/modules/dnn/src/layers/blank_layer.cpp
+++ b/modules/dnn/src/layers/blank_layer.cpp
@@ -63,9 +63,12 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
-               backendId == DNN_BACKEND_CUDA ||
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine());
+               backendId == DNN_BACKEND_CUDA;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -116,32 +119,6 @@ public:
     }
 
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
-    {
-        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
-        std::vector<size_t> dims = input->getDims();
-        CV_Assert(!dims.empty());
-
-        InferenceEngine::Builder::Layer ieLayer(name);
-        ieLayer.setName(name);
-        if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL)
-        {
-            ieLayer.setType("Copy");
-        }
-        else
-        {
-            ieLayer.setType("Split");
-            ieLayer.getParameters()["axis"] = dims.size() - 1;
-            ieLayer.getParameters()["out_sizes"] = dims[0];
-        }
-        ieLayer.setInputPorts({InferenceEngine::Port(dims)});
-        ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
-
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp
index f620d66a39..675546f76f 100644
--- a/modules/dnn/src/layers/concat_layer.cpp
+++ b/modules/dnn/src/layers/concat_layer.cpp
@@ -113,11 +113,13 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
                (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding) ||  // By channels
-               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() && !padding) ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
                (backendId == DNN_BACKEND_WEBNN && !padding) ||
                (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !padding);
     }
@@ -343,18 +345,6 @@ public:
         return Ptr<BackendNode>();
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
-    {
-        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
-
-        InferenceEngine::Builder::ConcatLayer ieLayer(name);
-        ieLayer.setAxis(normalize_axis(axis, input->getDims().size()));
-        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
diff --git a/modules/dnn/src/layers/const_layer.cpp b/modules/dnn/src/layers/const_layer.cpp
index 1f307b8fa6..4392763be7 100644
--- a/modules/dnn/src/layers/const_layer.cpp
+++ b/modules/dnn/src/layers/const_layer.cpp
@@ -34,9 +34,11 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
                backendId == DNN_BACKEND_WEBNN ||
                backendId == DNN_BACKEND_CUDA;
     }
@@ -78,16 +80,6 @@ public:
     }
 
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::ConstLayer ieLayer(name);
-        ieLayer.setData(wrapToInfEngineBlob(blobs[0]));
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
-
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index bcc783d8a0..1af34472df 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -330,7 +330,7 @@ public:
         }
 #endif
 #ifdef HAVE_INF_ENGINE
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         {
             bool isArmTarget = preferableTarget == DNN_TARGET_CPU && isArmComputePlugin();
             if (isArmTarget && blobs.empty())
@@ -340,7 +340,7 @@ public:
             if (ksize == 3)
                 return preferableTarget != DNN_TARGET_MYRIAD && !isArmTarget;
             bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
-            if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || !isMyriad) && blobs.empty())
+            if (!isMyriad && blobs.empty())
                 return false;
             return (!isMyriad || dilation.width == dilation.height);
         }
@@ -421,7 +421,9 @@ public:
         if (!blobs.empty())
         {
             Mat wm = blobs[0].reshape(1, numOutput);
-            if( wm.step1() % VEC_ALIGN != 0 )
+            if ((wm.step1() % VEC_ALIGN != 0) ||
+                !isAligned<VEC_ALIGN * sizeof(float)>(wm.data)
+            )
             {
                 int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);
                 Mat wm_buffer = Mat(numOutput, newcols, wm.type());
@@ -759,69 +761,6 @@ public:
         return Ptr<BackendNode>();
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
-    {
-        CV_Assert(!blobs.empty());
-        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
-        std::vector<size_t> dims = input->getDims();
-        CV_Assert(dims.size() == 4 || dims.size() == 5);
-        const int inpCn = dims[1];
-        const int outCn = blobs[0].size[0];
-        const int inpGroupCn = blobs[0].size[1];
-        const int group = inpCn / inpGroupCn;
-        InferenceEngine::Layout layout = (dims.size() == 4) ? InferenceEngine::Layout::OIHW :
-                                                              InferenceEngine::Layout::NCDHW;
-
-        auto ieWeights = wrapToInfEngineBlob(blobs[0], layout);
-        if (fusedWeights)
-        {
-            if (weightsMat.isContinuous())
-            {
-                Mat cvWeights = weightsMat.reshape(1, blobs[0].dims, blobs[0].size);
-                ieWeights = wrapToInfEngineBlob(cvWeights, layout);
-            }
-            else
-            {
-                ieWeights = InferenceEngine::make_shared_blob<float>({
-                                InferenceEngine::Precision::FP32,
-                                ieWeights->getTensorDesc().getDims(), layout
-                            });
-                ieWeights->allocate();
-
-                Mat newWeights = infEngineBlobToMat(ieWeights).reshape(1, outCn);
-                Mat cvWeights = weightsMat.colRange(0, newWeights.cols);
-                cvWeights.copyTo(newWeights);
-            }
-        }
-        InferenceEngine::Blob::Ptr ieBiases;
-        if (hasBias() || fusedBias)
-        {
-            Mat biasesMat({outCn}, CV_32F, &biasvec[0]);
-            ieBiases = wrapToInfEngineBlob(biasesMat, {(size_t)outCn}, InferenceEngine::Layout::C);
-        }
-
-        InferenceEngine::Builder::ConvolutionLayer ieLayer(name);
-
-        ieLayer.setKernel(kernel_size);
-        ieLayer.setStrides(strides);
-        ieLayer.setDilation(dilations);
-        ieLayer.setPaddingsBegin(pads_begin);
-        ieLayer.setPaddingsEnd(pads_end);
-        ieLayer.setGroup((size_t)group);
-        ieLayer.setOutDepth((size_t)outCn);
-
-        InferenceEngine::Builder::Layer l = ieLayer;
-        addConstantData("weights", ieWeights, l);
-        if (ieBiases)
-            addConstantData("biases", ieBiases, l);
-
-        if (!padMode.empty())
-            l.getParameters()["auto_pad"] = padMode == "VALID" ? std::string("valid") : std::string("same_upper");
-
-        return Ptr<BackendNode>(new InfEngineBackendNode(l));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
@@ -1660,7 +1599,6 @@ public:
                                 }
                             }
                         }
-
                         // now compute dot product of the weights
                         // and im2row-transformed part of the tensor
                     #if CV_TRY_AVX512_SKX
@@ -1995,13 +1933,6 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-#if CV_SSE3
-        uint32_t ftzMode = _MM_GET_FLUSH_ZERO_MODE();
-        uint32_t dazMode = _MM_GET_DENORMALS_ZERO_MODE();
-        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
-        _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
-#endif
-
         CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))
 
@@ -2138,10 +2069,6 @@ public:
             ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
                             kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
         }
-#if CV_SSE3
-        _MM_SET_FLUSH_ZERO_MODE(ftzMode);
-        _MM_SET_DENORMALS_ZERO_MODE(dazMode);
-#endif
     }
 
 #ifdef HAVE_CUDA
@@ -2329,52 +2256,6 @@ public:
         if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
             return group == 1;
         }
-
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        {
-            if (kernel_size.size() == 3 && preferableTarget != DNN_TARGET_CPU) {
-                return false;
-            }
-
-            if (std::accumulate(adjust_pads.begin(), adjust_pads.end(), 0, std::plus<size_t>()) > 0)
-            {
-                if (padMode.empty())
-                {
-                    if (preferableTarget != DNN_TARGET_CPU && group != 1)
-                    {
-                        for (int i = 0; i < adjust_pads.size(); i++) {
-                            if (adjust_pads[i] && pads_begin[i])
-                                return false;
-                        }
-                    }
-                    for (int i = 0; i < adjust_pads.size(); i++) {
-                        if (pads_end[i] < adjust_pads[i])
-                            return false;
-                    }
-                    return true;
-                }
-                else if (padMode == "SAME")
-                {
-                    for (int i = 0; i < adjust_pads.size(); i++) {
-                        if (kernel_size[i] < pads_begin[i] + 1 + adjust_pads[i])
-                            return false;
-                    }
-                    return true;
-                }
-                else if (padMode == "VALID")
-                    return false;
-            }
-
-            if (group != 1)
-            {
-                return preferableTarget == DNN_TARGET_CPU;
-            }
-            if (preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16)
-                return std::accumulate(dilations.begin(), dilations.end(), 1, std::multiplies<size_t>()) == 1;
-            return true;
-        }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 #endif  // HAVE_INF_ENGINE
         {
             return backendId == DNN_BACKEND_CUDA ||
@@ -3032,64 +2913,6 @@ public:
         return Ptr<BackendNode>();
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &) CV_OVERRIDE
-    {
-        CV_Assert(!blobs.empty());
-        InferenceEngine::Layout layout = blobs[0].dims == 5? InferenceEngine::Layout::NCDHW :
-                                                             InferenceEngine::Layout::OIHW;
-
-        auto ieWeights = wrapToInfEngineBlob(blobs[0], layout);
-        if (fusedWeights)
-        {
-            ieWeights = InferenceEngine::make_shared_blob<float>({
-                            InferenceEngine::Precision::FP32,
-                            ieWeights->getTensorDesc().getDims(), layout
-                        });
-            ieWeights->allocate();
-
-            int inpCn = blobs[0].size[0];
-            Mat newWeights = infEngineBlobToMat(ieWeights).reshape(1, inpCn);
-            transpose(weightsMat, newWeights);
-        }
-
-        const int outGroupCn = blobs[0].size[1];  // Weights are in IOHW or OIDHW layout
-        const int group = numOutput / outGroupCn;
-
-        InferenceEngine::Builder::DeconvolutionLayer ieLayer(name);
-
-        ieLayer.setKernel(kernel_size);
-        ieLayer.setStrides(strides);
-        ieLayer.setDilation(dilations);
-        ieLayer.setPaddingsBegin(pads_begin);
-
-        if (padMode.empty())
-        {
-            std::vector<size_t> paddings_end;
-            for (int i = 0; i < pads_end.size(); i++) {
-                paddings_end.push_back(pads_end[i] - adjust_pads[i]);
-            }
-            ieLayer.setPaddingsEnd(paddings_end);
-        }
-        else if (padMode == "SAME")
-        {
-            std::vector<size_t> paddings_end;
-            for (int i = 0; i < pads_begin.size(); i++) {
-                paddings_end.push_back(kernel_size[i] - pads_begin[i] - 1 - adjust_pads[i]);
-            }
-            ieLayer.setPaddingsEnd(paddings_end);
-        }
-        ieLayer.setGroup((size_t)group);
-        ieLayer.setOutDepth((size_t)numOutput);
-
-        InferenceEngine::Builder::Layer l = ieLayer;
-        addConstantData("weights", ieWeights, l);
-        if (hasBias())
-            addConstantData("biases", wrapToInfEngineBlob(biasesMat, {(size_t)numOutput}, InferenceEngine::Layout::C), l);
-        return Ptr<BackendNode>(new InfEngineBackendNode(l));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp
index 77d86d5652..61d4f44432 100644
--- a/modules/dnn/src/layers/detection_output_layer.cpp
+++ b/modules/dnn/src/layers/detection_output_layer.cpp
@@ -221,7 +221,7 @@ public:
     {
         return backendId == DNN_BACKEND_OPENCV ||
                (backendId == DNN_BACKEND_CUDA && !_groupByClasses) ||
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && !_locPredTransposed && _bboxesNormalized);
+               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && !_locPredTransposed && _bboxesNormalized);
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -1001,30 +1001,6 @@ public:
     }
 #endif
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::DetectionOutputLayer ieLayer(name);
-
-        ieLayer.setNumClasses(_numClasses);
-        ieLayer.setShareLocation(_shareLocation);
-        ieLayer.setBackgroudLabelId(_backgroundLabelId);
-        ieLayer.setNMSThreshold(_nmsThreshold);
-        ieLayer.setTopK(_topK > 0 ? _topK : _keepTopK);
-        ieLayer.setKeepTopK(_keepTopK);
-        ieLayer.setConfidenceThreshold(_confidenceThreshold);
-        ieLayer.setVariantEncodedInTarget(_varianceEncodedInTarget);
-        ieLayer.setCodeType("caffe.PriorBoxParameter." + _codeType);
-        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(3));
-
-        InferenceEngine::Builder::Layer l = ieLayer;
-        l.getParameters()["eta"] = std::string("1.0");
-        l.getParameters()["clip"] = _clip;
-
-        return Ptr<BackendNode>(new InfEngineBackendNode(l));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp
index bfabef9d68..0085ba7449 100644
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -186,14 +186,6 @@ public:
         return Ptr<BackendNode>();
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::Layer ieLayer = func.initInfEngineBuilderAPI();
-        ieLayer.setName(this->name);
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
@@ -341,10 +333,6 @@ struct ReLUFunctor : public BaseFunctor
 
     bool supportBackend(int backendId, int)
     {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-            return slope >= 0 || !INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2019R1);
-#endif
 #ifdef HAVE_DNN_NGRAPH
         if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
             return true;
@@ -462,13 +450,6 @@ struct ReLUFunctor : public BaseFunctor
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
-    {
-        return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(slope);
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
     {
@@ -534,11 +515,14 @@ struct ReLU6Functor : public BaseFunctor
 
     bool supportBackend(int backendId, int)
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
                backendId == DNN_BACKEND_HALIDE ||
-               backendId == DNN_BACKEND_WEBNN ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+               backendId == DNN_BACKEND_WEBNN;
     }
 
     void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
@@ -620,12 +604,6 @@ struct ReLU6Functor : public BaseFunctor
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
-    {
-        return InferenceEngine::Builder::ClampLayer("").setMinValue(minValue).setMaxValue(maxValue);
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
@@ -743,12 +721,6 @@ struct BaseDefaultFunctor : public BaseFunctor
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
-    {
-        CV_Error(Error::StsNotImplemented, "");
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
@@ -782,10 +754,13 @@ struct TanHFunctor : public BaseDefaultFunctor<TanHFunctor>
 
     bool supportBackend(int backendId, int)
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_HALIDE ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+               backendId == DNN_BACKEND_HALIDE;
     }
 
     inline float calculate(float x) const
@@ -808,13 +783,6 @@ struct TanHFunctor : public BaseDefaultFunctor<TanHFunctor>
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
-    {
-        return InferenceEngine::Builder::TanHLayer("");
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
     {
@@ -937,10 +905,13 @@ struct SigmoidFunctor : public BaseDefaultFunctor<SigmoidFunctor>
 
     bool supportBackend(int backendId, int)
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_HALIDE ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||  backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+               backendId == DNN_BACKEND_HALIDE;
     }
 
     inline float calculate(float x) const
@@ -963,12 +934,6 @@ struct SigmoidFunctor : public BaseDefaultFunctor<SigmoidFunctor>
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
-    {
-        return InferenceEngine::Builder::SigmoidLayer("");
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
@@ -992,10 +957,13 @@ struct ELUFunctor : public BaseDefaultFunctor<ELUFunctor>
 
     bool supportBackend(int backendId, int)
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_HALIDE ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||  backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+               backendId == DNN_BACKEND_HALIDE;
     }
 
     inline float calculate(float x) const
@@ -1023,13 +991,6 @@ struct ELUFunctor : public BaseDefaultFunctor<ELUFunctor>
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
-    {
-        return InferenceEngine::Builder::ELULayer("");
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
     {
@@ -1050,8 +1011,8 @@ struct AbsValFunctor : public BaseDefaultFunctor<AbsValFunctor>
     bool supportBackend(int backendId, int)
     {
 #ifdef HAVE_INF_ENGINE
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-            return !INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2019R1);
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
 #endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
@@ -1078,12 +1039,6 @@ struct AbsValFunctor : public BaseDefaultFunctor<AbsValFunctor>
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
-    {
-        return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(-0.999999f);
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
@@ -1930,14 +1885,15 @@ struct PowerFunctor : public BaseFunctor
 
     bool supportBackend(int backendId, int targetId)
     {
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-            return (targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16) || power == 1.0 || power == 0.5;
+#ifdef HAVE_INF_ENGINE
         if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
             return true;
-        else
+#endif
+        {
             return backendId == DNN_BACKEND_OPENCV ||
                    backendId == DNN_BACKEND_CUDA ||
                    backendId == DNN_BACKEND_HALIDE;
+        }
     }
 
     void finalize()
@@ -2029,14 +1985,6 @@ struct PowerFunctor : public BaseFunctor
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
-    {
-        return InferenceEngine::Builder::PowerLayer("").setPower(power)
-                                                       .setScale(scale)
-                                                       .setShift(shift);
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
@@ -2189,10 +2137,13 @@ struct ChannelsPReLUFunctor : public BaseFunctor
 
     bool supportBackend(int backendId, int)
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_HALIDE ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+               backendId == DNN_BACKEND_HALIDE;
     }
 
     void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
@@ -2282,15 +2233,6 @@ struct ChannelsPReLUFunctor : public BaseFunctor
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
-    {
-        InferenceEngine::Builder::Layer l = InferenceEngine::Builder::PReLULayer("");
-        const size_t numChannels = scale.total();
-        addConstantData("weights", wrapToInfEngineBlob(scale, {numChannels}, InferenceEngine::Layout::C), l);
-        return l;
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp
index 2c473ff412..43d925055c 100644
--- a/modules/dnn/src/layers/eltwise_layer.cpp
+++ b/modules/dnn/src/layers/eltwise_layer.cpp
@@ -164,6 +164,11 @@ public:
         if (hasVecInput && ELTWISE_CHANNNELS_SAME)
             return backendId == DNN_BACKEND_OPENCV;
 
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return channelsMode == ELTWISE_CHANNNELS_SAME;
+#endif
+
         if (backendId == DNN_BACKEND_CUDA)
         {
             if(channelsModeInput == ELTWISE_CHANNNELS_INPUT_0 || channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
@@ -172,9 +177,8 @@ public:
         }
 
         return backendId == DNN_BACKEND_OPENCV ||
-               (backendId == DNN_BACKEND_HALIDE && op != DIV) ||  // TODO: not implemented, see PR #15811
-               ((((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (preferableTarget != DNN_TARGET_OPENCL || coeffs.empty()))
-                || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && channelsMode == ELTWISE_CHANNNELS_SAME));
+               (backendId == DNN_BACKEND_HALIDE && op != DIV)  // TODO: not implemented, see PR #15811
+               ;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -837,34 +841,6 @@ public:
         return Ptr<BackendNode>();
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::EltwiseLayer ieLayer(name);
-
-        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
-
-        if (op == SUM)
-            ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::SUM);
-        else if (op == PROD)
-            ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MUL);
-        else if (op == DIV)
-            ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::DIV);
-        else if (op == MAX)
-            ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MAX);
-        else if (op == MIN)
-            ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MIN);
-        else
-            CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
-
-        InferenceEngine::Builder::Layer l = ieLayer;
-        if (!coeffs.empty())
-            l.getParameters()["coeff"] = coeffs;
-
-        return Ptr<BackendNode>(new InfEngineBackendNode(l));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
diff --git a/modules/dnn/src/layers/flatten_layer.cpp b/modules/dnn/src/layers/flatten_layer.cpp
index 69bc422ee3..b3f57dc7cd 100644
--- a/modules/dnn/src/layers/flatten_layer.cpp
+++ b/modules/dnn/src/layers/flatten_layer.cpp
@@ -72,9 +72,12 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
-               backendId == DNN_BACKEND_CUDA ||
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine());
+               backendId == DNN_BACKEND_CUDA;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -171,25 +174,10 @@ public:
     }
 
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::Layer ieLayer(name);
-        ieLayer.setName(name);
-        ieLayer.setType("Flatten");
-        ieLayer.getParameters()["axis"] = (size_t)_startAxis;
-        ieLayer.getParameters()["end_axis"] = _endAxis;  // Do not cast to size_t because it might be negative.
-        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
-        ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
-
 #ifdef HAVE_DNN_NGRAPH
-virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
-                                    const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
-{
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
         auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
         std::vector<size_t> dims = ieInpNode->get_shape();
 
diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp
index 99acba908b..5d0ad5fde7 100644
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@@ -148,12 +148,15 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return axis == 1;
+#endif
+
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
                (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1) ||
-               (backendId == DNN_BACKEND_WEBNN && axis == 1) ||
-               (((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && !blobs.empty()) ||
-                backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && axis == 1);
+               (backendId == DNN_BACKEND_WEBNN && axis == 1);
     }
 
     virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -570,23 +573,6 @@ public:
         return Ptr<BackendNode>();
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::FullyConnectedLayer ieLayer(name);
-
-        const int outNum = blobs[0].size[0];
-        ieLayer.setOutputNum(outNum);
-
-        InferenceEngine::Builder::Layer l = ieLayer;
-        addConstantData("weights", wrapToInfEngineBlob(blobs[0], {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1], 1, 1}, InferenceEngine::Layout::OIHW), l);
-        if (bias)
-            addConstantData("biases", wrapToInfEngineBlob(blobs[1], {(size_t)outNum}, InferenceEngine::Layout::C), l);
-
-        return Ptr<BackendNode>(new InfEngineBackendNode(l));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
diff --git a/modules/dnn/src/layers/layers_common.simd.hpp b/modules/dnn/src/layers/layers_common.simd.hpp
index 67a4b3c065..fd88a3c3d2 100644
--- a/modules/dnn/src/layers/layers_common.simd.hpp
+++ b/modules/dnn/src/layers/layers_common.simd.hpp
@@ -81,6 +81,8 @@ void fastConv( const float* weights, size_t wstep, const float* bias,
                int blockSize, int vecsize, int vecsize_aligned,
                const float* relu, bool initOutput )
 {
+    CV_Assert(isAligned<32>(weights));
+
     int outCn = outShape[1];
     size_t outPlaneSize = outShape[2]*outShape[3];
     float r0 = 1.f, r1 = 1.f, r2 = 1.f;
diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp
index 224441b0e7..6c3a654159 100644
--- a/modules/dnn/src/layers/lrn_layer.cpp
+++ b/modules/dnn/src/layers/lrn_layer.cpp
@@ -99,12 +99,10 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
             return bias == (int)bias;
-        }
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
-            return bias == (int)bias;
-        }
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
                backendId == DNN_BACKEND_HALIDE ||
@@ -444,24 +442,6 @@ public:
 #endif  // HAVE_HALIDE
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        float alphaSize = alpha;
-        if (!normBySize)
-            alphaSize *= (type == SPATIAL_NRM ? size*size : size);
-
-        InferenceEngine::Builder::NormLayer ieLayer(name);
-        ieLayer.setSize(size);
-        ieLayer.setAlpha(alphaSize);
-        ieLayer.setBeta(beta);
-        ieLayer.setAcrossMaps(type == CHANNEL_NRM);
-
-        InferenceEngine::Builder::Layer l = ieLayer;
-        l.getParameters()["k"] = bias;
-        return Ptr<BackendNode>(new InfEngineBackendNode(l));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp
index 455f4997df..dc23656b7a 100644
--- a/modules/dnn/src/layers/mvn_layer.cpp
+++ b/modules/dnn/src/layers/mvn_layer.cpp
@@ -124,14 +124,7 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        {
-            bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
-            return !zeroDev && (!isMyriad || eps <= 1e-7f);
-        }
-#endif
-#ifdef HAVE_DNN_NGRAPH
+#ifdef HAVE_INF_ENGINE
         if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
             return true;
 #endif
@@ -387,16 +380,6 @@ public:
         }
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::MVNLayer ieLayer(name);
-        ieLayer.setAcrossChannels(acrossChannels);
-        ieLayer.setNormalize(normVariance);
-        ieLayer.setEpsilon(eps);
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp
index 236f2e43f1..2017d76801 100644
--- a/modules/dnn/src/layers/normalize_bbox_layer.cpp
+++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp
@@ -70,17 +70,15 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         {
             if (pnorm != 2)
                 return false;
 
-            bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
-            if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && isMyriad)
-                return !acrossSpatial;
-
             return startAxis == 1;
         }
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
                (backendId == DNN_BACKEND_CUDA && (pnorm == 1 || pnorm == 2));
     }
@@ -270,58 +268,6 @@ public:
     }
 
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
-    {
-        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
-        std::vector<size_t> dims = input->getDims();
-        if (dims.size() == 4)
-        {
-            InferenceEngine::Builder::NormalizeLayer ieLayer(name);
-
-            ieLayer.setChannelShared(false);
-            ieLayer.setAcrossMaps(acrossSpatial);
-            ieLayer.setEpsilon(epsilon);
-
-            InferenceEngine::Builder::Layer l = ieLayer;
-            const int numChannels = dims[1];
-            InferenceEngine::Blob::Ptr weights;
-            if (blobs.empty())
-            {
-                weights = InferenceEngine::make_shared_blob<float>({
-                              InferenceEngine::Precision::FP32,
-                              {(size_t)numChannels}, InferenceEngine::Layout::C
-                          });
-                weights->allocate();
-
-                Mat weightsMat = infEngineBlobToMat(weights).reshape(1, numChannels);
-                Mat(numChannels, 1, CV_32F, Scalar(1)).copyTo(weightsMat);
-                l.getParameters()["channel_shared"] = false;
-            }
-            else
-            {
-                CV_Assert(numChannels == blobs[0].total());
-                weights = wrapToInfEngineBlob(blobs[0], {(size_t)numChannels}, InferenceEngine::Layout::C);
-                l.getParameters()["channel_shared"] = blobs[0].total() == 1;
-            }
-            addConstantData("weights", weights, l);
-            l.getParameters()["across_spatial"] = acrossSpatial;
-            return Ptr<BackendNode>(new InfEngineBackendNode(l));
-        }
-        else
-        {
-            InferenceEngine::Builder::GRNLayer ieLayer(name);
-            ieLayer.setBeta(epsilon);
-
-            InferenceEngine::Builder::Layer l = ieLayer;
-            l.getParameters()["bias"] = epsilon;
-
-            return Ptr<BackendNode>(new InfEngineBackendNode(l));
-        }
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
-
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/not_implemented_layer.cpp b/modules/dnn/src/layers/not_implemented_layer.cpp
index c4b1343902..3fd52c09d9 100644
--- a/modules/dnn/src/layers/not_implemented_layer.cpp
+++ b/modules/dnn/src/layers/not_implemented_layer.cpp
@@ -87,11 +87,6 @@ public:
         CV_Error(Error::StsNotImplemented, msg);
     }
 
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
-    {
-        CV_Error(Error::StsNotImplemented, msg);
-    }
-
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp
index 7534145f53..aea8ab3168 100644
--- a/modules/dnn/src/layers/padding_layer.cpp
+++ b/modules/dnn/src/layers/padding_layer.cpp
@@ -102,10 +102,10 @@ public:
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
 #ifdef HAVE_INF_ENGINE
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         {
             bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
-            if (INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && isMyriad)
+            if (isMyriad)
                 return dstRanges.size() == 4 && paddings[0].first == 0 && paddings[0].second == 0;
 
             return (dstRanges.size() <= 4 || !isArmComputePlugin());
@@ -219,30 +219,6 @@ public:
         return Ptr<BackendNode>();
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::Layer ieLayer(name);
-        ieLayer.setName(name);
-        ieLayer.setType("Pad");
-
-        std::vector<int> begins(paddings.size(), 0), ends(paddings.size(), 0);
-        for (int i = 0; i < paddings.size(); ++i)
-        {
-            begins[i] = paddings[i].first;
-            ends[i] = paddings[i].second;
-        }
-        ieLayer.getParameters()["pads_begin"] = begins;
-        ieLayer.getParameters()["pads_end"] = ends;
-        ieLayer.getParameters()["pad_mode"] = paddingType;
-        if (paddingType == "constant")
-            ieLayer.getParameters()["pad_value"] = paddingValue;
-
-        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
-        ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp
index 9e66eb6a64..033b3d9aee 100644
--- a/modules/dnn/src/layers/permute_layer.cpp
+++ b/modules/dnn/src/layers/permute_layer.cpp
@@ -115,13 +115,16 @@ public:
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
 #ifdef HAVE_INF_ENGINE
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && preferableTarget == DNN_TARGET_CPU)
-            return _order.size() <= 4 || !isArmComputePlugin();
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        {
+            if (preferableTarget == DNN_TARGET_CPU)
+                return _order.size() <= 4 || !isArmComputePlugin();
+            return true;
+        }
 #endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
                backendId == DNN_BACKEND_WEBNN ||
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine()) ||
                (backendId == DNN_BACKEND_VKCOM && haveVulkan());
     }
 
@@ -418,16 +421,6 @@ public:
     }
 
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::PermuteLayer ieLayer(name);
-        ieLayer.setOrder(_order);
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
-
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index 7cb86a9515..f8616a4184 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -199,34 +199,13 @@ public:
         {
             return type == MAX || type == AVE || type == ROI;
         }
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        {
-            if (computeMaxIdx)
-                return false;
-            if (kernel_size.size() == 3)
-                return preferableTarget == DNN_TARGET_CPU;
-            if (kernel_size.size() == 1)
-                return false;
-            if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) {
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1)
-                if (type == MAX && (pads_begin[1] == 1 && pads_begin[0] == 1) && (strides[0] == 2 && strides[1] == 2)) {
-                    return !isMyriadX();
-                }
-#endif
-                return type == MAX || type == AVE;
-            }
-            else
-                return type != STOCHASTIC && type != SUM;
-        }
-#endif
+#ifdef HAVE_INF_ENGINE
         if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         {
-#ifdef HAVE_DNN_NGRAPH
             return !computeMaxIdx && type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin());
-#endif
         }
-        else if (backendId == DNN_BACKEND_OPENCV)
+#endif
+        if (backendId == DNN_BACKEND_OPENCV)
         {
             if (kernel_size.size() == 3)
                 return preferableTarget == DNN_TARGET_CPU;
@@ -550,54 +529,6 @@ public:
             return Ptr<BackendNode>();
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        if (type == MAX || type == AVE)
-        {
-            InferenceEngine::Builder::PoolingLayer ieLayer(name);
-
-            ieLayer.setKernel(kernel_size);
-            ieLayer.setStrides(strides);
-            ieLayer.setPaddingsBegin(pads_begin);
-            ieLayer.setPaddingsEnd(pads_end);
-
-            ieLayer.setPoolingType(type == MAX ?
-                                   InferenceEngine::Builder::PoolingLayer::PoolingType::MAX :
-                                   InferenceEngine::Builder::PoolingLayer::PoolingType::AVG);
-            ieLayer.setRoundingType(ceilMode ?
-                                    InferenceEngine::Builder::PoolingLayer::RoundingType::CEIL :
-                                    InferenceEngine::Builder::PoolingLayer::RoundingType::FLOOR);
-            ieLayer.setExcludePad(!avePoolPaddedArea);
-
-            InferenceEngine::Builder::Layer l = ieLayer;
-            if (!padMode.empty())
-                l.getParameters()["auto_pad"] = padMode == "VALID" ? std::string("valid") : std::string("same_upper");
-            return Ptr<BackendNode>(new InfEngineBackendNode(l));
-        }
-        else if (type == ROI)
-        {
-            InferenceEngine::Builder::ROIPoolingLayer ieLayer(name);
-            ieLayer.setSpatialScale(spatialScale);
-            ieLayer.setPooled({pooledSize.height, pooledSize.width});
-            ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(2));
-            return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-        }
-        else if (type == PSROI)
-        {
-            InferenceEngine::Builder::PSROIPoolingLayer ieLayer(name);
-            ieLayer.setSpatialScale(spatialScale);
-            ieLayer.setOutputDim(psRoiOutChannels);
-            ieLayer.setGroupSize(pooledSize.width);
-            ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(2));
-            return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-        }
-        else
-            CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
-        return Ptr<BackendNode>();
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp
index f7340b1e67..160b36c18d 100644
--- a/modules/dnn/src/layers/prior_box_layer.cpp
+++ b/modules/dnn/src/layers/prior_box_layer.cpp
@@ -298,9 +298,7 @@ public:
 #endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
-               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() &&
-                   ( _explicitSizes || (_minSize.size() == 1 && _maxSize.size() <= 1)))
-               || (backendId == DNN_BACKEND_VKCOM && haveVulkan());
+               (backendId == DNN_BACKEND_VKCOM && haveVulkan());
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -510,69 +508,6 @@ public:
     }
 
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        if (_explicitSizes)
-        {
-            InferenceEngine::Builder::PriorBoxClusteredLayer ieLayer(name);
-            ieLayer.setSteps({_stepY, _stepX});
-
-            CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
-            ieLayer.setOffset(_offsetsX[0]);
-
-            ieLayer.setClip(_clip);
-            ieLayer.setFlip(false);  // We already flipped aspect ratios.
-
-            InferenceEngine::Builder::Layer l = ieLayer;
-
-            CV_Assert_N(!_boxWidths.empty(), !_boxHeights.empty(), !_variance.empty());
-            CV_Assert(_boxWidths.size() == _boxHeights.size());
-            l.getParameters()["width"] = _boxWidths;
-            l.getParameters()["height"] = _boxHeights;
-            l.getParameters()["variance"] = _variance;
-            return Ptr<BackendNode>(new InfEngineBackendNode(l));
-        }
-        else
-        {
-            InferenceEngine::Builder::PriorBoxLayer ieLayer(name);
-
-            CV_Assert(!_explicitSizes);
-            ieLayer.setMinSize(_minSize[0]);
-            if (!_maxSize.empty())
-                ieLayer.setMaxSize(_maxSize[0]);
-
-            CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
-            ieLayer.setOffset(_offsetsX[0]);
-
-            ieLayer.setClip(_clip);
-            ieLayer.setFlip(false);  // We already flipped aspect ratios.
-
-            InferenceEngine::Builder::Layer l = ieLayer;
-            if (_stepX == _stepY)
-            {
-                l.getParameters()["step"] = _stepX;
-                l.getParameters()["step_h"] = 0.0f;
-                l.getParameters()["step_w"] = 0.0f;
-            }
-            else
-            {
-                l.getParameters()["step"] = 0.0f;
-                l.getParameters()["step_h"] = _stepY;
-                l.getParameters()["step_w"] = _stepX;
-            }
-            if (!_aspectRatios.empty())
-            {
-                l.getParameters()["aspect_ratio"] = _aspectRatios;
-            }
-            CV_Assert(!_variance.empty());
-            l.getParameters()["variance"] = _variance;
-            return Ptr<BackendNode>(new InfEngineBackendNode(l));
-        }
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
-
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp
index aeb5d44a47..e9edcf1547 100644
--- a/modules/dnn/src/layers/proposal_layer.cpp
+++ b/modules/dnn/src/layers/proposal_layer.cpp
@@ -96,7 +96,7 @@ public:
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
 #ifdef HAVE_INF_ENGINE
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         {
             bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
             return !isMyriad;
@@ -338,32 +338,6 @@ public:
         layerOutputs[0].col(2).copyTo(dst);
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::ProposalLayer ieLayer(name);
-
-        ieLayer.setBaseSize(baseSize);
-        ieLayer.setFeatStride(featStride);
-        ieLayer.setMinSize(16);
-        ieLayer.setNMSThresh(nmsThreshold);
-        ieLayer.setPostNMSTopN(keepTopAfterNMS);
-        ieLayer.setPreNMSTopN(keepTopBeforeNMS);
-
-        std::vector<float> scalesVec(scales.size());
-        for (int i = 0; i < scales.size(); ++i)
-            scalesVec[i] = scales.get<float>(i);
-        ieLayer.setScale(scalesVec);
-
-        std::vector<float> ratiosVec(ratios.size());
-        for (int i = 0; i < ratios.size(); ++i)
-            ratiosVec[i] = ratios.get<float>(i);
-        ieLayer.setRatio(ratiosVec);
-
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp
index 14210becb4..dbb3e2700a 100644
--- a/modules/dnn/src/layers/recurrent_layers.cpp
+++ b/modules/dnn/src/layers/recurrent_layers.cpp
@@ -184,7 +184,7 @@ public:
         CV_Assert(!reverse || !bidirectional);
 
         // read activations
-        DictValue activations = params.get<DictValue>("activations", "");
+        DictValue activations = params.get<DictValue>("activations", DictValue(String()));
         if (activations.size() == 1) // if activations wasn't specified use default
         {
             f_activation = sigmoid;
diff --git a/modules/dnn/src/layers/reorg_layer.cpp b/modules/dnn/src/layers/reorg_layer.cpp
index 797df4819d..ac7d1abfb1 100644
--- a/modules/dnn/src/layers/reorg_layer.cpp
+++ b/modules/dnn/src/layers/reorg_layer.cpp
@@ -151,10 +151,12 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
-               backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+               backendId == DNN_BACKEND_CUDA;
     }
 
 #ifdef HAVE_OPENCL
@@ -198,16 +200,6 @@ public:
     }
 
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::ReorgYoloLayer ieLayer(name);
-        ieLayer.setStride(reorgStride);
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
-
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp
index 0ba3abf047..f62235dc20 100644
--- a/modules/dnn/src/layers/reshape_layer.cpp
+++ b/modules/dnn/src/layers/reshape_layer.cpp
@@ -202,10 +202,13 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_WEBNN ||
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine());
+               backendId == DNN_BACKEND_WEBNN;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -306,17 +309,6 @@ public:
     }
 
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::ReshapeLayer ieLayer(name);
-        CV_Assert(outShapes.size() == 1);
-        ieLayer.setDims(outShapes[0]);
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
-
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp
index 42eb2e2331..ab640dbf3f 100644
--- a/modules/dnn/src/layers/resize_layer.cpp
+++ b/modules/dnn/src/layers/resize_layer.cpp
@@ -78,7 +78,7 @@ public:
             return interpolation == "nearest" || interpolation == "bilinear" || interpolation == "opencv_linear";
 
 #ifdef HAVE_INF_ENGINE
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         {
             return (interpolation == "nearest" && scaleWidth == scaleHeight) ||
                    (interpolation == "bilinear");
@@ -308,38 +308,6 @@ public:
     }
 
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::Layer ieLayer(name);
-        ieLayer.setName(name);
-        if (interpolation == "nearest")
-        {
-            ieLayer.setType("Resample");
-            ieLayer.getParameters()["type"] = std::string("caffe.ResampleParameter.NEAREST");
-            ieLayer.getParameters()["antialias"] = false;
-            if (scaleWidth != scaleHeight)
-                CV_Error(Error::StsNotImplemented, "resample with sw != sh");
-            ieLayer.getParameters()["factor"] = 1.0f / scaleWidth;
-        }
-        else if (interpolation == "bilinear")
-        {
-            ieLayer.setType("Interp");
-            ieLayer.getParameters()["pad_beg"] = 0;
-            ieLayer.getParameters()["pad_end"] = 0;
-            ieLayer.getParameters()["align_corners"] = alignCorners;
-        }
-        else
-            CV_Error(Error::StsNotImplemented, "Unsupported interpolation: " + interpolation);
-        ieLayer.getParameters()["width"] = outWidth;
-        ieLayer.getParameters()["height"] = outHeight;
-        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
-        ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
-
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp
index fcee451556..d727734cf3 100644
--- a/modules/dnn/src/layers/scale_layer.cpp
+++ b/modules/dnn/src/layers/scale_layer.cpp
@@ -78,11 +78,13 @@ public:
         {
             return backendId == DNN_BACKEND_OPENCV;
         }
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return axis > 0;
+#endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
                backendId == DNN_BACKEND_HALIDE ||
-               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && axis == 1 && !blobs.empty()) ||
-               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && axis > 0) ||
                (backendId == DNN_BACKEND_WEBNN && axis >0);
     }
 
@@ -314,34 +316,6 @@ public:
     }
 #endif  // HAVE_HALIDE
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
-    {
-        InferenceEngine::Builder::Layer l = InferenceEngine::Builder::ScaleShiftLayer(name);
-
-        CV_Assert(!blobs.empty());
-        const size_t numChannels = blobs[0].total();
-        if (hasWeights)
-        {
-            addConstantData("weights", wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C), l);
-        }
-        else
-        {
-            auto weights = InferenceEngine::make_shared_blob<float>({
-                               InferenceEngine::Precision::FP32, {(size_t)numChannels},
-                               InferenceEngine::Layout::C
-                           });
-            weights->allocate();
-            float* buf = weights->buffer().as<float*>();
-            std::fill(buf, buf + numChannels, 1);
-            addConstantData("weights", weights, l);
-        }
-        if (hasBias)
-            addConstantData("biases", wrapToInfEngineBlob(blobs.back(), {numChannels}, InferenceEngine::Layout::C), l);
-        return Ptr<BackendNode>(new InfEngineBackendNode(l));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
-
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp
index 20b493636a..4370e566a5 100644
--- a/modules/dnn/src/layers/slice_layer.cpp
+++ b/modules/dnn/src/layers/slice_layer.cpp
@@ -166,12 +166,7 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-            return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) &&
-                sliceRanges.size() == 1 && sliceRanges[0].size() == 4 && !hasSteps;
-#endif
-#ifdef HAVE_DNN_NGRAPH
+#ifdef HAVE_INF_ENGINE
         if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
             return sliceRanges.size() == 1 && !hasSteps;
 #endif
@@ -573,64 +568,6 @@ public:
     }
 
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
-    {
-        CV_Assert_N(finalSliceRanges.size() == 1, inputs.size() <= 2);
-
-        std::vector<size_t> axes, offsets, dims;
-        int from, to, step;
-        int numDims = finalSliceRanges[0].size();
-        if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL)
-        {
-            from = axis;
-            to = numDims;
-            step = 1;
-        }
-        else
-        {
-            from = numDims - 1;
-            to = axis - 1;
-            step = -1;
-        }
-        for (int i = from; i != to; i += step)
-        {
-            axes.push_back(i);
-            offsets.push_back(finalSliceRanges[0][i].start);
-            dims.push_back(finalSliceRanges[0][i].size());
-        }
-
-        InferenceEngine::Builder::Layer ieLayer(name);
-        ieLayer.setName(name);
-        ieLayer.setType("Crop");
-        ieLayer.getParameters()["axis"] = axes;
-        ieLayer.getParameters()["dim"] = dims;
-        ieLayer.getParameters()["offset"] = offsets;
-        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(2));
-        ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
-
-        if (inputs.size() != 2)
-        {
-            std::vector<size_t> outShape(numDims);
-            for (int i = 0; i < numDims; ++i)
-                outShape[i] = finalSliceRanges[0][i].size();
-
-            ieLayer.getInputPorts()[1].setParameter("type", "weights");
-
-            auto shapeSource = InferenceEngine::make_shared_blob<float>({
-                                   InferenceEngine::Precision::FP32, outShape,
-                                   InferenceEngine::Layout::ANY
-                               });
-            shapeSource->allocate();
-            addConstantData("weights", shapeSource, ieLayer);
-        }
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif
-#endif
-
-
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp
index db2951808f..790f181325 100644
--- a/modules/dnn/src/layers/softmax_layer.cpp
+++ b/modules/dnn/src/layers/softmax_layer.cpp
@@ -99,6 +99,10 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+#ifdef HAVE_INF_ENGINE
+        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            return true;
+#endif
 #ifdef HAVE_WEBNN
         if (backendId == DNN_BACKEND_WEBNN) {
             // TODO: support logSoftMax
@@ -112,8 +116,6 @@ public:
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
                (backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1) ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
-               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() && !logSoftMax) ||
                (backendId == DNN_BACKEND_VKCOM && haveVulkan());
     }
 
@@ -360,17 +362,6 @@ public:
         return Ptr<BackendNode>();
     }
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
-    {
-        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
-
-        InferenceEngine::Builder::SoftMaxLayer ieLayer(name);
-        ieLayer.setAxis(normalize_axis(axisRaw, input->getDims().size()));
-
-        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
-    }
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index a2b28462e8..7cfc546b12 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -10,6 +10,8 @@
 
 #include <opencv2/dnn/layer_reg.private.hpp>
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 #include <opencv2/core/utils/logger.defines.hpp>
 #undef CV_LOG_STRIP_LEVEL
 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
@@ -48,8 +50,12 @@ CV__DNN_INLINE_NS_BEGIN
 
 extern bool DNN_DIAGNOSTICS_RUN;
 
+class ONNXLayerHandler;
+
 class ONNXImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     opencv_onnx::ModelProto model_proto;
     struct LayerInfo {
         int layerId;
@@ -80,7 +86,7 @@ public:
     void populateNet();
 
 protected:
-    std::unique_ptr<detail::LayerHandler> missingLayerHandler;
+    std::unique_ptr<ONNXLayerHandler> layerHandler;
     Net& dstNet;
 
     opencv_onnx::GraphProto graph_proto;
@@ -94,15 +100,19 @@ protected:
 
     std::map<std::string, LayerInfo> layer_id;
     typedef std::map<std::string, LayerInfo>::iterator IterLayerId_t;
+    typedef std::map<std::string, LayerInfo>::const_iterator ConstIterLayerId_t;
 
     void handleNode(const opencv_onnx::NodeProto& node_proto);
 
 private:
+    friend class ONNXLayerHandler;
     typedef void (ONNXImporter::*ONNXImporterNodeParser)(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     typedef std::map<std::string, ONNXImporterNodeParser> DispatchMap;
     typedef std::map<std::string, DispatchMap> DomainDispatchMap;
 
     DomainDispatchMap domain_dispatch_map;
+    std::string getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto);
+    const DispatchMap& getDispatchMap(const opencv_onnx::NodeProto& node_proto);
     void buildDispatchMap_ONNX_AI(int opset_version);
     void buildDispatchMap_COM_MICROSOFT(int opset_version);
 
@@ -156,6 +166,7 @@ private:
     void parseSoftMax              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseDetectionOutput      (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseCumSum               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseSimpleLayers         (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
 
     // Domain: com.microsoft
     // URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
@@ -178,9 +189,38 @@ private:
     const std::string str_domain_ai_onnx = "ai.onnx";
 };
 
+class ONNXLayerHandler : public detail::LayerHandler  // diagnostic-run helper: collects ONNX node types the importer has no parser for
+{
+public:
+    explicit ONNXLayerHandler(ONNXImporter* importer_);
+    // Calls addMissing() for each node of `net` whose op type is absent from the importer's dispatch map, then printMissing().
+    void fillRegistry(const opencv_onnx::GraphProto& net);
+
+protected:
+    ONNXImporter* importer;  // importer queried for domain and dispatch-map lookups; raw pointer, never deleted by this class
+};
+// Stores the importer pointer for later lookups; performs no other work.
+ONNXLayerHandler::ONNXLayerHandler(ONNXImporter* importer_) : importer(importer_){}
+
+void ONNXLayerHandler::fillRegistry(const opencv_onnx::GraphProto &net)
+{
+    // Diagnostic pre-pass: record every node whose op type has no parser in its domain's dispatch map.
+    for (int li = 0; li < net.node_size(); li++) {
+        const opencv_onnx::NodeProto &node_proto = net.node(li);
+        // handleNode() asserts output_size() >= 1 only later; skip output-less nodes instead of reading output(0).
+        if (node_proto.output_size() < 1)
+            continue;
+        const std::string& type = node_proto.op_type();
+        const std::string layer_type_domain = importer->getLayerTypeDomain(node_proto);
+        const auto& dispatch = importer->getDispatchMap(node_proto);
+        if (dispatch.find(type) == dispatch.end())
+            addMissing(node_proto.output(0), cv::format("%s.%s", layer_type_domain.c_str(), type.c_str()));
+    }
+    printMissing();
+}
 
 ONNXImporter::ONNXImporter(Net& net, const char *onnxFile)
-    : missingLayerHandler(DNN_DIAGNOSTICS_RUN ? new detail::LayerHandler() : nullptr)
+    : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr)
     , dstNet(net)
     , onnx_opset(0)
 {
@@ -203,7 +243,7 @@ ONNXImporter::ONNXImporter(Net& net, const char *onnxFile)
 }
 
 ONNXImporter::ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
-    : missingLayerHandler(DNN_DIAGNOSTICS_RUN ? new detail::LayerHandler() : nullptr)
+    : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr)
     , dstNet(net)
     , onnx_opset(0)
 {
@@ -517,7 +557,11 @@ void ONNXImporter::addLayer(LayerParams& layerParams,
     int id = dstNet.addLayer(layerParams.name, layerParams.type, depth, layerParams);
     for (int i = 0; i < node_proto.output_size(); ++i)
     {
-        layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i)));
+        const std::string& output_name = node_proto.output(i);
+        if (!output_name.empty())
+        {
+            layer_id.insert(std::make_pair(output_name, LayerInfo(id, i)));
+        }
     }
 
     std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
@@ -540,7 +584,11 @@ void ONNXImporter::addLayer(LayerParams& layerParams,
     layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
     for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i)
     {
-        outShapes[node_proto.output(i)] = layerOutShapes[i];
+        const std::string& output_name = node_proto.output(i);
+        if (!output_name.empty())
+        {
+            outShapes[node_proto.output(i)] = layerOutShapes[i];
+        }
     }
 }
 
@@ -795,6 +843,7 @@ void ONNXImporter::populateNet()
 
     if (DNN_DIAGNOSTICS_RUN) {
         CV_LOG_INFO(NULL, "DNN/ONNX: start diagnostic run!");
+        layerHandler->fillRegistry(graph_proto);
     }
 
     for(int li = 0; li < layersSize; li++)
@@ -803,57 +852,92 @@ void ONNXImporter::populateNet()
         handleNode(node_proto);
     }
 
+    // register outputs
+    for (int i = 0; i < graph_proto.output_size(); ++i)
+    {
+        const std::string& output_name = graph_proto.output(i).name();
+        if (output_name.empty())
+        {
+            CV_LOG_ERROR(NULL, "DNN/ONNX: can't register output without name: " << i);
+            continue;
+        }
+        ConstIterLayerId_t layerIt = layer_id.find(output_name);
+        if (layerIt == layer_id.end())
+        {
+            CV_LOG_ERROR(NULL, "DNN/ONNX: can't find layer for output name: '" << output_name << "'. Does model imported properly?");
+            continue;
+        }
+
+        const LayerInfo& li = layerIt->second;
+        int outputId = dstNet.registerOutput(output_name, li.layerId, li.outputId); CV_UNUSED(outputId);
+        // no need to duplicate message from engine: CV_LOG_DEBUG(NULL, "DNN/ONNX: registered output='" << output_name << "' with id=" << outputId);
+    }
+
     CV_LOG_DEBUG(NULL, (DNN_DIAGNOSTICS_RUN ? "DNN/ONNX: diagnostic run completed!" : "DNN/ONNX: import completed!"));
 }
 
+// Returns the operator domain a node belongs to. An unset or empty
+// domain field falls back to str_domain_ai_onnx ("ai.onnx").
+std::string ONNXImporter::getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto)
+{
+    const bool domainGiven = node_proto.has_domain() && !node_proto.domain().empty();
+    if (domainGiven)
+        return node_proto.domain();
+    return str_domain_ai_onnx;
+}
+
+// Looks up the parser table registered for the node's domain.
+// Unknown domains yield a shared empty table, so callers can always
+// run find() on the result without a separate validity check.
+const ONNXImporter::DispatchMap& ONNXImporter::getDispatchMap(const opencv_onnx::NodeProto& node_proto)
+{
+    static DispatchMap noParsers;  // function-local static: the returned reference stays valid
+    const std::string domainKey = getLayerTypeDomain(node_proto);
+    const auto found = domain_dispatch_map.find(domainKey);
+    if (found != domain_dispatch_map.end())
+        return found->second;
+    return noParsers;
+}
+
+// Picks a display name for a node: its explicit name when one is set and
+// non-empty, otherwise the first non-empty output name. Raises
+// Error::StsAssert through CV_Error when neither exists.
+const std::string& extractNodeName(const opencv_onnx::NodeProto& node_proto)
+{
+    const bool named = node_proto.has_name() && !node_proto.name().empty();
+    if (named)
+        return node_proto.name();
+
+    // ONNX lets an optional output be left unspecified either by omitting it
+    // (trailing outputs only) or by putting an empty string in its slot, so
+    // empty entries are skipped rather than used as the node name.
+    const int outputCount = node_proto.output_size();
+    for (int idx = 0; idx < outputCount; ++idx)
+        if (!node_proto.output(idx).empty())
+            return node_proto.output(idx);
+
+    CV_Error(Error::StsAssert, "Couldn't deduce Node name.");
+}
+
 void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto)
 {
     CV_Assert(node_proto.output_size() >= 1);
-    const std::string& name = node_proto.output(0);
+    const std::string& name = extractNodeName(node_proto);
     const std::string& layer_type = node_proto.op_type();
-    const std::string& layer_type_domain = [&]()
+    const std::string& layer_type_domain = getLayerTypeDomain(node_proto);
+    const auto& dispatch = getDispatchMap(node_proto);
+
+    CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and "
+                                                         << node_proto.output_size() << " outputs: "
+                                                         << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
+                                                         << cv::format(" from %sdomain='", onnx_opset_map.count(layer_type_domain) == 1 ? "" : "undeclared ")
+                                                         << layer_type_domain << "'"
+    );
+
+    if (dispatch.empty())
     {
-        if (!node_proto.has_domain())
-            return str_domain_ai_onnx;
-        const std::string& domain = node_proto.domain();
-        if (domain.empty())
-            return str_domain_ai_onnx;
-        return domain;
-    }();
-    const auto& dispatch = [&]()
-    {
-        if (layer_type_domain != str_domain_ai_onnx)
-        {
-            if (onnx_opset_map.find(layer_type_domain) == onnx_opset_map.end())
-            {
-                CV_LOG_WARNING(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
-                        << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
-                        << " from undeclared domain='" << layer_type_domain << "'"
-                );
-            }
-            else
-            {
-                CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
-                        << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
-                        << " from domain='" << layer_type_domain << "'"
-                );
-            }
-            auto it = domain_dispatch_map.find(layer_type_domain);
-            if (it == domain_dispatch_map.end())
-            {
-                CV_LOG_WARNING(NULL, "DNN/ONNX: missing dispatch map for domain='" << layer_type_domain << "'");
-                return DispatchMap();
-            }
-            return it->second;
-        }
-        else
-        {
-            CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
-                    << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
-            );
-            return domain_dispatch_map[str_domain_ai_onnx];
-        }
-    }();
+        CV_LOG_WARNING(NULL, "DNN/ONNX: missing dispatch map for domain='" << layer_type_domain << "'");
+    }
 
     LayerParams layerParams;
     try
@@ -1007,6 +1091,7 @@ void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::Node
 {
     opencv_onnx::NodeProto node_proto = node_proto_;
     const std::string& layer_type = node_proto.op_type();
+    const std::string output_name = node_proto.output(0);
 
     CV_Assert(node_proto.input_size() == 1);
     layerParams.type = "Pooling";
@@ -1127,7 +1212,7 @@ void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::Node
         layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size()));
 
         node_proto.set_input(0, node_proto.output(0));
-        node_proto.set_output(0, layerParams.name);
+        node_proto.set_output(0, output_name);
     }
     else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax"))
     {
@@ -1160,7 +1245,7 @@ void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::Node
         layerParams.set("dim", DictValue::arrayInt(targetShape.data(), targetShape.size()));
 
         node_proto.set_input(0, node_proto.output(0));
-        node_proto.set_output(0, layerParams.name);
+        node_proto.set_output(0, output_name);
     }
     addLayer(layerParams, node_proto);
 }
@@ -1188,6 +1273,7 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP
         CV_Assert(starts.size() == ends.size());
 
         if (axis > 0) {
+            CV_CheckLE(axis, 1024, "Slice layer can't have more than 1024 axes"); // arbitrary limit
             begin.resize(axis, 0);
             end.resize(axis, -1);
         }
@@ -1250,7 +1336,7 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP
                 {
                     Mat flipped;
                     flip(inp, flipped, 0);
-                    addConstant(layerParams.name, flipped);
+                    addConstant(node_proto.output(0), flipped);
                     return;
                 }
             }
@@ -1270,7 +1356,7 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP
         inputs.push_back(inp);
         runLayer(layerParams, inputs, sliced);
         CV_Assert(sliced.size() == 1);
-        addConstant(layerParams.name, sliced[0]);
+        addConstant(node_proto.output(0), sliced[0]);
         return;
     }
     addLayer(layerParams, node_proto);
@@ -1335,7 +1421,7 @@ void ONNXImporter::parseBias(LayerParams& layerParams, const opencv_onnx::NodePr
         Mat blob_1 = getBlob(node_proto, 1);
         CV_Assert(blob_0.size == blob_1.size);
         Mat output = isSub ? (blob_0 - blob_1) : (blob_0 + blob_1);
-        addConstant(layerParams.name, output);
+        addConstant(node_proto.output(0), output);
         return;
     }
     else if (is_const_0 || is_const_1)
@@ -1451,12 +1537,13 @@ void ONNXImporter::parseConstant(LayerParams& layerParams, const opencv_onnx::No
 {
     CV_Assert(node_proto.input_size() == 0);
     CV_Assert(layerParams.blobs.size() == 1);
-    addConstant(layerParams.name, layerParams.blobs[0]);
+    addConstant(node_proto.output(0), layerParams.blobs[0]);
 }
 
 void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
 {
     opencv_onnx::NodeProto node_proto = node_proto_;
+    const std::string output_name = node_proto.output(0);
     LayerParams lstmParams = layerParams;
     lstmParams.name += "/lstm";
 
@@ -1470,6 +1557,13 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr
     const int numDirs = Wx.size[0];  // Is 1 for forward only and 2 for bidirectional LSTM.
     const int numFeatures = Wx.size[2];
 
+    // Following checks are deduced from the IFGO->IGFO loop below
+    // Wx is numDirs X numHidden*3 X numFeatures
+    // Wh is numDirs X numHidden*3 X numHidden
+    CV_CheckLE(numHidden * 3, Wx.size[1], "Wx should have at least 3x hidden_size in dimension 1");
+    CV_CheckLE(numHidden * 3, Wh.size[1], "Wh should have at least 3x hidden_size in dimension 1");
+    CV_CheckLE(numHidden, Wh.size[2], "Wh should have at least hidden_size in dimension 2");
+
     Mat h0, c0;
     if (!node_proto.input(5).empty()) {
         h0 = getBlob(node_proto, 5);
@@ -1491,6 +1585,9 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr
     Mat bh = b.colRange(b.cols / 2, b.cols);
     b = bx + bh;
 
+    // b is numDirs X numHidden*3
+    CV_CheckLE(numHidden * 3, b.cols, "Bias data should have at least 3x hidden_size columns");
+
     // IFGO->IGFO
     for (int k = 0; k < numDirs; ++k)
     {
@@ -1538,13 +1635,14 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr
     layerParams.type = "Reshape";
     layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size()));
     node_proto.set_input(0, lstmParams.name);  // redirect input to LSTM
-    node_proto.set_output(0, layerParams.name);  // keep origin LSTM's name
+    node_proto.set_output(0, output_name);  // keep origin LSTM's name
     addLayer(layerParams, node_proto);
 }
 
 void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
 {
     opencv_onnx::NodeProto node_proto = node_proto_;
+    const std::string output_name = node_proto.output(0);
     LayerParams gruParams = layerParams;
     gruParams.name += "/gru";
 
@@ -1578,7 +1676,7 @@ void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodePro
     layerParams.type = "Reshape";
     layerParams.set("dim", DictValue::arrayInt(&gruShape[0], gruShape.size()));
     node_proto.set_input(0, gruParams.name);  // redirect input to GRU
-    node_proto.set_output(0, layerParams.name);  // keep origin GRU's name
+    node_proto.set_output(0, output_name);  // keep origin GRU's name
     addLayer(layerParams, node_proto);
 }
 
@@ -1852,6 +1950,7 @@ void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodePro
 {
     opencv_onnx::NodeProto node_proto = node_proto_;
     const std::string& layer_type = node_proto.op_type();
+    const std::string output_name = node_proto.output(0);
     CV_Assert(node_proto.input_size() == 2);
 
     bool isDiv = layer_type == "Div";
@@ -1936,7 +2035,7 @@ void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodePro
 
         if (inp0.dims == 1 && inp1.dims == 1)
             out.dims = 1;  // to workaround dims == 1
-        addConstant(layerParams.name, out);
+        addConstant(output_name, out);
         return;
     }
     else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
@@ -1952,7 +2051,7 @@ void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodePro
             opencv_onnx::NodeProto proto;
             proto.add_input(node_proto.input(1));
             proto.add_input(node_proto.input(0));
-            proto.add_output(layerParams.name);
+            proto.add_output(output_name);
             node_proto = proto;
         }
 
@@ -2131,7 +2230,7 @@ void ONNXImporter::parseTranspose(LayerParams& layerParams, const opencv_onnx::N
         std::vector<Mat> inputs(1, getBlob(node_proto, 0)), transposed;
         runLayer(layerParams, inputs, transposed);
         CV_Assert(transposed.size() == 1);
-        addConstant(layerParams.name, transposed[0]);
+        addConstant(node_proto.output(0), transposed[0]);
         return;
     }
     addLayer(layerParams, node_proto);
@@ -2183,7 +2282,7 @@ void ONNXImporter::parseSqueeze(LayerParams& layerParams, const opencv_onnx::Nod
         Mat inp = getBlob(node_proto, 0);
         Mat out = inp.reshape(1, outShape);
         out.dims = outShape.size();  // to workaround dims == 1
-        addConstant(layerParams.name, out);
+        addConstant(node_proto.output(0), out);
         return;
     }
     int depth = layerParams.get<int>("depth", CV_32F);
@@ -2212,7 +2311,7 @@ void ONNXImporter::parseFlatten(LayerParams& layerParams, const opencv_onnx::Nod
         }
 
         Mat output = input.reshape(1, 2, out_size);
-        addConstant(layerParams.name, output);
+        addConstant(node_proto.output(0), output);
         return;
     }
     IterShape_t shapeIt = outShapes.find(node_proto.input(0));
@@ -2284,7 +2383,7 @@ void ONNXImporter::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::N
         }
 
         Mat out = input.reshape(0, dims);
-        addConstant(layerParams.name, out);
+        addConstant(node_proto.output(0), out);
         return;
     }
 
@@ -2323,6 +2422,7 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node
     CV_CheckEQ(node_proto.input_size(), 2, "");
     const std::string& input0 = node_proto.input(0);
     const std::string& input1 = node_proto.input(1);
+    const std::string output_name = node_proto.output(0);
     Mat newShapeMat = getBlob(input1);
     MatShape targetShape(newShapeMat.ptr<int>(), newShapeMat.ptr<int>() + newShapeMat.total());
 
@@ -2392,7 +2492,7 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node
         input = input.reshape(0, total(inpShape, 0, broadcast_axes[0]));
         Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]);
         output = output.reshape(0, targetShape);
-        addConstant(layerParams.name, output);
+        addConstant(output_name, output);
         return;
     }
 
@@ -2422,7 +2522,7 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node
 
         layerParams.set("axis", broadcast_axes[0]);
         layerParams.type = "Concat";
-        node_proto.set_output(0, layerParams.name);
+        node_proto.set_output(0, output_name);
     }
     else if (broadcast_axes.empty())
     {
@@ -2448,7 +2548,7 @@ void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::Nod
         if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
             std::vector<Mat> inputs(1, getBlob(node_proto, 0)), outputs;
             runLayer(layerParams, inputs, outputs);
-            addConstant(layerParams.name, outputs[0]);
+            addConstant(node_proto.output(0), outputs[0]);
             return;
         }
     }
@@ -2462,7 +2562,7 @@ void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::Nod
         if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
             Mat input = getBlob(node_proto, 0);
             Mat out = input.reshape(0, dim);
-            addConstant(layerParams.name, out);
+            addConstant(node_proto.output(0), out);
             return;
         }
         replaceLayerParam(layerParams, "shape", "dim");
@@ -2514,11 +2614,9 @@ void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeP
     if (isDynamicShape)
     {
         CV_LOG_ERROR(NULL, "DNN/ONNX(Shape): dynamic 'zero' shapes are not supported, input " << toString(inpShape, node_proto.input(0)));
-        // FIXIT repair assertion
-        // Disabled to pass face detector tests from #20422
-        // CV_Assert(!isDynamicShape);  // not supported
+        CV_Assert(!isDynamicShape);  // not supported
     }
-    addConstant(layerParams.name, shapeMat);
+    addConstant(node_proto.output(0), shapeMat);
 }
 
 void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
@@ -2542,7 +2640,7 @@ void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodePr
         Mat dst;
         blob.convertTo(dst, type);
         dst.dims = blob.dims;
-        addConstant(layerParams.name, dst);
+        addConstant(node_proto.output(0), dst);
         return;
     }
     else
@@ -2569,7 +2667,7 @@ void ONNXImporter::parseConstantFill(LayerParams& layerParams, const opencv_onnx
     for (int i = 0; i < inpShape.size(); i++)
         CV_CheckGT(inpShape[i], 0, "");
     Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value));
-    addConstant(layerParams.name, tensor);
+    addConstant(node_proto.output(0), tensor);
 }
 
 void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
@@ -2597,7 +2695,7 @@ void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::Node
         } else {
             out.dims = 1;
         }
-        addConstant(layerParams.name, out);
+        addConstant(node_proto.output(0), out);
         return;
     }
     else
@@ -2691,7 +2789,7 @@ void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::Node
         runLayer(layerParams, inputs, concatenated);
 
         CV_Assert(concatenated.size() == 1);
-        addConstant(layerParams.name, concatenated[0]);
+        addConstant(node_proto.output(0), concatenated[0]);
         return;
     }
     else
@@ -2740,16 +2838,20 @@ void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::Node
 
     // opset-10: input = [X, scales]
     // opset-11: input = [X, roi, scales] or [x, roi, scales, sizes]
+    // opset-13: may have empty input, [X, "", "", sizes] or [x, "", scales]
     int scalesInputId = node_proto.input_size() == 2 ? 1 : 2;
+    const std::string& scale_name = node_proto.input(scalesInputId);
+    Mat scales;
+    if(!scale_name.empty())
+        scales = getBlob(node_proto, scalesInputId);
 
-    Mat scales = getBlob(node_proto, scalesInputId);
     if (!scales.empty())
     {
         CV_CheckEQ(scales.total(), (size_t)4, "HCHW layout is expected");
         layerParams.set("zoom_factor_y", scales.at<float>(2));
         layerParams.set("zoom_factor_x", scales.at<float>(3));
     }
-    else if (node_proto.input_size() >= 4)  // opset-11
+    else if (node_proto.input_size() >= 4)  // opset-11 [x, roi, scales, sizes] or opset-13: input = [X, "", "", sizes]
     {
         const std::string& inputSizes = node_proto.input(3);
         if (constBlobs.find(inputSizes) != constBlobs.end())
@@ -2871,6 +2973,15 @@ void ONNXImporter::parseCumSum(LayerParams& layerParams, const opencv_onnx::Node
     addLayer(layerParams, node_proto);
 }
 
+void ONNXImporter::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+    // Inputs with no entry in layer_id are fetched via getBlob() and stored as layer blobs.
+    for (int idx = 0; idx < node_proto.input_size(); ++idx)
+        if (layer_id.find(node_proto.input(idx)) == layer_id.end())
+            layerParams.blobs.push_back(getBlob(node_proto, idx));
+    addLayer(layerParams, node_proto);
+}
+
 void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 {
     const std::string& name = layerParams.name;
@@ -2886,20 +2997,11 @@ void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx:
         }
     }
 
-    CV_LOG_INFO(NULL, "DNN/ONNX: unknown node type, try using custom handler for node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
+    CV_LOG_IF_INFO(NULL, !LayerFactory::isLayerRegistered(layer_type), "DNN/ONNX: unknown node type, try using custom handler for node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
             << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
     );
 
-    if (missingLayerHandler)
-    {
-        missingLayerHandler->addMissing(layerParams.name, layerParams.type);
-    }
-
-    for (int j = 0; j < node_proto.input_size(); j++) {
-        if (layer_id.find(node_proto.input(j)) == layer_id.end())
-            layerParams.blobs.push_back(getBlob(node_proto, j));
-    }
-    addLayer(layerParams, node_proto);
+    parseSimpleLayers(layerParams, node_proto);
 }
 
 void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
@@ -3349,6 +3451,15 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
     dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput;
     dispatch["CumSum"] = &ONNXImporter::parseCumSum;
 
+    std::vector<std::string> simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos",
+                                          "Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish",
+                                          "Identity", "Log", "Round", "Selu", "Sigmoid", "Sin", "Sinh", "Softmax",
+                                          "Softplus", "Softsign", "Sqrt", "Tan", "ThresholdedRelu"};
+    for (const auto& name : simpleLayers)
+    {
+        dispatch[name] = &ONNXImporter::parseSimpleLayers;
+    }
+
     // ai.onnx: opset 10+
     dispatch["QuantizeLinear"] = dispatch["DequantizeLinear"] = &ONNXImporter::parseQuantDequant;
     dispatch["QLinearConv"] = &ONNXImporter::parseQConv;
diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp
index d9b98404c3..2899545c6d 100644
--- a/modules/dnn/src/op_inf_engine.cpp
+++ b/modules/dnn/src/op_inf_engine.cpp
@@ -20,52 +20,17 @@ namespace cv { namespace dnn {
 
 #ifdef HAVE_INF_ENGINE
 
-static Backend parseInferenceEngineBackendType(const cv::String& backend)
-{
-    CV_Assert(!backend.empty());
-    if (backend == CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        return DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
-    if (backend == CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API)
-        return DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019;
-    CV_Error(Error::StsBadArg, cv::format("Unknown IE backend: %s", backend.c_str()));
-}
-static const char* dumpInferenceEngineBackendType(Backend backend)
-{
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        return CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        return CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API;
-    CV_Error(Error::StsBadArg, cv::format("Invalid backend ID for IE: %d", backend));
-}
-Backend& getInferenceEngineBackendTypeParam()
-{
-    static Backend param = parseInferenceEngineBackendType(
-        utils::getConfigurationParameterString("OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE",
-#ifdef HAVE_DNN_NGRAPH
-            CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH
-#elif defined(HAVE_DNN_IE_NN_BUILDER_2019)
-            CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API
-#else
-#error "Build configuration error: nGraph or NN Builder API backend should be enabled"
-#endif
-        )
-    );
-    return param;
-}
-
 CV__DNN_INLINE_NS_BEGIN
 
 cv::String getInferenceEngineBackendType()
 {
-    return dumpInferenceEngineBackendType(getInferenceEngineBackendTypeParam());
+    return "NGRAPH";
 }
 cv::String setInferenceEngineBackendType(const cv::String& newBackendType)
 {
-    Backend newBackend = parseInferenceEngineBackendType(newBackendType);
-    Backend& param = getInferenceEngineBackendTypeParam();
-    Backend old = param;
-    param = newBackend;
-    return dumpInferenceEngineBackendType(old);
+    if (newBackendType != "NGRAPH")
+        CV_Error(Error::StsNotImplemented, cv::format("DNN/IE: only NGRAPH backend is supported: %s", newBackendType.c_str()));
+    return newBackendType;
 }
 
 CV__DNN_INLINE_NS_END
@@ -98,508 +63,6 @@ void infEngineBlobsToMats(const std::vector<InferenceEngine::Blob::Ptr>& blobs,
 }
 
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-
-// For networks with input layer which has an empty name, IE generates a name id[some_number].
-// OpenCV lets users use an empty input name and to prevent unexpected naming,
-// we can use some predefined name.
-static std::string kDefaultInpLayerName = "empty_inp_layer_name";
-static std::string kOpenCVLayersType = "OpenCVLayer";
-
-static std::string shapesToStr(const std::vector<Mat>& mats)
-{
-    std::ostringstream shapes;
-    shapes << mats.size() << " ";
-    for (const Mat& m : mats)
-    {
-        shapes << m.dims << " ";
-        for (int i = 0; i < m.dims; ++i)
-            shapes << m.size[i] << " ";
-    }
-    return shapes.str();
-}
-
-static void strToShapes(const std::string& str, std::vector<std::vector<size_t> >& shapes)
-{
-    std::istringstream ss(str);
-    int num, dims;
-    ss >> num;
-    shapes.resize(num);
-    for (int i = 0; i < num; ++i)
-    {
-        ss >> dims;
-        shapes[i].resize(dims);
-        for (int j = 0; j < dims; ++j)
-            ss >> shapes[i][j];
-    }
-}
-
-class InfEngineCustomLayer : public InferenceEngine::ILayerExecImpl
-{
-public:
-    explicit InfEngineCustomLayer(const InferenceEngine::CNNLayer& layer) : cnnLayer(layer)
-    {
-        std::istringstream iss(layer.GetParamAsString("impl"));
-        size_t ptr;
-        iss >> ptr;
-        cvLayer = (Layer*)ptr;
-
-        std::vector<std::vector<size_t> > shapes;
-        strToShapes(layer.GetParamAsString("internals"), shapes);
-        internals.resize(shapes.size());
-        for (int i = 0; i < shapes.size(); ++i)
-            internals[i].create(std::vector<int>(shapes[i].begin(), shapes[i].end()), CV_32F);
-    }
-
-    virtual InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
-                                                std::vector<InferenceEngine::Blob::Ptr>& outputs,
-                                                InferenceEngine::ResponseDesc *resp) noexcept
-    {
-        std::vector<Mat> inpMats, outMats;
-        infEngineBlobsToMats(inputs, inpMats);
-        infEngineBlobsToMats(outputs, outMats);
-
-        try
-        {
-            cvLayer->forward(inpMats, outMats, internals);
-            return InferenceEngine::StatusCode::OK;
-        }
-        catch (...)
-        {
-            return InferenceEngine::StatusCode::GENERAL_ERROR;
-        }
-    }
-
-    virtual InferenceEngine::StatusCode
-    getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
-                               InferenceEngine::ResponseDesc* resp) noexcept
-    {
-        std::vector<InferenceEngine::DataConfig> inDataConfig;
-        std::vector<InferenceEngine::DataConfig> outDataConfig;
-        for (auto& it : cnnLayer.insData)
-        {
-            InferenceEngine::DataConfig conf;
-            conf.desc = it.lock()->getTensorDesc();
-            inDataConfig.push_back(conf);
-        }
-
-        for (auto& it : cnnLayer.outData)
-        {
-            InferenceEngine::DataConfig conf;
-            conf.desc = it->getTensorDesc();
-            outDataConfig.push_back(conf);
-        }
-
-        InferenceEngine::LayerConfig layerConfig;
-        layerConfig.inConfs = inDataConfig;
-        layerConfig.outConfs = outDataConfig;
-
-        conf.push_back(layerConfig);
-        return InferenceEngine::StatusCode::OK;
-    }
-
-    InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config,
-                                     InferenceEngine::ResponseDesc *resp) noexcept
-    {
-        return InferenceEngine::StatusCode::OK;
-    }
-
-private:
-    InferenceEngine::CNNLayer cnnLayer;
-    dnn::Layer* cvLayer;
-    std::vector<Mat> internals;
-};
-
-class InfEngineCustomLayerShapeInfer : public InferenceEngine::IShapeInferImpl
-{
-public:
-      InferenceEngine::StatusCode
-      inferShapes(const std::vector<InferenceEngine::Blob::CPtr>& inBlobs,
-                  const std::map<std::string, std::string>& params,
-                  const std::map<std::string, InferenceEngine::Blob::Ptr>& blobs,
-                  std::vector<InferenceEngine::SizeVector>& outShapes,
-                  InferenceEngine::ResponseDesc* desc) noexcept override
-      {
-          strToShapes(params.at("outputs"), outShapes);
-          return InferenceEngine::StatusCode::OK;
-      }
-};
-
-class InfEngineCustomLayerFactory : public InferenceEngine::ILayerImplFactory {
-public:
-    explicit InfEngineCustomLayerFactory(const InferenceEngine::CNNLayer* layer) : cnnLayer(*layer) {}
-
-    InferenceEngine::StatusCode
-    getImplementations(std::vector<InferenceEngine::ILayerImpl::Ptr>& impls,
-                       InferenceEngine::ResponseDesc* resp) noexcept override {
-        impls.push_back(std::make_shared<InfEngineCustomLayer>(cnnLayer));
-        return InferenceEngine::StatusCode::OK;
-    }
-
-private:
-    InferenceEngine::CNNLayer cnnLayer;
-};
-
-InferenceEngine::StatusCode InfEngineExtension::getFactoryFor(
-        InferenceEngine::ILayerImplFactory*& factory,
-        const InferenceEngine::CNNLayer* cnnLayer,
-        InferenceEngine::ResponseDesc* resp
-) noexcept
-{
-    if (cnnLayer->type != kOpenCVLayersType)
-        return InferenceEngine::StatusCode::NOT_IMPLEMENTED;
-    factory = new InfEngineCustomLayerFactory(cnnLayer);
-    return InferenceEngine::StatusCode::OK;
-}
-
-InfEngineBackendNode::InfEngineBackendNode(const InferenceEngine::Builder::Layer& _layer)
-    : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019), layer(_layer) {}
-
-    InfEngineBackendNode::InfEngineBackendNode(Ptr<Layer>& cvLayer_, std::vector<Mat*>& inputs,
-                                               std::vector<Mat>& outputs,
-                                               std::vector<Mat>& internals)
-        : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019), layer(cvLayer_->name),
-          cvLayer(cvLayer_)
-{
-    CV_Assert(!cvLayer->name.empty());
-    layer.setName(cvLayer->name);
-    layer.setType(kOpenCVLayersType);
-    layer.getParameters()["impl"] = (size_t)cvLayer.get();
-    layer.getParameters()["outputs"] = shapesToStr(outputs);
-    layer.getParameters()["internals"] = shapesToStr(internals);
-    layer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
-    layer.setOutputPorts(std::vector<InferenceEngine::Port>(outputs.size()));
-}
-
-static std::vector<Ptr<InfEngineBackendWrapper> >
-infEngineWrappers(const std::vector<Ptr<BackendWrapper> >& ptrs)
-{
-    std::vector<Ptr<InfEngineBackendWrapper> > wrappers(ptrs.size());
-    for (int i = 0; i < ptrs.size(); ++i)
-    {
-        CV_Assert(!ptrs[i].empty());
-        wrappers[i] = ptrs[i].dynamicCast<InfEngineBackendWrapper>();
-        CV_Assert(!wrappers[i].empty());
-    }
-    return wrappers;
-}
-
-InfEngineBackendNet::InfEngineBackendNet() : netBuilder("")
-{
-    hasNetOwner = false;
-    device_name = "CPU";
-}
-
-InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net) : netBuilder(""), cnn(net)
-{
-    hasNetOwner = true;
-    device_name = "CPU";
-}
-
-void InfEngineBackendNet::connect(const std::vector<Ptr<BackendWrapper> >& inputs,
-                                  const std::vector<Ptr<BackendWrapper> >& outputs,
-                                  const std::string& layerName)
-{
-    std::vector<Ptr<InfEngineBackendWrapper> > inpWrappers = infEngineWrappers(inputs);
-    std::map<std::string, int>::iterator it = layers.find(layerName);
-    CV_Assert(it != layers.end());
-
-    const int layerId = it->second;
-    for (size_t i = 0; i < inpWrappers.size(); ++i)
-    {
-        const auto& inp = inpWrappers[i];
-        const std::string& inpName = inp->dataPtr->getName();
-
-        std::string inpLayerName = inpName;
-        size_t inpPortId = inpName.rfind('.');
-        if (inpPortId != std::string::npos)
-        {
-            std::string portIdStr = inpName.substr(inpPortId + 1);
-            if (std::all_of(portIdStr.begin(), portIdStr.end(), ::isdigit))
-            {
-                inpLayerName = inpName.substr(0, inpPortId);
-                inpPortId = atoi(portIdStr.c_str());
-            }
-            else
-                inpPortId = 0;
-        }
-        else
-            inpPortId = 0;
-
-        int inpId;
-        it = layers.find(inpLayerName);
-        if (it == layers.end())
-        {
-            InferenceEngine::Builder::InputLayer inpLayer(!inpLayerName.empty() ? inpLayerName : kDefaultInpLayerName);
-            std::vector<size_t> shape(inp->blob->getTensorDesc().getDims());
-            inpLayer.setPort(InferenceEngine::Port(shape));
-            inpId = netBuilder.addLayer(inpLayer);
-
-            layers.insert({inpName, inpId});
-        }
-        else
-            inpId = it->second;
-
-        netBuilder.connect({(size_t)inpId, inpPortId}, {(size_t)layerId, i});
-        unconnectedPorts.erase({inpId, inpPortId});
-    }
-    CV_Assert(!outputs.empty());
-    for (int i = 0; i < outputs.size(); ++i)
-    {
-        InferenceEngine::DataPtr dataPtr = infEngineDataNode(outputs[i]);
-        std::string outputName = outputs.size() > 1 ? (layerName + "." + std::to_string(i)) : layerName;
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1)
-        dataPtr->name = outputName;
-#else
-        dataPtr->setName(outputName);
-#endif
-    }
-}
-
-void InfEngineBackendNet::init(Target targetId)
-{
-    if (!hasNetOwner)
-    {
-        CV_Assert(!unconnectedPorts.empty());
-        for (const auto& port : unconnectedPorts)
-        {
-            InferenceEngine::Builder::OutputLayer outLayer("myconv1");
-#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
-            // Inference Engine determines network precision by ports.
-            InferenceEngine::Precision p = (targetId == DNN_TARGET_MYRIAD ||
-                                            targetId == DNN_TARGET_HDDL ||
-                                            targetId == DNN_TARGET_OPENCL_FP16) ?
-                                           InferenceEngine::Precision::FP16 :
-                                           InferenceEngine::Precision::FP32;
-            outLayer.setPort(InferenceEngine::Port({}, p));
-#endif
-            netBuilder.addLayer({InferenceEngine::PortInfo(port.first, port.second)}, outLayer);
-        }
-        netBuilder.getContext().addShapeInferImpl(kOpenCVLayersType,
-                            std::make_shared<InfEngineCustomLayerShapeInfer>());
-        cnn = InferenceEngine::CNNNetwork(InferenceEngine::Builder::convertToICNNNetwork(netBuilder.build()));
-    }
-
-    switch (targetId)
-    {
-        case DNN_TARGET_CPU:
-            device_name = "CPU";
-            break;
-        case DNN_TARGET_OPENCL:
-        case DNN_TARGET_OPENCL_FP16:
-            device_name = "GPU";
-            break;
-        case DNN_TARGET_MYRIAD:
-            device_name = "MYRIAD";
-            break;
-        case DNN_TARGET_HDDL:
-            device_name = "HDDL";
-            break;
-        case DNN_TARGET_FPGA:
-            device_name = "FPGA";
-            break;
-        default:
-            CV_Error(Error::StsNotImplemented, "Unknown target");
-    };
-
-    for (const auto& name : requestedOutputs)
-    {
-        cnn.addOutput(name);
-    }
-
-    for (const auto& it : cnn.getInputsInfo())
-    {
-        const std::string& name = it.first;
-        auto blobIt = allBlobs.find(name);
-        CV_Assert(blobIt != allBlobs.end());
-        it.second->setPrecision(blobIt->second->getTensorDesc().getPrecision());
-    }
-    for (const auto& it : cnn.getOutputsInfo())
-    {
-        const std::string& name = it.first;
-        auto blobIt = allBlobs.find(name);
-        CV_Assert(blobIt != allBlobs.end());
-        it.second->setPrecision(blobIt->second->getTensorDesc().getPrecision());  // Should be always FP32
-    }
-
-    initPlugin(cnn);
-}
-
-void InfEngineBackendNet::addLayer(InferenceEngine::Builder::Layer& layer)
-{
-#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
-    // Add weights to network and connect them after input blobs.
-    std::map<std::string, InferenceEngine::Parameter>& params = layer.getParameters();
-    std::vector<int> blobsIds;
-    std::vector<int> portIds;
-    for (const std::string& name : {"weights", "biases"})
-    {
-        bool asInput = false;
-        int portId = 0;
-        for (int i = 0; i < layer.getInputPorts().size(); ++i)
-        {
-            const auto& port = layer.getInputPorts()[i];
-            auto it = port.getParameters().find("type");
-            if (it != port.getParameters().end() && it->second == name)
-            {
-                portId = i;
-                asInput = true;
-                break;
-            }
-        }
-
-        if (!asInput)
-            continue;
-
-        auto it = params.find(name);
-        if (it != params.end())
-        {
-            InferenceEngine::Blob::Ptr blob = it->second.as<InferenceEngine::Blob::Ptr>();
-            params.erase(it);
-            int blobId = netBuilder.addLayer(InferenceEngine::Builder::ConstLayer(name).setData(blob));
-            blobsIds.push_back(blobId);
-            portIds.push_back(portId);
-        }
-    }
-#endif
-
-    int id = netBuilder.addLayer(layer);
-    const std::string& layerName = layer.getName();
-
-    CV_Assert(layers.insert({layerName, id}).second);
-    for (int i = 0; i < layer.getOutputPorts().size(); ++i)
-        unconnectedPorts.insert({id, i});
-
-#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
-    // By default, all the weights are connected to last ports ids.
-    for (int i = 0; i < blobsIds.size(); ++i)
-    {
-        netBuilder.connect((size_t)blobsIds[i], {(size_t)id, (size_t)portIds[i]});
-    }
-#endif
-}
-
-void InfEngineBackendNet::addOutput(const std::string& name)
-{
-    requestedOutputs.push_back(name);
-}
-
-static InferenceEngine::Layout estimateLayout(const Mat& m)
-{
-    if (m.dims == 4)
-        return InferenceEngine::Layout::NCHW;
-    else if (m.dims == 2)
-        return InferenceEngine::Layout::NC;
-    else
-        return InferenceEngine::Layout::ANY;
-}
-
-static InferenceEngine::DataPtr wrapToInfEngineDataNode(const Mat& m, const std::string& name = "")
-{
-    std::vector<size_t> shape = getShape<size_t>(m);
-    if (m.type() == CV_32F)
-        return InferenceEngine::DataPtr(new InferenceEngine::Data(name,
-               {InferenceEngine::Precision::FP32, shape, estimateLayout(m)}));
-    else if (m.type() == CV_8U)
-        return InferenceEngine::DataPtr(new InferenceEngine::Data(name,
-               {InferenceEngine::Precision::U8, shape, estimateLayout(m)}));
-    else
-        CV_Error(Error::StsNotImplemented, format("Unsupported data type %d", m.type()));
-}
-
-InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape,
-                                               InferenceEngine::Layout layout)
-{
-    if (m.type() == CV_32F)
-        return InferenceEngine::make_shared_blob<float>(
-               {InferenceEngine::Precision::FP32, shape, layout}, (float*)m.data);
-    else if (m.type() == CV_8U)
-        return InferenceEngine::make_shared_blob<uint8_t>(
-               {InferenceEngine::Precision::U8, shape, layout}, (uint8_t*)m.data);
-    else
-        CV_Error(Error::StsNotImplemented, format("Unsupported data type %d", m.type()));
-}
-
-InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout)
-{
-    std::vector<size_t> shape = getShape<size_t>(m);
-    return wrapToInfEngineBlob(m, shape, layout);
-}
-
-InferenceEngine::Blob::Ptr cloneBlob(const InferenceEngine::Blob::Ptr& blob)
-{
-    InferenceEngine::Blob::Ptr copy;
-    auto description = blob->getTensorDesc();
-    InferenceEngine::Precision precision = description.getPrecision();
-    if (precision == InferenceEngine::Precision::FP32)
-    {
-        copy = InferenceEngine::make_shared_blob<float>(description);
-    }
-    else if (precision == InferenceEngine::Precision::U8)
-    {
-        copy = InferenceEngine::make_shared_blob<uint8_t>(description);
-    }
-    else
-        CV_Error(Error::StsNotImplemented, "Unsupported blob precision");
-    copy->allocate();
-    return copy;
-}
-
-InferenceEngine::DataPtr infEngineDataNode(const Ptr<BackendWrapper>& ptr)
-{
-    CV_Assert(!ptr.empty());
-    Ptr<InfEngineBackendWrapper> p = ptr.dynamicCast<InfEngineBackendWrapper>();
-    CV_Assert(!p.empty());
-    return p->dataPtr;
-}
-
-InfEngineBackendWrapper::InfEngineBackendWrapper(int targetId, const cv::Mat& m)
-    : BackendWrapper(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, targetId)
-{
-    dataPtr = wrapToInfEngineDataNode(m);
-    blob = wrapToInfEngineBlob(m, estimateLayout(m));
-}
-
-InfEngineBackendWrapper::InfEngineBackendWrapper(Ptr<BackendWrapper> wrapper)
-    : BackendWrapper(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, wrapper->targetId)
-{
-    Ptr<InfEngineBackendWrapper> ieWrapper = wrapper.dynamicCast<InfEngineBackendWrapper>();
-    CV_Assert(!ieWrapper.empty());
-    InferenceEngine::DataPtr srcData = ieWrapper->dataPtr;
-
-    dataPtr = InferenceEngine::DataPtr(new InferenceEngine::Data(srcData->getName(), srcData->getTensorDesc()));
-    blob = ieWrapper->blob;
-}
-
-Ptr<BackendWrapper> InfEngineBackendWrapper::create(Ptr<BackendWrapper> wrapper)
-{
-    return Ptr<BackendWrapper>(new InfEngineBackendWrapper(wrapper));
-}
-
-InfEngineBackendWrapper::~InfEngineBackendWrapper()
-{
-
-}
-
-void InfEngineBackendWrapper::copyToHost()
-{
-
-}
-
-void InfEngineBackendWrapper::setHostDirty()
-{
-
-}
-
-#endif // HAVE_DNN_IE_NN_BUILDER_2019
-
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1)
-static std::map<std::string, InferenceEngine::InferenceEnginePluginPtr>& getSharedPlugins()
-{
-    static std::map<std::string, InferenceEngine::InferenceEnginePluginPtr> sharedPlugins;
-    return sharedPlugins;
-}
-#else
 static bool init_IE_plugins()
 {
     // load and hold IE plugins
@@ -653,7 +116,7 @@ InferenceEngine::Core& getCore(const std::string& id)
             : create_IE_Core_instance(id);
     return core;
 }
-#endif
+
 
 static bool detectArmPlugin_()
 {
@@ -672,10 +135,10 @@ static bool detectArmPlugin_()
 }
 
 #if !defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT)
-static bool detectMyriadX_(std::string device)
+static bool detectMyriadX_(const std::string& device)
 {
     AutoLock lock(getInitializationMutex());
-#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R3)
+
     // Lightweight detection
     InferenceEngine::Core& ie = getCore(device);
     const std::vector<std::string> devices = ie.GetAvailableDevices();
@@ -689,481 +152,22 @@ static bool detectMyriadX_(std::string device)
         }
     }
     return false;
-#else
-    InferenceEngine::Builder::Network builder("");
-    InferenceEngine::idx_t inpId = builder.addLayer(
-                                   InferenceEngine::Builder::InputLayer().setPort(InferenceEngine::Port({1})));
-
-#if INF_ENGINE_RELEASE <= 2018050000
-    InferenceEngine::idx_t clampId;
-    {
-        InferenceEngine::Builder::Layer l = InferenceEngine::Builder::ClampLayer();
-        auto& blobs = l.getConstantData();
-        auto blob = InferenceEngine::make_shared_blob<int16_t>(
-                        InferenceEngine::Precision::FP16,
-                        InferenceEngine::Layout::C, {1});
-        blob->allocate();
-        blobs[""] = blob;
-        clampId = builder.addLayer({inpId}, l);
-    }
-    builder.addLayer({InferenceEngine::PortInfo(clampId)}, InferenceEngine::Builder::OutputLayer());
-#else
-
-    InferenceEngine::idx_t clampId = builder.addLayer({inpId}, InferenceEngine::Builder::ClampLayer());
-    builder.addLayer({InferenceEngine::PortInfo(clampId)},
-                      InferenceEngine::Builder::OutputLayer().setPort(InferenceEngine::Port({},
-                      InferenceEngine::Precision::FP16)));
-#endif
-
-    InferenceEngine::CNNNetwork cnn = InferenceEngine::CNNNetwork(
-                                      InferenceEngine::Builder::convertToICNNNetwork(builder.build()));
-
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1)
-    InferenceEngine::InferenceEnginePluginPtr enginePtr;
-    {
-        auto& sharedPlugins = getSharedPlugins();
-        auto pluginIt = sharedPlugins.find(device);
-        if (pluginIt != sharedPlugins.end()) {
-            enginePtr = pluginIt->second;
-        } else {
-            auto dispatcher = InferenceEngine::PluginDispatcher({""});
-            enginePtr = dispatcher.getPluginByDevice(device);
-            sharedPlugins[device] = enginePtr;
-        }
-    }
-    auto plugin = InferenceEngine::InferencePlugin(enginePtr);
-    try
-    {
-        auto netExec = plugin.LoadNetwork(cnn, {{"VPU_PLATFORM", "VPU_2480"}});
-#else
-    try
-    {
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
-        auto netExec = getCore(device).LoadNetwork(cnn, device, {{"VPU_PLATFORM", "VPU_2480"}});
-#else
-        auto netExec = getCore(device).LoadNetwork(cnn, device, {{"VPU_MYRIAD_PLATFORM", "VPU_MYRIAD_2480"}});
-#endif
-#endif
-        auto infRequest = netExec.CreateInferRequest();
-    } catch(...) {
-        return false;
-    }
-    return true;
-#endif
 }
 #endif  // !defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT)
 
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-
-void InfEngineBackendNet::initPlugin(InferenceEngine::CNNNetwork& net)
-{
-    CV_Assert(!isInitialized());
-
-    try
-    {
-        AutoLock lock(getInitializationMutex());
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1)
-        auto& sharedPlugins = getSharedPlugins();
-        auto pluginIt = sharedPlugins.find(device_name);
-        if (pluginIt != sharedPlugins.end())
-        {
-            enginePtr = pluginIt->second;
-        }
-        else
-#else
-        InferenceEngine::Core& ie = getCore(device_name);
-#endif
-        {
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1)
-            auto dispatcher = InferenceEngine::PluginDispatcher({""});
-            if (device_name == "FPGA")
-                enginePtr = dispatcher.getPluginByDevice("HETERO:FPGA,CPU");
-            else
-                enginePtr = dispatcher.getPluginByDevice(device_name);
-            sharedPlugins[device_name] = enginePtr;
-#else
-            isInit = true;
-#endif
-            std::vector<std::string> candidates;
-            std::string param_pluginPath = utils::getConfigurationParameterString("OPENCV_DNN_IE_EXTRA_PLUGIN_PATH", "");
-            if (!param_pluginPath.empty())
-            {
-                candidates.push_back(param_pluginPath);
-            }
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
-            if (device_name == "CPU" || device_name == "FPGA")
-            {
-                std::string suffixes[] = {"_avx2", "_sse4", ""};
-                bool haveFeature[] = {
-                    checkHardwareSupport(CPU_AVX2),
-                    checkHardwareSupport(CPU_SSE4_2),
-                    true
-                };
-                for (int i = 0; i < 3; ++i)
-                {
-                    if (!haveFeature[i])
-                        continue;
-#ifdef _WIN32
-                    candidates.push_back("cpu_extension" + suffixes[i] + ".dll");
-#elif defined(__APPLE__)
-                    candidates.push_back("libcpu_extension" + suffixes[i] + ".so");  // built as loadable module
-                    candidates.push_back("libcpu_extension" + suffixes[i] + ".dylib");  // built as shared library
-#else
-                    candidates.push_back("libcpu_extension" + suffixes[i] + ".so");
-#endif  // _WIN32
-                }
-            }
-#endif
-            bool found = false;
-            for (size_t i = 0; i != candidates.size(); ++i)
-            {
-                const std::string& libName = candidates[i];
-                try
-                {
-                    InferenceEngine::IExtensionPtr extension =
-                        InferenceEngine::make_so_pointer<InferenceEngine::IExtension>(libName);
-
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1)
-                    enginePtr->AddExtension(extension, 0);
-#else
-                    ie.AddExtension(extension, "CPU");
-#endif
-                    CV_LOG_INFO(NULL, "DNN-IE: Loaded extension plugin: " << libName);
-                    found = true;
-                    break;
-                }
-                catch(...) {}
-            }
-            if (!found && !candidates.empty())
-            {
-                CV_LOG_WARNING(NULL, "DNN-IE: Can't load extension plugin (extra layers for some networks). Specify path via OPENCV_DNN_IE_EXTRA_PLUGIN_PATH parameter");
-            }
-            // Some of networks can work without a library of extra layers.
-#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2019R1)
-            // OpenCV fallbacks as extensions.
-            try
-            {
-                ie.AddExtension(std::make_shared<InfEngineExtension>(), "CPU");
-            }
-            catch(const std::exception& e)
-            {
-                CV_LOG_INFO(NULL, "DNN-IE: Can't register OpenCV custom layers extension: " << e.what());
-            }
-#endif
-            // Limit the number of CPU threads.
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1)
-#ifndef _WIN32
-            enginePtr->SetConfig({{
-                InferenceEngine::PluginConfigParams::KEY_CPU_THREADS_NUM, format("%d", getNumThreads()),
-            }}, 0);
-#endif  // _WIN32
-#else
-            if (device_name == "CPU")
-                ie.SetConfig({{
-                    InferenceEngine::PluginConfigParams::KEY_CPU_THREADS_NUM, format("%d", getNumThreads()),
-                }}, device_name);
-#endif
-        }
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1)
-        plugin = InferenceEngine::InferencePlugin(enginePtr);
-        netExec = plugin.LoadNetwork(net, {});
-#else
-        bool isHetero = false;
-        if (device_name != "CPU")
-        {
-            isHetero = device_name == "FPGA";
-            for (auto& layer : net)
-            {
-                if (layer->type == kOpenCVLayersType)
-                {
-                    isHetero = true;
-#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2019R3)
-                    // Not sure about lower versions but in 2019R3 we do not need this
-                    layer->affinity = "CPU";
-                }
-                else
-                {
-                    layer->affinity = device_name;
-#endif
-                }
-            }
-        }
-        if (isHetero)
-            netExec = ie.LoadNetwork(net, "HETERO:" + device_name + ",CPU");
-        else
-            netExec = ie.LoadNetwork(net, device_name);
-#endif
-    }
-    catch (const std::exception& ex)
-    {
-        CV_Error(Error::StsError, format("Failed to initialize Inference Engine backend (device = %s): %s", device_name.c_str(), ex.what()));
-    }
-}
-
-bool InfEngineBackendNet::isInitialized()
-{
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1)
-    return (bool)enginePtr;
-#else
-    return isInit;
-#endif
-}
-
-void InfEngineBackendNet::reset()
-{
-    allBlobs.clear();
-    infRequests.clear();
-    isInit = false;
-}
-
-void InfEngineBackendNet::addBlobs(const std::vector<cv::Ptr<BackendWrapper> >& ptrs)
-{
-    auto wrappers = infEngineWrappers(ptrs);
-    for (const auto& wrapper : wrappers)
-    {
-        std::string name = wrapper->dataPtr->getName();
-        name = name.empty() ? kDefaultInpLayerName : name;
-        allBlobs.insert({name, wrapper->blob});
-    }
-}
-
-void InfEngineBackendNet::InfEngineReqWrapper::makePromises(const std::vector<Ptr<BackendWrapper> >& outsWrappers)
-{
-    auto outs = infEngineWrappers(outsWrappers);
-    outProms.clear();
-    outProms.resize(outs.size());
-    outsNames.resize(outs.size());
-    for (int i = 0; i < outs.size(); ++i)
-    {
-        outs[i]->futureMat = outProms[i].getArrayResult();
-        outsNames[i] = outs[i]->dataPtr->getName();
-    }
-}
-
-void InfEngineBackendNet::forward(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                                  bool isAsync)
-{
-    CV_LOG_DEBUG(NULL, "InfEngineBackendNet::forward(" << (isAsync ? "async" : "sync") << ")");
-    // Look for finished requests.
-    Ptr<InfEngineReqWrapper> reqWrapper;
-    for (auto& wrapper : infRequests)
-    {
-        if (wrapper->isReady)
-        {
-            reqWrapper = wrapper;
-            break;
-        }
-    }
-    if (reqWrapper.empty())
-    {
-        reqWrapper = Ptr<InfEngineReqWrapper>(new InfEngineReqWrapper());
-        try
-        {
-            reqWrapper->req = netExec.CreateInferRequest();
-        }
-        catch (const std::exception& ex)
-        {
-            CV_Error(Error::StsAssert, format("Failed to initialize Inference Engine backend: %s", ex.what()));
-        }
-        infRequests.push_back(reqWrapper);
-
-        InferenceEngine::BlobMap inpBlobs, outBlobs;
-        for (const auto& it : cnn.getInputsInfo())
-        {
-            const std::string& name = it.first;
-            auto blobIt = allBlobs.find(name);
-            CV_Assert(blobIt != allBlobs.end());
-            inpBlobs[name] = isAsync ? cloneBlob(blobIt->second) : blobIt->second;
-        }
-        for (const auto& it : cnn.getOutputsInfo())
-        {
-            const std::string& name = it.first;
-            auto blobIt = allBlobs.find(name);
-            CV_Assert(blobIt != allBlobs.end());
-            outBlobs[name] = isAsync ? cloneBlob(blobIt->second) : blobIt->second;
-        }
-        reqWrapper->req.SetInput(inpBlobs);
-        reqWrapper->req.SetOutput(outBlobs);
-
-        InferenceEngine::IInferRequest::Ptr infRequestPtr = reqWrapper->req;
-        infRequestPtr->SetUserData(reqWrapper.get(), 0);
-
-        infRequestPtr->SetCompletionCallback(
-            [](InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode status)
-            {
-                CV_LOG_DEBUG(NULL, "DNN(IE): completionCallback(" << (int)status << ")");
-
-                InfEngineReqWrapper* wrapper;
-                request->GetUserData((void**)&wrapper, 0);
-                CV_Assert(wrapper && "Internal error");
-
-                size_t processedOutputs = 0;
-                try
-                {
-                    for (; processedOutputs < wrapper->outProms.size(); ++processedOutputs)
-                    {
-                        const std::string& name = wrapper->outsNames[processedOutputs];
-                        Mat m = infEngineBlobToMat(wrapper->req.GetBlob(name));
-
-                        try
-                        {
-                            CV_Assert(status == InferenceEngine::StatusCode::OK);
-                            wrapper->outProms[processedOutputs].setValue(m.clone());
-                        }
-                        catch (...)
-                        {
-                            try {
-                                wrapper->outProms[processedOutputs].setException(std::current_exception());
-                            } catch(...) {
-                                CV_LOG_ERROR(NULL, "DNN: Exception occurred during async inference exception propagation");
-                            }
-                        }
-                    }
-                }
-                catch (...)
-                {
-                    std::exception_ptr e = std::current_exception();
-                    for (; processedOutputs < wrapper->outProms.size(); ++processedOutputs)
-                    {
-                        try {
-                            wrapper->outProms[processedOutputs].setException(e);
-                        } catch(...) {
-                            CV_LOG_ERROR(NULL, "DNN: Exception occurred during async inference exception propagation");
-                        }
-                    }
-                }
-                wrapper->isReady = true;
-            }
-        );
-    }
-    if (isAsync)
-    {
-        // Copy actual data to infer request's input blobs.
-        for (const auto& it : cnn.getInputsInfo())
-        {
-            const std::string& name = it.first;
-            auto blobIt = allBlobs.find(name);
-            Mat srcMat = infEngineBlobToMat(blobIt->second);
-            Mat dstMat = infEngineBlobToMat(reqWrapper->req.GetBlob(name));
-            srcMat.copyTo(dstMat);
-        }
-
-        // Set promises to output blobs wrappers.
-        reqWrapper->makePromises(outBlobsWrappers);
-
-        reqWrapper->isReady = false;
-        reqWrapper->req.StartAsync();
-    }
-    else
-    {
-        reqWrapper->req.Infer();
-    }
-}
-
-bool InfEngineBackendLayer::getMemoryShapes(const std::vector<MatShape> &inputs,
-                                            const int requiredOutputs,
-                                            std::vector<MatShape> &outputs,
-                                            std::vector<MatShape> &internals) const
-{
-    InferenceEngine::ICNNNetwork::InputShapes inShapes = t_net.getInputShapes();
-    InferenceEngine::ICNNNetwork::InputShapes::iterator itr;
-    bool equal_flag = true;
-    size_t i = 0;
-    for (itr = inShapes.begin(); itr != inShapes.end(); ++itr)
-    {
-        InferenceEngine::SizeVector currentInShape(inputs[i].begin(), inputs[i].end());
-        if (itr->second != currentInShape)
-        {
-            itr->second = currentInShape;
-            equal_flag = false;
-        }
-        i++;
-    }
-
-    if (!equal_flag)
-    {
-        InferenceEngine::CNNNetwork curr_t_net(t_net);
-        curr_t_net.reshape(inShapes);
-    }
-    std::vector<size_t> dims = t_net.getOutputsInfo()[name]->getDims();
-    outputs.push_back(MatShape(dims.begin(), dims.end()));
-    return false;
-}
-
-bool InfEngineBackendLayer::supportBackend(int backendId)
-{
-    CV_LOG_DEBUG(NULL, "InfEngineBackendLayer::supportBackend(" << backendId << ")");
-    return backendId == DNN_BACKEND_DEFAULT ||
-           (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019);
-}
-
-void InfEngineBackendLayer::forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs,
-                                    OutputArrayOfArrays internals)
-{
-    CV_Error(Error::StsInternal, "Choose Inference Engine as a preferable backend.");
-}
-
-InferenceEngine::Blob::Ptr convertFp16(const InferenceEngine::Blob::Ptr& blob)
-{
-    auto halfs = InferenceEngine::make_shared_blob<int16_t>({
-                     InferenceEngine::Precision::FP16, blob->getTensorDesc().getDims(),
-                     blob->getTensorDesc().getLayout()
-                 });
-    halfs->allocate();
-    Mat floatsData(1, blob->size(), CV_32F, blob->buffer());
-    Mat halfsData(1, blob->size(), CV_16SC1, halfs->buffer());
-    convertFp16(floatsData, halfsData);
-    return halfs;
-}
-
-void addConstantData(const std::string& name, InferenceEngine::Blob::Ptr data,
-                     InferenceEngine::Builder::Layer& l)
-{
-#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
-    l.getParameters()[name] = data;
-#else
-    l.addConstantData(name, data);
-#endif
-}
-
-#endif // HAVE_DNN_IE_NN_BUILDER_2019
-
 #endif  // HAVE_INF_ENGINE
 
-bool haveInfEngine()
-{
-#ifdef HAVE_INF_ENGINE
-    return true;
-#else
-    return false;
-#endif  // HAVE_INF_ENGINE
-}
-
-void forwardInfEngine(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                      Ptr<BackendNode>& node, bool isAsync)
-{
-    CV_Assert(haveInfEngine());
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-    CV_Assert(!node.empty());
-    Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
-    CV_Assert(!ieNode.empty());
-    ieNode->net->forward(outBlobsWrappers, isAsync);
-#else
-    CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support");
-#endif  // HAVE_INF_ENGINE
-}
 
 CV__DNN_INLINE_NS_BEGIN
 
 void resetMyriadDevice()
 {
 #ifdef HAVE_INF_ENGINE
+    CV_LOG_INFO(NULL, "DNN: Unregistering both 'MYRIAD' and 'HETERO:MYRIAD,CPU' plugins");
+
     AutoLock lock(getInitializationMutex());
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1)
-    getSharedPlugins().erase("MYRIAD");
-#else
-    // Unregister both "MYRIAD" and "HETERO:MYRIAD,CPU" plugins
+
     InferenceEngine::Core& ie = getCore("MYRIAD");
     try
     {
@@ -1171,18 +175,16 @@ void resetMyriadDevice()
         ie.UnregisterPlugin("HETERO");
     }
     catch (...) {}
-#endif
 #endif  // HAVE_INF_ENGINE
 }
 
 void releaseHDDLPlugin()
 {
 #ifdef HAVE_INF_ENGINE
+    CV_LOG_INFO(NULL, "DNN: Unregistering both 'HDDL' and 'HETERO:HDDL,CPU' plugins");
+
     AutoLock lock(getInitializationMutex());
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1)
-    getSharedPlugins().erase("HDDL");
-#else
-    // Unregister both "HDDL" and "HETERO:HDDL,CPU" plugins
+
     InferenceEngine::Core& ie = getCore("HDDL");
     try
     {
@@ -1190,7 +192,6 @@ void releaseHDDLPlugin()
         ie.UnregisterPlugin("HETERO");
     }
     catch (...) {}
-#endif
 #endif  // HAVE_INF_ENGINE
 }
 
diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp
index ab2f161eaf..ed1323d7dd 100644
--- a/modules/dnn/src/op_inf_engine.hpp
+++ b/modules/dnn/src/op_inf_engine.hpp
@@ -48,37 +48,16 @@
 #pragma GCC diagnostic ignored "-Wsuggest-override"
 #endif
 
-#if defined(HAVE_DNN_IE_NN_BUILDER_2019) || INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2020_4)
-//#define INFERENCE_ENGINE_DEPRECATED  // turn off deprecation warnings from IE
-//there is no way to suppress warnings from IE only at this moment, so we are forced to suppress warnings globally
-#if defined(__GNUC__)
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
-#ifdef _MSC_VER
-#pragma warning(disable: 4996)  // was declared deprecated
-#endif
-#endif
-
-#if defined(__GNUC__) && INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_1)
-#pragma GCC visibility push(default)
-#endif
-
 #include <inference_engine.hpp>
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-#include <ie_builders.hpp>
-#endif
-
-#if defined(__GNUC__) && INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_1)
-#pragma GCC visibility pop
-#endif
-
 #if defined(__GNUC__) && __GNUC__ >= 5
 //#pragma GCC diagnostic pop
 #endif
 
 #endif  // HAVE_INF_ENGINE
 
+#define CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 do { CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support (legacy API is not supported anymore)"); } while (0)  // statement-like macro (do/while(0)): raises StsNotImplemented reporting that the NN Builder API is unavailable
+
 namespace cv { namespace dnn {
 
 #ifdef HAVE_INF_ENGINE
@@ -90,167 +69,6 @@ Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob);
 void infEngineBlobsToMats(const std::vector<InferenceEngine::Blob::Ptr>& blobs,
                           std::vector<Mat>& mats);
 
-#ifdef HAVE_DNN_IE_NN_BUILDER_2019
-
-class InfEngineBackendNet
-{
-public:
-    InfEngineBackendNet();
-
-    InfEngineBackendNet(InferenceEngine::CNNNetwork& net);
-
-    void addLayer(InferenceEngine::Builder::Layer& layer);
-
-    void addOutput(const std::string& name);
-
-    void connect(const std::vector<Ptr<BackendWrapper> >& inputs,
-                 const std::vector<Ptr<BackendWrapper> >& outputs,
-                 const std::string& layerName);
-
-    bool isInitialized();
-
-    void init(Target targetId);
-
-    void forward(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                 bool isAsync);
-
-    void initPlugin(InferenceEngine::CNNNetwork& net);
-
-    void addBlobs(const std::vector<cv::Ptr<BackendWrapper> >& ptrs);
-
-    void reset();
-
-private:
-    InferenceEngine::Builder::Network netBuilder;
-
-    InferenceEngine::ExecutableNetwork netExec;
-    InferenceEngine::BlobMap allBlobs;
-    std::string device_name;
-#if INF_ENGINE_VER_MAJOR_LE(2019010000)
-    InferenceEngine::InferenceEnginePluginPtr enginePtr;
-    InferenceEngine::InferencePlugin plugin;
-#else
-    bool isInit = false;
-#endif
-
-    struct InfEngineReqWrapper
-    {
-        InfEngineReqWrapper() : isReady(true) {}
-
-        void makePromises(const std::vector<Ptr<BackendWrapper> >& outs);
-
-        InferenceEngine::InferRequest req;
-        std::vector<cv::AsyncPromise> outProms;
-        std::vector<std::string> outsNames;
-        bool isReady;
-    };
-
-    std::vector<Ptr<InfEngineReqWrapper> > infRequests;
-
-    InferenceEngine::CNNNetwork cnn;
-    bool hasNetOwner;
-
-    std::map<std::string, int> layers;
-    std::vector<std::string> requestedOutputs;
-
-    std::set<std::pair<int, int> > unconnectedPorts;
-};
-
-class InfEngineBackendNode : public BackendNode
-{
-public:
-    InfEngineBackendNode(const InferenceEngine::Builder::Layer& layer);
-
-    InfEngineBackendNode(Ptr<Layer>& layer, std::vector<Mat*>& inputs,
-                         std::vector<Mat>& outputs, std::vector<Mat>& internals);
-
-    void connect(std::vector<Ptr<BackendWrapper> >& inputs,
-                 std::vector<Ptr<BackendWrapper> >& outputs);
-
-    // Inference Engine network object that allows to obtain the outputs of this layer.
-    InferenceEngine::Builder::Layer layer;
-    Ptr<InfEngineBackendNet> net;
-    // CPU fallback in case of unsupported Inference Engine layer.
-    Ptr<dnn::Layer> cvLayer;
-};
-
-class InfEngineBackendWrapper : public BackendWrapper
-{
-public:
-    InfEngineBackendWrapper(int targetId, const Mat& m);
-
-    InfEngineBackendWrapper(Ptr<BackendWrapper> wrapper);
-
-    ~InfEngineBackendWrapper();
-
-    static Ptr<BackendWrapper> create(Ptr<BackendWrapper> wrapper);
-
-    virtual void copyToHost() CV_OVERRIDE;
-
-    virtual void setHostDirty() CV_OVERRIDE;
-
-    InferenceEngine::DataPtr dataPtr;
-    InferenceEngine::Blob::Ptr blob;
-    AsyncArray futureMat;
-};
-
-InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout = InferenceEngine::Layout::ANY);
-
-InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape, InferenceEngine::Layout layout);
-
-InferenceEngine::DataPtr infEngineDataNode(const Ptr<BackendWrapper>& ptr);
-
-// Convert Inference Engine blob with FP32 precision to FP16 precision.
-// Allocates memory for a new blob.
-InferenceEngine::Blob::Ptr convertFp16(const InferenceEngine::Blob::Ptr& blob);
-
-void addConstantData(const std::string& name, InferenceEngine::Blob::Ptr data, InferenceEngine::Builder::Layer& l);
-
-// This is a fake class to run networks from Model Optimizer. Objects of that
-// class simulate responses of layers are imported by OpenCV and supported by
-// Inference Engine. The main difference is that they do not perform forward pass.
-class InfEngineBackendLayer : public Layer
-{
-public:
-    InfEngineBackendLayer(const InferenceEngine::CNNNetwork &t_net_) : t_net(t_net_) {};
-
-    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
-                                 const int requiredOutputs,
-                                 std::vector<MatShape> &outputs,
-                                 std::vector<MatShape> &internals) const CV_OVERRIDE;
-
-    virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs,
-                         OutputArrayOfArrays internals) CV_OVERRIDE;
-
-    virtual bool supportBackend(int backendId) CV_OVERRIDE;
-
-private:
-    InferenceEngine::CNNNetwork t_net;
-};
-
-
-class InfEngineExtension : public InferenceEngine::IExtension
-{
-public:
-#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2)
-    virtual void SetLogCallback(InferenceEngine::IErrorListener&) noexcept {}
-#endif
-    virtual void Unload() noexcept {}
-    virtual void Release() noexcept {}
-    virtual void GetVersion(const InferenceEngine::Version*&) const noexcept {}
-
-    virtual InferenceEngine::StatusCode getPrimitiveTypes(char**&, unsigned int&,
-                                                          InferenceEngine::ResponseDesc*) noexcept
-    {
-        return InferenceEngine::StatusCode::OK;
-    }
-
-    InferenceEngine::StatusCode getFactoryFor(InferenceEngine::ILayerImplFactory*& factory,
-                                              const InferenceEngine::CNNLayer* cnnLayer,
-                                              InferenceEngine::ResponseDesc* resp) noexcept;
-};
-
-#endif  // HAVE_DNN_IE_NN_BUILDER_2019
 
 
 CV__DNN_INLINE_NS_BEGIN
@@ -273,14 +91,8 @@ static inline std::vector<T> getShape(const Mat& mat)
     return result;
 }
 
-
 #endif  // HAVE_INF_ENGINE
 
-bool haveInfEngine();
-
-void forwardInfEngine(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
-                      Ptr<BackendNode>& node, bool isAsync);
-
 }}  // namespace dnn, namespace cv
 
 #endif  // __OPENCV_DNN_OP_INF_ENGINE_HPP__
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index efaedfaab1..763abf3b4d 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -11,6 +11,8 @@ Implementation of Tensorflow models parser
 
 #include "../precomp.hpp"
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 #include <opencv2/core/utils/logger.defines.hpp>
 #include <opencv2/dnn/shape_utils.hpp>
 #undef CV_LOG_STRIP_LEVEL
@@ -513,6 +515,7 @@ class TFLayerHandler;
 
 class TFImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 public:
     TFImporter(Net& net, const char *model, const char *config = NULL);
     TFImporter(Net& net, const char *dataModel, size_t lenModel,
@@ -3090,10 +3093,8 @@ void TFImporter::populateNet()
     {
         const tensorflow::NodeDef& layer = net.node(li);
 
-        const std::string name = layer.name();
-        const std::string type = layer.op();
-        const int ninputs = layer.input_size();
-        CV_LOG_DEBUG(NULL, "DNN/TF: (" << li << "/" << layersSize << ") Parse layer " << name << " @ " << type << " with " << ninputs << " inputs");
+        CV_LOG_DEBUG(NULL, "DNN/TF: processing node (" << li << "/" << layersSize << ") with " << layer.input_size() << " inputs: "
+                                                           << cv::format("[%s]:(%s)", layer.op().c_str(), layer.name().c_str()));
 
         parseNode(layer);
     }
diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp
index 1e7f07a478..57a624d541 100644
--- a/modules/dnn/src/torch/torch_importer.cpp
+++ b/modules/dnn/src/torch/torch_importer.cpp
@@ -40,6 +40,9 @@
 //M*/
 
 #include "../precomp.hpp"
+
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 #include <limits>
 #include <set>
 #include <map>
@@ -106,6 +109,8 @@ static inline bool endsWith(const String &str, const char *substr)
 
 struct TorchImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     typedef std::map<String, std::pair<int, Mat> > TensorsMap;
     Net net;
 
diff --git a/modules/dnn/test/test_common.impl.hpp b/modules/dnn/test/test_common.impl.hpp
index 9de1ac6821..747dc02ce6 100644
--- a/modules/dnn/test/test_common.impl.hpp
+++ b/modules/dnn/test/test_common.impl.hpp
@@ -337,16 +337,6 @@ testing::internal::ParamGenerator< tuple<Backend, Target> > dnnBackendsAndTarget
     std::vector< tuple<Backend, Target> > targets;
     std::vector< Target > available;
 
-    {
-        available = getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019);
-        for (std::vector< Target >::const_iterator i = available.begin(); i != available.end(); ++i)
-        {
-            if ((*i == DNN_TARGET_MYRIAD || *i == DNN_TARGET_HDDL) && !withVPU)
-                continue;
-            targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, *i));
-        }
-    }
-
     {
         available = getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
         for (std::vector< Target >::const_iterator i = available.begin(); i != available.end(); ++i)
diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp
index 2846f9ae76..0fe19db5e9 100644
--- a/modules/dnn/test/test_ie_models.cpp
+++ b/modules/dnn/test/test_ie_models.cpp
@@ -371,17 +371,17 @@ TEST_P(DNNTestOpenVINO, models)
             || modelName == "person-vehicle-bike-detection-2004"  // 2021.4+: ncDeviceOpen:1013 Failed to find booted device after boot
         )
     )
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
     if (targetId == DNN_TARGET_OPENCL && (false
             || modelName == "face-detection-0106"  // Operation: 2278 of type ExperimentalDetectronPriorGridGenerator(op::v6) is not supported
         )
     )
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
     if (targetId == DNN_TARGET_OPENCL_FP16 && (false
             || modelName == "face-detection-0106"  // Operation: 2278 of type ExperimentalDetectronPriorGridGenerator(op::v6) is not supported
         )
     )
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
 
 #if INF_ENGINE_VER_MAJOR_GE(2020020000)
@@ -397,12 +397,7 @@ TEST_P(DNNTestOpenVINO, models)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
 
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
-    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-    else
-        FAIL() << "Unknown backendId";
+    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);
 
     bool isFP16 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD);
 
diff --git a/modules/dnn/test/test_int8_layers.cpp b/modules/dnn/test/test_int8_layers.cpp
index c181dfa5eb..6c41a8dbbb 100644
--- a/modules/dnn/test/test_int8_layers.cpp
+++ b/modules/dnn/test/test_int8_layers.cpp
@@ -218,8 +218,16 @@ TEST_P(Test_Int8_layers, ReLU6)
 TEST_P(Test_Int8_layers, Sigmoid)
 {
     testLayer("maxpooling_sigmoid", "ONNX", 0.0011, 0.0032);
-    testLayer("maxpooling_sigmoid_dynamic_axes", "ONNX", 0.0011, 0.0032);
-    testLayer("maxpooling_sigmoid_1d", "ONNX", 0.0011, 0.0037);
+}
+
+TEST_P(Test_Int8_layers, Sigmoid_dynamic_axes)  // standalone case for the dynamic-axes ONNX sigmoid model
+{
+    testLayer("maxpooling_sigmoid_dynamic_axes", "ONNX", 0.002, 0.0032);  // numeric args are presumably the L1 / L-inf tolerances - TODO confirm against testLayer()
+}
+
+TEST_P(Test_Int8_layers, Sigmoid_1d)
+{
+    testLayer("maxpooling_sigmoid_1d", "ONNX", 0.002, 0.0037);
 }
 
 TEST_P(Test_Int8_layers, Mish)
@@ -316,16 +324,48 @@ TEST_P(Test_Int8_layers, Identity)
     testLayer("expand_neg_batch", "ONNX", 0.00071, 0.0019);
 }
 
-TEST_P(Test_Int8_layers, Slice)
+TEST_P(Test_Int8_layers, Slice_split_tf)
 {
     testLayer("split", "TensorFlow", 0.0033, 0.0056);
+}
+
+TEST_P(Test_Int8_layers, Slice_4d_tf)
+{
     testLayer("slice_4d", "TensorFlow", 0.003, 0.0073);
+}
+
+TEST_P(Test_Int8_layers, Slice_strided_tf)
+{
     testLayer("strided_slice", "TensorFlow", 0.008, 0.0142);
+}
+
+TEST_P(Test_Int8_layers, Slice_onnx)
+{
     testLayer("slice", "ONNX", 0.0046, 0.0077);
-    testLayer("slice_dynamic_axes", "ONNX", 0.0039, 0.0084);
-    testLayer("slice_opset_11_steps_2d", "ONNX", 0.0052, 0.0124);
+}
+
+TEST_P(Test_Int8_layers, Slice_dynamic_axes_onnx)  // standalone case for the dynamic-axes ONNX slice model
+{
+    testLayer("slice_dynamic_axes", "ONNX", 0.0039, 0.02);  // numeric args are presumably the L1 / L-inf tolerances - TODO confirm against testLayer()
+}
+
+TEST_P(Test_Int8_layers, Slice_steps_2d_onnx11)  // standalone case for the "slice_opset_11_steps_2d" ONNX model
+{
+    testLayer("slice_opset_11_steps_2d", "ONNX", 0.01, 0.0124);  // numeric args are presumably the L1 / L-inf tolerances - TODO confirm against testLayer()
+}
+
+TEST_P(Test_Int8_layers, Slice_steps_3d_onnx11)
+{
     testLayer("slice_opset_11_steps_3d", "ONNX", 0.0068, 0.014);
+}
+
+TEST_P(Test_Int8_layers, Slice_steps_4d_onnx11)
+{
     testLayer("slice_opset_11_steps_4d", "ONNX", 0.0041, 0.008);
+}
+
+TEST_P(Test_Int8_layers, Slice_steps_5d_onnx11)
+{
     testLayer("slice_opset_11_steps_5d", "ONNX", 0.0085, 0.021);
 }
 
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index 8bbccdbc96..3f4a437637 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -1265,12 +1265,7 @@ TEST_P(Layer_Test_Convolution_DLDT, Accuracy)
     if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         throw SkipTestException("No support for async forward");
 
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
-    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-    else
-        FAIL() << "Unknown backendId";
+    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);
 
     Net netDefault = readNet(_tf("layer_convolution.caffemodel"), _tf("layer_convolution.prototxt"));
     Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
@@ -1310,12 +1305,7 @@ TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8)
     if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         throw SkipTestException("No support for async forward");
 
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
-    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-    else
-        FAIL() << "Unknown backendId";
+    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);
 
     int blobSize[] = {2, 6, 75, 113};
     Mat inputs[] = {Mat(4, &blobSize[0], CV_8U), Mat()};
@@ -1348,12 +1338,7 @@ TEST_P(Layer_Test_Convolution_DLDT, multithreading)
     if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
         throw SkipTestException("No support for async forward");
 
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
-    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-    else
-        FAIL() << "Unknown backendId";
+    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);
 
     std::string xmlPath = _tf("layer_convolution.xml");
     std::string binPath = _tf("layer_convolution.bin");
diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp
index 39bb73a918..108131bd9d 100644
--- a/modules/dnn/test/test_misc.cpp
+++ b/modules/dnn/test/test_misc.cpp
@@ -117,12 +117,7 @@ void test_readNet_IE_do_not_call_setInput(Backend backendId)
     const std::string& model = findDataFile("dnn/layers/layer_convolution.bin");
     const std::string& proto = findDataFile("dnn/layers/layer_convolution.xml");
 
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
-    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-    else
-        FAIL() << "Unknown backendId";
+    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);
 
     Net net = readNet(model, proto);
     net.setPreferableBackend(backendId);
@@ -462,12 +457,7 @@ TEST_P(Async, model_optimizer_pipeline_set_and_forward_single)
     const std::string& model = findDataFile("dnn/layers/layer_convolution.bin");
     const std::string& proto = findDataFile("dnn/layers/layer_convolution.xml");
 
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
-    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-    else
-        FAIL() << "Unknown backendId";
+    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);
 
     Net netSync = readNet(model, proto);
     netSync.setPreferableBackend(backendId);
@@ -523,12 +513,7 @@ TEST_P(Async, model_optimizer_pipeline_set_and_forward_all)
     const std::string& model = findDataFile("dnn/layers/layer_convolution.bin");
     const std::string& proto = findDataFile("dnn/layers/layer_convolution.xml");
 
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
-    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-    else
-        FAIL() << "Unknown backendId";
+    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);
 
     Net netSync = readNet(model, proto);
     netSync.setPreferableBackend(backendId);
@@ -586,12 +571,7 @@ TEST_P(Async, create_layer_pipeline_set_and_forward_all)
     if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && dtype == CV_8U)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
-    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-    else
-        FAIL() << "Unknown backendId";
+    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);
 
     Net netSync;
     Net netAsync;
@@ -697,12 +677,7 @@ TEST_P(Test_Model_Optimizer, forward_two_nets)
     const std::string& model = findDataFile("dnn/layers/layer_convolution.bin");
     const std::string& proto = findDataFile("dnn/layers/layer_convolution.xml");
 
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
-    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-    else
-        FAIL() << "Unknown backendId";
+    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);
 
     Net net0 = readNet(model, proto);
     net0.setPreferableTarget(targetId);
@@ -741,12 +716,7 @@ TEST_P(Test_Model_Optimizer, readFromBuffer)
     const std::string& weightsFile = findDataFile("dnn/layers/layer_convolution.bin");
     const std::string& modelFile = findDataFile("dnn/layers/layer_convolution.xml");
 
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
-    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-    else
-        FAIL() << "Unknown backendId";
+    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);
 
     Net net1 = readNetFromModelOptimizer(modelFile, weightsFile);
     net1.setPreferableBackend(backendId);
@@ -793,12 +763,7 @@ TEST_P(Test_Model_Optimizer, flexible_inputs)
     const std::string& model = findDataFile("dnn/layers/layer_convolution.bin");
     const std::string& proto = findDataFile("dnn/layers/layer_convolution.xml");
 
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
-    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
-    else
-        FAIL() << "Unknown backendId";
+    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);
 
     Net net0 = readNet(model, proto);
     net0.setPreferableTarget(targetId);
diff --git a/modules/dnn/test/test_onnx_conformance.cpp b/modules/dnn/test/test_onnx_conformance.cpp
index 1c3877b7b2..0e912ede54 100644
--- a/modules/dnn/test/test_onnx_conformance.cpp
+++ b/modules/dnn/test/test_onnx_conformance.cpp
@@ -1181,10 +1181,10 @@ TEST_P(Test_ONNX_conformance, Layer_Test)
     }
 
     std::vector<std::string> layerNames = net.getUnconnectedOutLayersNames();
-    std::vector< std::vector<Mat> > outputs_;
+    std::vector<Mat> outputs;
     try
     {
-        net.forward(outputs_, layerNames);
+        net.forward(outputs, layerNames);
     }
     catch (...)
     {
@@ -1192,8 +1192,7 @@ TEST_P(Test_ONNX_conformance, Layer_Test)
         applyTestTag(CV_TEST_TAG_DNN_ERROR_FORWARD);
         throw;
     }
-    ASSERT_GE(outputs_.size(), 1);
-    const std::vector<Mat>& outputs = outputs_[0];
+    ASSERT_GE(outputs.size(), 1);
 
     if (checkLayersFallbacks && checkFallbacks(net))
     {
diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
index ec262ed301..bdd8f3b8b9 100644
--- a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
@@ -59,6 +59,12 @@ EOF_LABEL:
 
 bool filterApplied = false;
 
+#if INF_ENGINE_VER_MAJOR_EQ(2021040000) || INF_ENGINE_VER_MAJOR_EQ(2022010000)  // version codes sharing one skip list (presumably OpenVINO 2021.4 and 2022.1)
+#define SKIP_SET_1 1  // turns on every "#if SKIP_SET_1" filter in the CASE list below
+#else
+#define SKIP_SET_1 0  // for any other version those filters compile out
+#endif  // SKIP_SET_1 selection
+
 // Update note: execute <opencv_extra>/testdata/dnn/onnx/generate_conformance_list.py
 BEGIN_SWITCH()
 CASE(test_abs)
@@ -82,11 +88,11 @@ CASE(test_adam_multiple)
 CASE(test_add)
     // no filter
 CASE(test_add_bcast)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_add_uint8)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_and2d)
@@ -106,131 +112,131 @@ CASE(test_and_bcast4v3d)
 CASE(test_and_bcast4v4d)
     // no filter
 CASE(test_argmax_default_axis_example)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_default_axis_example_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_default_axis_random)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_default_axis_random_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_keepdims_example)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_keepdims_example_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_keepdims_random)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_keepdims_random_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_negative_axis_keepdims_example)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_negative_axis_keepdims_example_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_negative_axis_keepdims_random)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_negative_axis_keepdims_random_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_no_keepdims_example)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_no_keepdims_example_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_no_keepdims_random)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmax_no_keepdims_random_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_default_axis_example)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_default_axis_example_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_default_axis_random)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_default_axis_random_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_keepdims_example)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_keepdims_example_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_keepdims_random)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_keepdims_random_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_negative_axis_keepdims_example)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_negative_axis_keepdims_example_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_negative_axis_keepdims_random)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_negative_axis_keepdims_random_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_no_keepdims_example)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_no_keepdims_example_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_no_keepdims_random)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_argmin_no_keepdims_random_select_last_index)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_asin)
@@ -256,11 +262,11 @@ CASE(test_averagepool_2d_ceil)
 CASE(test_averagepool_2d_default)
     // no filter
 CASE(test_averagepool_2d_pads)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_averagepool_2d_pads_count_include_pad)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_CPU;
     // MYRIAD is ok
     SKIP_OPENCL;
@@ -269,7 +275,7 @@ CASE(test_averagepool_2d_pads_count_include_pad)
 CASE(test_averagepool_2d_precomputed_pads)
     // no filter
 CASE(test_averagepool_2d_precomputed_pads_count_include_pad)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_averagepool_2d_precomputed_same_upper)
@@ -277,7 +283,7 @@ CASE(test_averagepool_2d_precomputed_same_upper)
 CASE(test_averagepool_2d_precomputed_strides)
     // no filter
 CASE(test_averagepool_2d_same_lower)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_averagepool_2d_same_upper)
@@ -287,11 +293,11 @@ CASE(test_averagepool_2d_strides)
 CASE(test_averagepool_3d_default)
     // no filter
 CASE(test_basic_conv_with_padding)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_basic_conv_without_padding)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_basic_convinteger)
@@ -349,11 +355,11 @@ CASE(test_cast_FLOAT_to_DOUBLE)
 CASE(test_cast_FLOAT_to_FLOAT16)
     // no filter
 CASE(test_cast_FLOAT_to_STRING)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_cast_STRING_to_FLOAT)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_castlike_BFLOAT16_to_FLOAT)
@@ -391,13 +397,13 @@ CASE(test_castlike_FLOAT_to_FLOAT16_expanded)
 CASE(test_castlike_FLOAT_to_STRING)
     // no filter
 CASE(test_castlike_FLOAT_to_STRING_expanded)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_castlike_STRING_to_FLOAT)
     // no filter
 CASE(test_castlike_STRING_to_FLOAT_expanded)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_ceil)
@@ -441,7 +447,7 @@ CASE(test_compress_negative_axis)
 CASE(test_concat_1d_axis_0)
     // no filter
 CASE(test_concat_1d_axis_negative_1)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_concat_2d_axis_0)
@@ -475,19 +481,19 @@ CASE(test_constantofshape_int_shape_zero)
 CASE(test_constantofshape_int_zeros)
     // no filter
 CASE(test_conv_with_autopad_same)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_conv_with_strides_and_asymmetric_padding)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_conv_with_strides_no_padding)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_conv_with_strides_padding)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_convinteger_with_padding)
@@ -555,13 +561,13 @@ CASE(test_det_nd)
 CASE(test_div)
     // no filter
 CASE(test_div_bcast)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_div_example)
     // no filter
 CASE(test_div_uint8)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_dropout_default)
@@ -573,7 +579,7 @@ CASE(test_dropout_default_mask_ratio)
 CASE(test_dropout_default_old)
     // no filter
 CASE(test_dropout_default_ratio)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_dropout_random_old)
@@ -697,11 +703,11 @@ CASE(test_globalaveragepool)
 CASE(test_globalaveragepool_precomputed)
     // no filter
 CASE(test_globalmaxpool)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_globalmaxpool_precomputed)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_greater)
@@ -811,12 +817,12 @@ CASE(test_log)
 CASE(test_log_example)
     // no filter
 CASE(test_logsoftmax_axis_0)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_OPENCL;
     SKIP_OPENCL_FP16;
 #endif
 CASE(test_logsoftmax_axis_0_expanded)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_OPENCL;
     SKIP_OPENCL_FP16;
 #endif
@@ -829,7 +835,7 @@ CASE(test_logsoftmax_axis_2)
 CASE(test_logsoftmax_axis_2_expanded)
     // no filter
 CASE(test_logsoftmax_default_axis)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_logsoftmax_default_axis_expanded)
@@ -839,12 +845,12 @@ CASE(test_logsoftmax_example_1)
 CASE(test_logsoftmax_example_1_expanded)
     // no filter
 CASE(test_logsoftmax_large_number)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_OPENCL_FP16;
     SKIP_MYRIAD;
 #endif
 CASE(test_logsoftmax_large_number_expanded)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_OPENCL_FP16;
     SKIP_MYRIAD;
 #endif
@@ -907,71 +913,71 @@ CASE(test_max_uint64)
 CASE(test_max_uint8)
     // no filter
 CASE(test_maxpool_1d_default)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_maxpool_2d_ceil)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_maxpool_2d_default)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_maxpool_2d_dilations)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_maxpool_2d_pads)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_maxpool_2d_precomputed_pads)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_maxpool_2d_precomputed_same_upper)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_maxpool_2d_precomputed_strides)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_maxpool_2d_same_lower)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_maxpool_2d_same_upper)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_maxpool_2d_strides)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_MYRIAD;
 #endif
 CASE(test_maxpool_2d_uint8)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_maxpool_3d_default)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_NON_CPU;
 #endif
 CASE(test_maxpool_with_argmax_2d_precomputed_pads)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_maxpool_with_argmax_2d_precomputed_strides)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_maxunpool_export_with_output_shape)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_maxunpool_export_without_output_shape)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_mean_example)
@@ -1041,13 +1047,13 @@ CASE(test_momentum_multiple)
 CASE(test_mul)
     // no filter
 CASE(test_mul_bcast)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_mul_example)
     // no filter
 CASE(test_mul_uint8)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_mvn)
@@ -1717,12 +1723,12 @@ CASE(test_slice_negative_axes)
 CASE(test_slice_start_out_of_bounds)
     // no filter
 CASE(test_softmax_axis_0)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_OPENCL;
     SKIP_OPENCL_FP16;
 #endif
 CASE(test_softmax_axis_0_expanded)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_OPENCL;
     SKIP_OPENCL_FP16;
 #endif
@@ -1735,7 +1741,7 @@ CASE(test_softmax_axis_2)
 CASE(test_softmax_axis_2_expanded)
     // no filter
 CASE(test_softmax_default_axis)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_softmax_default_axis_expanded)
@@ -1745,12 +1751,12 @@ CASE(test_softmax_example)
 CASE(test_softmax_example_expanded)
     // no filter
 CASE(test_softmax_large_number)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_OPENCL_FP16;
     SKIP_MYRIAD;
 #endif
 CASE(test_softmax_large_number_expanded)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP_OPENCL_FP16;
     SKIP_MYRIAD;
 #endif
@@ -1771,26 +1777,11 @@ CASE(test_spacetodepth)
 CASE(test_spacetodepth_example)
     // no filter
 CASE(test_split_equal_parts_1d)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
-    SKIP_CPU;
-    // MYRIAD is ok
-    SKIP_OPENCL;
-    SKIP_OPENCL_FP16;
-#endif
+    // no filter
 CASE(test_split_equal_parts_2d)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
-    SKIP_CPU;
-    // MYRIAD is ok
-    SKIP_OPENCL;
-    SKIP_OPENCL_FP16;
-#endif
+    // no filter
 CASE(test_split_equal_parts_default_axis)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
-    SKIP_CPU;
-    // MYRIAD is ok
-    SKIP_OPENCL;
-    SKIP_OPENCL_FP16;
-#endif
+    // no filter
 CASE(test_split_variable_parts_1d)
     // no filter
 CASE(test_split_variable_parts_2d)
@@ -1822,13 +1813,13 @@ CASE(test_strnormalizer_nostopwords_nochangecase)
 CASE(test_sub)
     // no filter
 CASE(test_sub_bcast)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_sub_example)
     // no filter
 CASE(test_sub_uint8)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_sum_example)
@@ -1964,7 +1955,7 @@ CASE(test_unsqueeze_two_axes)
 CASE(test_unsqueeze_unsorted_axes)
     // no filter
 CASE(test_upsample_nearest)
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
+#if SKIP_SET_1
     SKIP;
 #endif
 CASE(test_where_example)
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 9f13727e95..4918c72d10 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -885,9 +885,14 @@ TEST_P(Test_ONNX_layers, DynamicResize)
     testONNXModels("dynamic_resize_9", npy, 0, 0, false, true, 2);
     testONNXModels("dynamic_resize_10", npy, 0, 0, false, true, 2);
     testONNXModels("dynamic_resize_11", npy, 0, 0, false, true, 2);
+    testONNXModels("dynamic_resize_13", npy, 0, 0, false, true, 2);
     testONNXModels("dynamic_resize_scale_9", npy, 0, 0, false, true, 2);
     testONNXModels("dynamic_resize_scale_10", npy, 0, 0, false, true, 2);
     testONNXModels("dynamic_resize_scale_11", npy, 0, 0, false, true, 2);
+    testONNXModels("dynamic_resize_scale_13", npy, 0, 0, false, true, 2);
+
+    testONNXModels("resize_size_opset11");
+    testONNXModels("resize_size_opset13");
 }
 
 TEST_P(Test_ONNX_layers, Resize_HumanSeg)
diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt
index f3c6a70d1e..cd47421ea7 100644
--- a/modules/gapi/CMakeLists.txt
+++ b/modules/gapi/CMakeLists.txt
@@ -13,8 +13,14 @@ if(NOT TARGET ade)
   return()
 endif()
 
-if(INF_ENGINE_TARGET)
-  ocv_option(OPENCV_GAPI_INF_ENGINE "Build GraphAPI module with Inference Engine support" ON)
+if(TARGET ocv.3rdparty.openvino)
+  # TODO: remove OPENCV_GAPI_INF_ENGINE option
+  set(initial_value ON)
+  if(DEFINED OPENCV_GAPI_INF_ENGINE)
+    set(initial_value ${OPENCV_GAPI_INF_ENGINE})
+    message(WARNING "OPENCV_GAPI_INF_ENGINE option is deprecated. Use OPENCV_GAPI_WITH_OPENVINO option instead.")
+  endif()
+  ocv_option(OPENCV_GAPI_WITH_OPENVINO "G-API: Enable OpenVINO Toolkit support" ${initial_value})
 endif()
 
 set(the_description "OpenCV G-API Core Module")
@@ -45,6 +51,7 @@ file(GLOB gapi_ext_hdrs
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/fluid/*.hpp"
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/gpu/*.hpp"
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/infer/*.hpp"
+    "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/oak/*.hpp"
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/ocl/*.hpp"
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/own/*.hpp"
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/plaidml/*.hpp"
@@ -54,6 +61,7 @@ file(GLOB gapi_ext_hdrs
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/streaming/*.hpp"
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/streaming/gstreamer/*.hpp"
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/streaming/onevpl/*.hpp"
+    "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/plaidml/*.hpp"
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/util/*.hpp"
     )
 
@@ -127,6 +135,11 @@ set(gapi_srcs
     src/backends/fluid/gfluidcore.cpp
 	  src/backends/fluid/gfluidcore_func.dispatch.cpp
 
+    # OAK Backend (optional)
+    src/backends/oak/goak.cpp
+    src/backends/oak/goakbackend.cpp
+    src/backends/oak/goak_media_adapter.cpp
+
     # OCL Backend (currently built-in)
     src/backends/ocl/goclbackend.cpp
     src/backends/ocl/goclkernel.cpp
@@ -185,6 +198,8 @@ set(gapi_srcs
     src/streaming/onevpl/engine/processing_engine_base.cpp
     src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp
     src/streaming/onevpl/engine/decode/decode_session.cpp
+    src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp
+    src/streaming/onevpl/engine/transcode/transcode_session.cpp
     src/streaming/onevpl/demux/async_mfp_demux_data_provider.cpp
     src/streaming/onevpl/data_provider_dispatcher.cpp
 
@@ -219,12 +234,8 @@ ocv_create_module()
 
 ocv_target_link_libraries(${the_module} PRIVATE ade)
 
-if(OPENCV_GAPI_INF_ENGINE)
-  ocv_target_link_libraries(${the_module} PRIVATE ${INF_ENGINE_TARGET})
-endif()
-
-if (HAVE_NGRAPH)
-    ocv_target_link_libraries(${the_module} PRIVATE ngraph::ngraph)
+if(TARGET ocv.3rdparty.openvino AND OPENCV_GAPI_WITH_OPENVINO)
+  ocv_target_link_libraries(${the_module} PRIVATE ocv.3rdparty.openvino)
 endif()
 
 if(HAVE_TBB)
@@ -239,11 +250,8 @@ if(CV_TRACE AND HAVE_ITT)
 endif()
 
 set(__test_extra_deps "")
-if(OPENCV_GAPI_INF_ENGINE)
-  list(APPEND __test_extra_deps ${INF_ENGINE_TARGET})
-endif()
-if(HAVE_NGRAPH)
-  list(APPEND __test_extra_deps ngraph::ngraph)
+if(TARGET ocv.3rdparty.openvino AND OPENCV_GAPI_WITH_OPENVINO)
+  list(APPEND __test_extra_deps ocv.3rdparty.openvino)
 endif()
 ocv_add_accuracy_tests(${__test_extra_deps})
 
@@ -254,9 +262,6 @@ ocv_add_accuracy_tests(${__test_extra_deps})
 if(TARGET opencv_test_gapi)
   target_include_directories(opencv_test_gapi PRIVATE "${CMAKE_CURRENT_LIST_DIR}/src")
   target_link_libraries(opencv_test_gapi PRIVATE ade)
-  if (HAVE_NGRAPH)
-      ocv_target_compile_definitions(opencv_test_gapi PRIVATE -DHAVE_NGRAPH)
-  endif()
 endif()
 
 if(HAVE_TBB AND TARGET opencv_test_gapi)
@@ -272,6 +277,14 @@ if(HAVE_FREETYPE)
   ocv_target_include_directories(${the_module} PRIVATE ${FREETYPE_INCLUDE_DIRS})
 endif()
 
+if(HAVE_OAK)
+  ocv_target_compile_definitions(${the_module} PRIVATE -DHAVE_OAK)
+  if(TARGET opencv_test_gapi)
+    ocv_target_compile_definitions(opencv_test_gapi PRIVATE -DHAVE_OAK)
+  endif()
+  ocv_target_link_libraries(${the_module} PRIVATE depthai::core)
+endif()
+
 if(HAVE_PLAIDML)
   ocv_target_compile_definitions(${the_module} PRIVATE -DHAVE_PLAIDML)
   if(TARGET opencv_test_gapi)
@@ -329,16 +342,21 @@ ocv_add_samples()
 
 
 # Required for sample with inference on host
-if (TARGET example_gapi_onevpl_infer_single_roi)
-  if(OPENCV_GAPI_INF_ENGINE)
-    ocv_target_link_libraries(example_gapi_onevpl_infer_single_roi PRIVATE ${INF_ENGINE_TARGET})
-    ocv_target_compile_definitions(example_gapi_onevpl_infer_single_roi PRIVATE -DHAVE_INF_ENGINE)
+if(TARGET example_gapi_onevpl_infer_single_roi)
+  if(TARGET ocv.3rdparty.openvino AND OPENCV_GAPI_WITH_OPENVINO)
+    ocv_target_link_libraries(example_gapi_onevpl_infer_single_roi PRIVATE ocv.3rdparty.openvino)
   endif()
   if(HAVE_D3D11 AND HAVE_OPENCL)
     ocv_target_include_directories(example_gapi_onevpl_infer_single_roi SYSTEM PRIVATE ${OPENCL_INCLUDE_DIRS})
   endif()
 endif()
 
+if(TARGET example_gapi_pipeline_modeling_tool)
+  if(WIN32)
+    ocv_target_link_libraries(example_gapi_pipeline_modeling_tool winmm.lib)
+  endif()
+endif()
+
 # perf test dependencies postprocessing
 if(HAVE_GAPI_ONEVPL)
   # NB: TARGET opencv_perf_gapi doesn't exist before `ocv_add_perf_tests`
@@ -350,3 +368,13 @@ if(HAVE_GAPI_ONEVPL)
     endif()
   endif()
 endif()
+
+if(HAVE_OAK)
+  # FIXME: consider better solution
+  if(TARGET example_gapi_oak_rgb_camera_encoding)
+    ocv_target_compile_definitions(example_gapi_oak_rgb_camera_encoding PRIVATE -DHAVE_OAK)
+  endif()
+  if(TARGET example_gapi_oak_small_hetero_pipeline)
+    ocv_target_compile_definitions(example_gapi_oak_small_hetero_pipeline PRIVATE -DHAVE_OAK)
+  endif()
+endif()
diff --git a/modules/gapi/cmake/init.cmake b/modules/gapi/cmake/init.cmake
index 1c464328ca..dd4b0bccfa 100644
--- a/modules/gapi/cmake/init.cmake
+++ b/modules/gapi/cmake/init.cmake
@@ -1,7 +1,8 @@
 OCV_OPTION(WITH_ADE "Enable ADE framework (required for Graph API module)" ON)
 
-OCV_OPTION(WITH_FREETYPE "Enable FreeType framework" OFF)
-OCV_OPTION(WITH_PLAIDML  "Include PlaidML2 support"  OFF)
+OCV_OPTION(WITH_FREETYPE "Enable FreeType framework"     OFF)
+OCV_OPTION(WITH_PLAIDML  "Include PlaidML2 support"      OFF)
+OCV_OPTION(WITH_OAK      "Include OpenCV AI Kit support" OFF)
 
 if(NOT WITH_ADE)
   return()
@@ -39,3 +40,10 @@ if(WITH_GAPI_ONEVPL)
         set(HAVE_GAPI_ONEVPL TRUE)
     endif()
 endif()
+
+if(WITH_OAK)
+  find_package(depthai QUIET)
+  if(depthai_FOUND)
+      set(HAVE_OAK TRUE)
+  endif()
+endif()
diff --git a/modules/gapi/include/opencv2/gapi/gframe.hpp b/modules/gapi/include/opencv2/gapi/gframe.hpp
index 96913dc4cc..af5fc6bee5 100644
--- a/modules/gapi/include/opencv2/gapi/gframe.hpp
+++ b/modules/gapi/include/opencv2/gapi/gframe.hpp
@@ -86,6 +86,7 @@ enum class MediaFormat: int
 {
     BGR = 0,
     NV12,
+    GRAY,
 };
 
 /**
diff --git a/modules/gapi/include/opencv2/gapi/imgproc.hpp b/modules/gapi/include/opencv2/gapi/imgproc.hpp
index 72aea24288..de6f3bcdb9 100644
--- a/modules/gapi/include/opencv2/gapi/imgproc.hpp
+++ b/modules/gapi/include/opencv2/gapi/imgproc.hpp
@@ -504,8 +504,8 @@ namespace imgproc {
             }
             else
             {
-                int outSz_w = static_cast<int>(round(in.size.width  * fx));
-                int outSz_h = static_cast<int>(round(in.size.height * fy));
+                int outSz_w = saturate_cast<int>(in.size.width  * fx);
+                int outSz_h = saturate_cast<int>(in.size.height * fy);
                 GAPI_Assert(outSz_w > 0 && outSz_h > 0);
                 return in.withSize(Size(outSz_w, outSz_h));
             }
diff --git a/modules/gapi/include/opencv2/gapi/oak/oak.hpp b/modules/gapi/include/opencv2/gapi/oak/oak.hpp
new file mode 100644
index 0000000000..05fb09946f
--- /dev/null
+++ b/modules/gapi/include/opencv2/gapi/oak/oak.hpp
@@ -0,0 +1,131 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2021 Intel Corporation
+
+#ifndef OPENCV_GAPI_OAK_HPP
+#define OPENCV_GAPI_OAK_HPP
+
+#include <opencv2/gapi/garg.hpp>       // IStreamSource
+#include <opencv2/gapi/gkernel.hpp>    // GKernelPackage
+#include <opencv2/gapi/gstreaming.hpp> // GOptRunArgsP
+
+namespace cv {
+namespace gapi {
+namespace oak {
+
+// FIXME: copypasted from dai library
+struct EncoderConfig {
+    /**
+     * Rate control mode specifies if constant or variable bitrate should be used (H264 / H265)
+     */
+    enum class RateControlMode: int { CBR, VBR };
+
+    /**
+     * Encoding profile, H264, H265 or MJPEG
+     */
+    enum class Profile: int { H264_BASELINE, H264_HIGH, H264_MAIN, H265_MAIN, MJPEG };
+    /**
+     * Specifies preferred bitrate (kb) of compressed output bitstream
+     */
+    std::int32_t bitrate = 8000;
+    /**
+     * Every x number of frames a keyframe will be inserted
+     */
+    std::int32_t keyframeFrequency = 30;
+    /**
+     * Specifies maximum bitrate (kb) of compressed output bitstream
+     */
+    std::int32_t maxBitrate = 8000;
+    /**
+     * Specifies number of B frames to be inserted
+     */
+    std::int32_t numBFrames = 0;
+    /**
+     * This option specifies how many frames are available in this node's pool (can help if
+     * the receiver node is slow at consuming them)
+     */
+    std::uint32_t numFramesPool = 4;
+    /**
+     * Encoding profile, H264, H265 or MJPEG
+     */
+    Profile profile = Profile::H265_MAIN;
+    /**
+     * Value between 0-100% (approximates quality)
+     */
+    std::int32_t quality = 80;
+    /**
+     * Lossless mode ([M]JPEG only)
+     */
+    bool lossless = false;
+    /**
+     * Rate control mode specifies if constant or variable bitrate should be used (H264 / H265)
+     */
+    RateControlMode rateCtrlMode = RateControlMode::CBR;
+    /**
+     * Input and compressed output frame width
+     */
+    std::int32_t width = 1920;
+    /**
+     * Input and compressed output frame height
+     */
+    std::int32_t height = 1080;
+    /**
+     * Frame rate
+     */
+    float frameRate = 30.0f;
+};
+
+G_API_OP(GEncFrame, <GArray<uint8_t>(GFrame, EncoderConfig)>, "org.opencv.oak.enc_frame") {
+    static GArrayDesc outMeta(const GFrameDesc&, const EncoderConfig&) {
+        return cv::empty_array_desc();
+    }
+};
+
+G_API_OP(GSobelXY, <GFrame(GFrame, const cv::Mat&, const cv::Mat&)>, "org.opencv.oak.sobelxy") {
+    static GFrameDesc outMeta(const GFrameDesc& in, const cv::Mat&, const cv::Mat&) {
+        return in;
+    }
+};
+
+GAPI_EXPORTS GArray<uint8_t> encode(const GFrame& in, const EncoderConfig&);
+
+GAPI_EXPORTS GFrame sobelXY(const GFrame& in,
+                            const cv::Mat& hk,
+                            const cv::Mat& vk);
+
+// OAK backend & kernels ////////////////////////////////////////////////////////
+GAPI_EXPORTS cv::gapi::GBackend backend();
+GAPI_EXPORTS cv::gapi::GKernelPackage kernels();
+
+// Camera object ///////////////////////////////////////////////////////////////
+
+struct GAPI_EXPORTS ColorCameraParams {};
+
+class GAPI_EXPORTS ColorCamera: public cv::gapi::wip::IStreamSource {
+    cv::MediaFrame m_dummy;
+
+    virtual bool pull(cv::gapi::wip::Data &data) override;
+    virtual GMetaArg descr_of() const override;
+
+public:
+    ColorCamera();
+};
+
+} // namespace oak
+} // namespace gapi
+
+namespace detail {
+template<> struct CompileArgTag<gapi::oak::ColorCameraParams> {
+    static const char* tag() { return "gapi.oak.colorCameraParams"; }
+};
+
+template<> struct CompileArgTag<gapi::oak::EncoderConfig> {
+    static const char* tag() { return "gapi.oak.encoderConfig"; }
+};
+} // namespace detail
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_OAK_HPP
diff --git a/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamerpipeline.hpp b/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamerpipeline.hpp
index 83afc99393..c566656cb6 100644
--- a/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamerpipeline.hpp
+++ b/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamerpipeline.hpp
@@ -19,12 +19,12 @@ namespace gapi {
 namespace wip {
 namespace gst {
 
-class GAPI_EXPORTS GStreamerPipeline
+class GAPI_EXPORTS_W GStreamerPipeline
 {
 public:
     class Priv;
 
-    explicit GStreamerPipeline(const std::string& pipeline);
+    GAPI_WRAP explicit GStreamerPipeline(const std::string& pipeline);
     IStreamSource::Ptr getStreamingSource(const std::string& appsinkName,
                                           const GStreamerSource::OutputType outputType =
                                               GStreamerSource::OutputType::MAT);
@@ -40,6 +40,18 @@ protected:
 
 using GStreamerPipeline = gst::GStreamerPipeline;
 
+// NB: Function intended for use from Python
+// FIXME: a separate function is created due to the absence of wrappers for `shared_ptr<>`.
+// Ideally, the `GStreamerPipeline::getStreamingSource()` method would be wrapped as is.
+GAPI_EXPORTS_W cv::Ptr<IStreamSource>
+inline get_streaming_source(cv::Ptr<GStreamerPipeline>& pipeline,
+                            const std::string& appsinkName,
+                            const GStreamerSource::OutputType outputType
+                                = GStreamerSource::OutputType::MAT)
+{
+    return pipeline->getStreamingSource(appsinkName, outputType);
+}
+
 } // namespace wip
 } // namespace gapi
 } // namespace cv
diff --git a/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamersource.hpp b/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamersource.hpp
index b81bad31b8..8b8a5ae312 100644
--- a/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamersource.hpp
+++ b/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamersource.hpp
@@ -32,13 +32,13 @@ namespace gst {
  *        Pipeline can actually contain many sink elements, but it must have one and only one
  *        appsink among them.
  *
- *      - data passed to appsink should be video-frame in NV12 format.
+ *      - data passed to appsink should be video-frame in NV12 or GRAY8 format.
  *
  * 'outputType' is used to select type of output data to produce: 'cv::MediaFrame' or 'cv::Mat'.
  * To produce 'cv::MediaFrame'-s you need to pass 'GStreamerSource::OutputType::FRAME' and,
  * correspondingly, 'GStreamerSource::OutputType::MAT' to produce 'cv::Mat'-s.
  * Please note, that in the last case, output 'cv::Mat' will be of BGR format, internal conversion
- * from NV12 GStreamer data will happen.
+ * from NV12 / GRAY8 GStreamer data will happen.
  * Default value for 'outputType' is 'GStreamerSource::OutputType::MAT'.
  *
  * @note Stream sources are passed to G-API via shared pointers, so please use gapi::make_src<>
@@ -82,6 +82,14 @@ protected:
 
 using GStreamerSource = gst::GStreamerSource;
 
+// NB: Overload for using from python
+GAPI_EXPORTS_W cv::Ptr<IStreamSource>
+inline make_gst_src(const std::string& pipeline,
+                    const GStreamerSource::OutputType outputType =
+                    GStreamerSource::OutputType::MAT)
+{
+    return make_src<GStreamerSource>(pipeline, outputType);
+}
 } // namespace wip
 } // namespace gapi
 } // namespace cv
diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp
index bfd922496a..d93b4c561d 100644
--- a/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp
+++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp
@@ -110,6 +110,62 @@ struct GAPI_EXPORTS CfgParam {
     static CfgParam create_implementation(uint32_t value);
     static CfgParam create_implementation(const char* value);
 
+
+    static constexpr const char *vpp_frames_pool_size_name() { return "vpp_frames_pool_size"; }
+    static CfgParam create_vpp_frames_pool_size(size_t value);
+
+    static constexpr const char *vpp_in_width_name() { return "vpp.In.Width"; }
+    static CfgParam create_vpp_in_width(uint16_t value);
+
+    static constexpr const char *vpp_in_height_name() { return "vpp.In.Height"; }
+    static CfgParam create_vpp_in_height(uint16_t value);
+
+    static constexpr const char *vpp_in_crop_x_name() { return "vpp.In.CropX"; }
+    static CfgParam create_vpp_in_crop_x(uint16_t value);
+
+    static constexpr const char *vpp_in_crop_y_name() { return "vpp.In.CropY"; }
+    static CfgParam create_vpp_in_crop_y(uint16_t value);
+
+    static constexpr const char *vpp_in_crop_w_name() { return "vpp.In.CropW"; }
+    static CfgParam create_vpp_in_crop_w(uint16_t value);
+
+    static constexpr const char *vpp_in_crop_h_name() { return "vpp.In.CropH"; }
+    static CfgParam create_vpp_in_crop_h(uint16_t value);
+
+
+    static constexpr const char *vpp_out_fourcc_name() { return "vpp.Out.FourCC"; }
+    static CfgParam create_vpp_out_fourcc(uint32_t value);
+
+    static constexpr const char *vpp_out_chroma_format_name() { return "vpp.Out.ChromaFormat"; }
+    static CfgParam create_vpp_out_chroma_format(uint16_t value);
+
+    static constexpr const char *vpp_out_width_name() { return "vpp.Out.Width"; }
+    static CfgParam create_vpp_out_width(uint16_t value);
+
+    static constexpr const char *vpp_out_height_name() { return "vpp.Out.Height"; }
+    static CfgParam create_vpp_out_height(uint16_t value);
+
+    static constexpr const char *vpp_out_crop_x_name() { return "vpp.Out.CropX"; }
+    static CfgParam create_vpp_out_crop_x(uint16_t value);
+
+    static constexpr const char *vpp_out_crop_y_name() { return "vpp.Out.CropY"; }
+    static CfgParam create_vpp_out_crop_y(uint16_t value);
+
+    static constexpr const char *vpp_out_crop_w_name() { return "vpp.Out.CropW"; }
+    static CfgParam create_vpp_out_crop_w(uint16_t value);
+
+    static constexpr const char *vpp_out_crop_h_name() { return "vpp.Out.CropH"; }
+    static CfgParam create_vpp_out_crop_h(uint16_t value);
+
+    static constexpr const char *vpp_out_pic_struct_name() { return "vpp.Out.PicStruct"; }
+    static CfgParam create_vpp_out_pic_struct(uint16_t value);
+
+    static constexpr const char *vpp_out_framerate_n_name() { return "vpp.Out.FrameRateExtN"; }
+    static CfgParam create_vpp_out_framerate_n(uint32_t value);
+
+    static constexpr const char *vpp_out_framerate_d_name() { return "vpp.Out.FrameRateExtD"; }
+    static CfgParam create_vpp_out_framerate_d(uint32_t value);
+
     /**
      * Create generic onevp::GSource configuration parameter.
      *
diff --git a/modules/gapi/misc/python/package/gapi/__init__.py b/modules/gapi/misc/python/package/gapi/__init__.py
index b1326712fc..6323582f5b 100644
--- a/modules/gapi/misc/python/package/gapi/__init__.py
+++ b/modules/gapi/misc/python/package/gapi/__init__.py
@@ -297,3 +297,5 @@ cv.gapi.wip.draw.Image = cv.gapi_wip_draw_Image
 cv.gapi.wip.draw.Poly = cv.gapi_wip_draw_Poly
 
 cv.gapi.streaming.queue_capacity = cv.gapi_streaming_queue_capacity
+
+cv.gapi.wip.GStreamerPipeline = cv.gapi_wip_gst_GStreamerPipeline
diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp
index a71366250c..b4be0048d0 100644
--- a/modules/gapi/misc/python/pyopencv_gapi.hpp
+++ b/modules/gapi/misc/python/pyopencv_gapi.hpp
@@ -19,6 +19,7 @@ using detail_ExtractArgsCallback    = cv::detail::ExtractArgsCallback;
 using detail_ExtractMetaCallback    = cv::detail::ExtractMetaCallback;
 using vector_GNetParam              = std::vector<cv::gapi::GNetParam>;
 using gapi_streaming_queue_capacity = cv::gapi::streaming::queue_capacity;
+using GStreamerSource_OutputType    = cv::gapi::wip::GStreamerSource::OutputType;
 
 // NB: Python wrapper generate T_U for T<U>
 // This behavior is only observed for inputs
@@ -230,7 +231,7 @@ PyObject* pyopencv_from(const cv::GArg& value)
     {
         HANDLE_CASE(BOOL,      bool);
         HANDLE_CASE(INT,       int);
-        HANDLE_CASE(INT64,   int64_t);
+        HANDLE_CASE(INT64,     int64_t);
         HANDLE_CASE(DOUBLE,    double);
         HANDLE_CASE(FLOAT,     float);
         HANDLE_CASE(STRING,    std::string);
diff --git a/modules/gapi/misc/python/test/test_gapi_streaming.py b/modules/gapi/misc/python/test/test_gapi_streaming.py
index d7914c5157..d06447d791 100644
--- a/modules/gapi/misc/python/test/test_gapi_streaming.py
+++ b/modules/gapi/misc/python/test/test_gapi_streaming.py
@@ -34,6 +34,16 @@ try:
             return img
 
 
+    def convertNV12p2BGR(in_nv12):
+        shape = in_nv12.shape
+        y_height = shape[0] // 3 * 2
+        uv_shape = (shape[0] // 3, shape[1])
+        new_uv_shape = (uv_shape[0], uv_shape[1] // 2, 2)
+        return cv.cvtColorTwoPlane(in_nv12[:y_height,  :],
+                                   in_nv12[ y_height:, :].reshape(new_uv_shape),
+                                   cv.COLOR_YUV2BGR_NV12)
+
+
     class test_gapi_streaming(NewOpenCVTests):
 
         def test_image_input(self):
@@ -229,7 +239,6 @@ try:
 
 
         def test_gapi_streaming_meta(self):
-            ksize = 3
             path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']])
 
             # G-API
@@ -350,6 +359,189 @@ try:
                     cv.gapi.compile_args(cv.gapi.streaming.queue_capacity(1)))
 
 
+        def get_gst_source(self, gstpipeline):
+            # NB: Skip test in case gstreamer isn't available.
+            try:
+                return cv.gapi.wip.make_gst_src(gstpipeline)
+            except cv.error as e:
+                if str(e).find('Built without GStreamer support!') == -1:
+                    raise e
+                else:
+                    raise unittest.SkipTest(str(e))
+
+
+        def test_gst_source(self):
+            if not cv.videoio_registry.hasBackend(cv.CAP_GSTREAMER):
+                raise unittest.SkipTest("Backend is not available/disabled: GSTREAMER")
+
+            gstpipeline = """videotestsrc is-live=true pattern=colors num-buffers=10 !
+                             videorate ! videoscale ! video/x-raw,width=1920,height=1080,
+                             framerate=30/1 ! appsink"""
+
+            g_in = cv.GMat()
+            g_out = cv.gapi.copy(g_in)
+            c = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out))
+
+            ccomp = c.compileStreaming()
+
+            source = self.get_gst_source(gstpipeline)
+
+            ccomp.setSource(cv.gin(source))
+            ccomp.start()
+
+            has_frame, output = ccomp.pull()
+            while has_frame:
+                self.assertTrue(output.size != 0)
+                has_frame, output = ccomp.pull()
+
+
+        def open_VideoCapture_gstreamer(self, gstpipeline):
+            try:
+                cap = cv.VideoCapture(gstpipeline, cv.CAP_GSTREAMER)
+            except Exception as e:
+                raise unittest.SkipTest("Backend GSTREAMER can't open the video; " +
+                                        "cause: " + str(e))
+            if not cap.isOpened():
+                raise unittest.SkipTest("Backend GSTREAMER can't open the video")
+            return cap
+
+
+        def test_gst_source_accuracy(self):
+            if not cv.videoio_registry.hasBackend(cv.CAP_GSTREAMER):
+                raise unittest.SkipTest("Backend is not available/disabled: GSTREAMER")
+
+            path = self.find_file('highgui/video/big_buck_bunny.avi',
+                                  [os.environ['OPENCV_TEST_DATA_PATH']])
+            gstpipeline = """filesrc location=""" + path + """ ! decodebin ! videoconvert !
+                             videoscale ! video/x-raw,format=NV12 ! appsink"""
+
+            # G-API pipeline
+            g_in = cv.GMat()
+            g_out = cv.gapi.copy(g_in)
+            c = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out))
+
+            ccomp = c.compileStreaming()
+
+            # G-API Gst-source
+            source = self.get_gst_source(gstpipeline)
+            ccomp.setSource(cv.gin(source))
+            ccomp.start()
+
+            # OpenCV Gst-source
+            cap = self.open_VideoCapture_gstreamer(gstpipeline)
+
+            # Assert
+            max_num_frames = 10
+            for _ in range(max_num_frames):
+                has_expected, expected = cap.read()
+                has_actual,   actual   = ccomp.pull()
+
+                self.assertEqual(has_expected, has_actual)
+
+                if not has_expected:
+                    break
+
+                self.assertEqual(0.0, cv.norm(convertNV12p2BGR(expected), actual, cv.NORM_INF))
+
+
+        def get_gst_pipeline(self, gstpipeline):
+            # NB: Skip test in case gstreamer isn't available.
+            try:
+                return cv.gapi.wip.GStreamerPipeline(gstpipeline)
+            except cv.error as e:
+                if str(e).find('Built without GStreamer support!') == -1:
+                    raise e
+                else:
+                    raise unittest.SkipTest(str(e))
+            except SystemError as e:
+                raise unittest.SkipTest(str(e) + ", casued by " + str(e.__cause__))
+
+
+        def test_gst_multiple_sources(self):
+            if not cv.videoio_registry.hasBackend(cv.CAP_GSTREAMER):
+                raise unittest.SkipTest("Backend is not available/disabled: GSTREAMER")
+
+            gstpipeline = """videotestsrc is-live=true pattern=colors num-buffers=10 !
+                             videorate ! videoscale !
+                             video/x-raw,width=1920,height=1080,framerate=30/1 !
+                             appsink name=sink1
+                             videotestsrc is-live=true pattern=colors num-buffers=10 !
+                             videorate ! videoscale !
+                             video/x-raw,width=1920,height=1080,framerate=30/1 !
+                             appsink name=sink2"""
+
+            g_in1 = cv.GMat()
+            g_in2 = cv.GMat()
+            g_out = cv.gapi.add(g_in1, g_in2)
+            c = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(g_out))
+
+            ccomp = c.compileStreaming()
+
+            pp = self.get_gst_pipeline(gstpipeline)
+            src1 = cv.gapi.wip.get_streaming_source(pp, "sink1")
+            src2 = cv.gapi.wip.get_streaming_source(pp, "sink2")
+
+            ccomp.setSource(cv.gin(src1, src2))
+            ccomp.start()
+
+            has_frame, out = ccomp.pull()
+            while has_frame:
+                self.assertTrue(out.size != 0)
+                has_frame, out = ccomp.pull()
+
+
+        def test_gst_multiple_sources_accuracy(self):
+            if not cv.videoio_registry.hasBackend(cv.CAP_GSTREAMER):
+                raise unittest.SkipTest("Backend is not available/disabled: GSTREAMER")
+
+            path = self.find_file('highgui/video/big_buck_bunny.avi',
+                                  [os.environ['OPENCV_TEST_DATA_PATH']])
+            gstpipeline1 = """filesrc location=""" + path + """ ! decodebin ! videoconvert !
+                              videoscale ! video/x-raw,format=NV12 ! appsink"""
+            gstpipeline2 = """filesrc location=""" + path + """ ! decodebin !
+                              videoflip method=clockwise ! videoconvert ! videoscale !
+                              video/x-raw,format=NV12 ! appsink"""
+            gstpipeline_gapi = gstpipeline1 + ' name=sink1 ' + gstpipeline2 + ' name=sink2'
+
+            # G-API pipeline
+            g_in1 = cv.GMat()
+            g_in2 = cv.GMat()
+            g_out1 = cv.gapi.copy(g_in1)
+            g_out2 = cv.gapi.copy(g_in2)
+            c = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(g_out1, g_out2))
+
+            ccomp = c.compileStreaming()
+
+            # G-API Gst-source
+            pp = self.get_gst_pipeline(gstpipeline_gapi)
+
+            src1 = cv.gapi.wip.get_streaming_source(pp, "sink1")
+            src2 = cv.gapi.wip.get_streaming_source(pp, "sink2")
+            ccomp.setSource(cv.gin(src1, src2))
+            ccomp.start()
+
+            # OpenCV Gst-source
+            cap1 = self.open_VideoCapture_gstreamer(gstpipeline1)
+            cap2 = self.open_VideoCapture_gstreamer(gstpipeline2)
+
+            # Assert
+            max_num_frames = 10
+            for _ in range(max_num_frames):
+                has_expected1, expected1 = cap1.read()
+                has_expected2, expected2 = cap2.read()
+                has_actual, (actual1, actual2) = ccomp.pull()
+
+                self.assertEqual(has_expected1, has_expected2)
+                has_expected = has_expected1 and has_expected2
+                self.assertEqual(has_expected, has_actual)
+
+                if not has_expected:
+                    break
+
+                self.assertEqual(0.0, cv.norm(convertNV12p2BGR(expected1), actual1, cv.NORM_INF))
+                self.assertEqual(0.0, cv.norm(convertNV12p2BGR(expected2), actual2, cv.NORM_INF))
+
+
 
 except unittest.SkipTest as e:
 
diff --git a/modules/gapi/perf/common/gapi_core_perf_tests.hpp b/modules/gapi/perf/common/gapi_core_perf_tests.hpp
index 7a1568ad22..60294d2193 100644
--- a/modules/gapi/perf/common/gapi_core_perf_tests.hpp
+++ b/modules/gapi/perf/common/gapi_core_perf_tests.hpp
@@ -78,10 +78,6 @@ namespace opencv_test
     class KMeans2DPerfTest : public TestPerfParams<tuple<int, int, cv::KmeansFlags, cv::GCompileArgs>> {};
     class KMeans3DPerfTest : public TestPerfParams<tuple<int, int, cv::KmeansFlags, cv::GCompileArgs>> {};
     class TransposePerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, cv::GCompileArgs>> {};
-    class ResizePerfTest : public TestPerfParams<tuple<compare_f, MatType, int, cv::Size, cv::Size, cv::GCompileArgs>> {};
-    class BottleneckKernelsConstInputPerfTest : public TestPerfParams<tuple<compare_f, std::string, cv::GCompileArgs>> {};
-    class ResizeFxFyPerfTest : public TestPerfParams<tuple<compare_f, MatType, int, cv::Size, double, double, cv::GCompileArgs>> {};
-    class ResizeInSimpleGraphPerfTest : public TestPerfParams<tuple<compare_f, MatType, cv::Size, double, double,  cv::GCompileArgs>> {};
     class ParseSSDBLPerfTest : public TestPerfParams<tuple<cv::Size, float, int, cv::GCompileArgs>>, public ParserSSDTest {};
     class ParseSSDPerfTest   : public TestPerfParams<tuple<cv::Size, float, bool, bool, cv::GCompileArgs>>, public ParserSSDTest {};
     class ParseYoloPerfTest  : public TestPerfParams<tuple<cv::Size, float, float, int, cv::GCompileArgs>>, public ParserYoloTest {};
diff --git a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
index d5a8d95f46..72837da199 100644
--- a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
+++ b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
@@ -436,8 +436,8 @@ PERF_TEST_P_(DivPerfTest, TestPerformance)
     // FIXIT Unstable input data for divide
     initMatsRandU(type, sz, dtype, false);
 
-    //This condition need to workaround bug in OpenCV.
-    //It reinitializes divider matrix without zero values.
+    //This condition is needed to work around an issue in OpenCV.
+    //It reinitializes the divisor matrix without zero values for the CV_16S DST type.
     if (dtype == CV_16S && dtype != type)
         cv::randu(in_mat2, cv::Scalar::all(1), cv::Scalar::all(255));
 
@@ -482,6 +482,11 @@ PERF_TEST_P_(DivCPerfTest, TestPerformance)
     // FIXIT Unstable input data for divide
     initMatsRandU(type, sz, dtype, false);
 
+    //This condition is needed as a workaround for an issue in OpenCV.
+    //It reinitializes the divisor scalar without zero values for the CV_16S DST type.
+    if (dtype == CV_16S || (type == CV_16S && dtype == -1))
+        cv::randu(sc, cv::Scalar::all(1), cv::Scalar::all(SHRT_MAX));
+
     // OpenCV code ///////////////////////////////////////////////////////////
     cv::divide(in_mat1, sc, out_mat_ocv, scale, dtype);
 
@@ -2282,187 +2287,6 @@ PERF_TEST_P_(TransposePerfTest, TestPerformance)
 
 //------------------------------------------------------------------------------
 
-PERF_TEST_P_(ResizePerfTest, TestPerformance)
-{
-    compare_f cmpF;
-    MatType type = -1;
-    int interp = 1;
-    cv::Size sz;
-    cv::Size sz_out;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, interp, sz, sz_out, compile_args) = GetParam();
-
-    in_mat1 = cv::Mat(sz, type);
-    cv::Scalar mean = cv::Scalar::all(127);
-    cv::Scalar stddev = cv::Scalar::all(40.f);
-    cv::randn(in_mat1, mean, stddev);
-    out_mat_gapi = cv::Mat(sz_out, type);
-    out_mat_ocv = cv::Mat(sz_out, type);
-
-    // OpenCV code ///////////////////////////////////////////////////////////
-    cv::resize(in_mat1, out_mat_ocv, sz_out, 0.0, 0.0, interp);
-
-    // G-API code //////////////////////////////////////////////////////////////
-    cv::GMat in;
-    auto out = cv::gapi::resize(in, sz_out, 0.0, 0.0, interp);
-    cv::GComputation c(in, out);
-
-    // Warm-up graph engine:
-    auto cc = c.compile(descr_of(gin(in_mat1)),
-                        std::move(compile_args));
-    cc(gin(in_mat1), gout(out_mat_gapi));
-
-    TEST_CYCLE()
-    {
-        cc(gin(in_mat1), gout(out_mat_gapi));
-    }
-
-    // Comparison ////////////////////////////////////////////////////////////
-    {
-        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-    }
-
-    SANITY_CHECK_NOTHING();
-}
-
-//------------------------------------------------------------------------------
-
-PERF_TEST_P_(ResizeFxFyPerfTest, TestPerformance)
-{
-    compare_f cmpF;
-    MatType type = -1;
-    int interp = 1;
-    cv::Size sz;
-    double fx = 0.0;
-    double fy = 0.0;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, interp, sz, fx, fy, compile_args) = GetParam();
-
-    in_mat1 = cv::Mat(sz, type);
-    cv::Scalar mean = cv::Scalar::all(127);
-    cv::Scalar stddev = cv::Scalar::all(40.f);
-    cv::randn(in_mat1, mean, stddev);
-    cv::Size sz_out = cv::Size(saturate_cast<int>(sz.width *fx), saturate_cast<int>(sz.height*fy));
-    out_mat_gapi = cv::Mat(sz_out, type);
-    out_mat_ocv = cv::Mat(sz_out, type);
-
-    // OpenCV code ///////////////////////////////////////////////////////////
-    cv::resize(in_mat1, out_mat_ocv, sz_out, fx, fy, interp);
-
-    // G-API code //////////////////////////////////////////////////////////////
-    cv::GMat in;
-    auto out = cv::gapi::resize(in, sz_out, fx, fy, interp);
-    cv::GComputation c(in, out);
-
-    // Warm-up graph engine:
-    auto cc = c.compile(descr_of(gin(in_mat1)),
-                        std::move(compile_args));
-    cc(gin(in_mat1), gout(out_mat_gapi));
-
-    TEST_CYCLE()
-    {
-        cc(gin(in_mat1), gout(out_mat_gapi));
-    }
-    // Comparison ////////////////////////////////////////////////////////////
-    {
-        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-    }
-
-    SANITY_CHECK_NOTHING();
-}
-
-//------------------------------------------------------------------------------
-
-// This test cases were created to control performance result of test scenario mentioned here:
-// https://stackoverflow.com/questions/60629331/opencv-gapi-performance-not-good-as-expected
-
-PERF_TEST_P_(BottleneckKernelsConstInputPerfTest, TestPerformance)
-{
-    compare_f cmpF;
-    std::string fileName = "";
-    cv::GCompileArgs compile_args;
-    double fx = 0.5;
-    double fy = 0.5;
-    std::tie(cmpF, fileName, compile_args) = GetParam();
-
-    in_mat1 = cv::imread(findDataFile(fileName));
-
-    cv::Mat cvvga;
-    cv::Mat cvgray;
-    cv::Mat cvblurred;
-
-    cv::resize(in_mat1, cvvga, cv::Size(), fx, fy);
-    cv::cvtColor(cvvga, cvgray, cv::COLOR_BGR2GRAY);
-    cv::blur(cvgray, cvblurred, cv::Size(3, 3));
-    cv::Canny(cvblurred, out_mat_ocv, 32, 128, 3);
-
-    cv::GMat in;
-    cv::GMat vga = cv::gapi::resize(in, cv::Size(), fx, fy, INTER_LINEAR);
-    cv::GMat gray = cv::gapi::BGR2Gray(vga);
-    cv::GMat blurred = cv::gapi::blur(gray, cv::Size(3, 3));
-    cv::GMat out = cv::gapi::Canny(blurred, 32, 128, 3);
-    cv::GComputation ac(in, out);
-
-    auto cc = ac.compile(descr_of(gin(in_mat1)),
-        std::move(compile_args));
-    cc(gin(in_mat1), gout(out_mat_gapi));
-
-    TEST_CYCLE()
-    {
-        cc(gin(in_mat1), gout(out_mat_gapi));
-    }
-
-    // Comparison ////////////////////////////////////////////////////////////
-    {
-        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-    }
-
-    SANITY_CHECK_NOTHING();
-}
-
-//------------------------------------------------------------------------------
-
-PERF_TEST_P_(ResizeInSimpleGraphPerfTest, TestPerformance)
-{
-    compare_f cmpF;
-    MatType type = -1;
-    cv::Size sz;
-    double fx = 0.5;
-    double fy = 0.5;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, sz, fx, fy, compile_args) = GetParam();
-
-    initMatsRandU(type, sz, type, false);
-
-    cv::Mat add_res_ocv;
-
-    cv::add(in_mat1, in_mat2, add_res_ocv);
-    cv::resize(add_res_ocv, out_mat_ocv, cv::Size(), fx, fy);
-
-    cv::GMat in1, in2;
-    cv::GMat add_res_gapi = cv::gapi::add(in1, in2);
-    cv::GMat out = cv::gapi::resize(add_res_gapi, cv::Size(), fx, fy, INTER_LINEAR);
-    cv::GComputation ac(GIn(in1, in2), GOut(out));
-
-    auto cc = ac.compile(descr_of(gin(in_mat1, in_mat2)),
-                         std::move(compile_args));
-    cc(gin(in_mat1, in_mat2), gout(out_mat_gapi));
-
-    TEST_CYCLE()
-    {
-        cc(gin(in_mat1, in_mat2), gout(out_mat_gapi));
-    }
-
-    // Comparison ////////////////////////////////////////////////////////////
-    {
-        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-    }
-
-    SANITY_CHECK_NOTHING();
-}
-
-//------------------------------------------------------------------------------
-
 PERF_TEST_P_(ParseSSDBLPerfTest, TestPerformance)
 {
     cv::Size sz;
diff --git a/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp b/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp
index 16e5f13729..c1afbfea6d 100644
--- a/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp
+++ b/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp
@@ -99,6 +99,10 @@ class YUV2BGRPerfTest     : public TestPerfParams<tuple<compare_f, cv::Size, cv:
 class RGB2HSVPerfTest     : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
 class BayerGR2RGBPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
 class RGB2YUV422PerfTest  : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
+class ResizePerfTest      : public TestPerfParams<tuple<compare_f, MatType, int, cv::Size, cv::Size, cv::GCompileArgs>> {};
+class ResizeFxFyPerfTest  : public TestPerfParams<tuple<compare_f, MatType, int, cv::Size, double, double, cv::GCompileArgs>> {};
+class ResizeInSimpleGraphPerfTest : public TestPerfParams<tuple<compare_f, MatType, cv::Size, double, double,  cv::GCompileArgs>> {};
+class BottleneckKernelsConstInputPerfTest : public TestPerfParams<tuple<compare_f, std::string, cv::GCompileArgs>> {};
 } // opencv_test
 
 #endif //OPENCV_GAPI_IMGPROC_PERF_TESTS_HPP
diff --git a/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp b/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp
index 57c8130338..475daa84c1 100644
--- a/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp
+++ b/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp
@@ -1761,5 +1761,187 @@ PERF_TEST_P_(RGB2YUV422PerfTest, TestPerformance)
 
 //------------------------------------------------------------------------------
 
+PERF_TEST_P_(ResizePerfTest, TestPerformance)
+{
+    compare_f cmpF;
+    MatType type = -1;
+    int interp = 1;
+    cv::Size sz;
+    cv::Size sz_out;
+    cv::GCompileArgs compile_args;
+    std::tie(cmpF, type, interp, sz, sz_out, compile_args) = GetParam();
+
+    in_mat1 = cv::Mat(sz, type);
+    cv::Scalar mean = cv::Scalar::all(127);
+    cv::Scalar stddev = cv::Scalar::all(40.f);
+    cv::randn(in_mat1, mean, stddev);
+    out_mat_gapi = cv::Mat(sz_out, type);
+    out_mat_ocv = cv::Mat(sz_out, type);
+
+    // OpenCV code ///////////////////////////////////////////////////////////
+    cv::resize(in_mat1, out_mat_ocv, sz_out, 0.0, 0.0, interp);
+
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in;
+    auto out = cv::gapi::resize(in, sz_out, 0.0, 0.0, interp);
+    cv::GComputation c(in, out);
+
+    // Warm-up graph engine:
+    auto cc = c.compile(descr_of(gin(in_mat1)),
+                        std::move(compile_args));
+    cc(gin(in_mat1), gout(out_mat_gapi));
+
+    TEST_CYCLE()
+    {
+        cc(gin(in_mat1), gout(out_mat_gapi));
+    }
+
+    // Comparison ////////////////////////////////////////////////////////////
+    {
+        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+//------------------------------------------------------------------------------
+
+PERF_TEST_P_(ResizeFxFyPerfTest, TestPerformance)
+{
+    compare_f cmpF;
+    MatType type = -1;
+    int interp = 1;
+    cv::Size sz;
+    double fx = 1.0;
+    double fy = 1.0;
+    cv::GCompileArgs compile_args;
+    std::tie(cmpF, type, interp, sz, fx, fy, compile_args) = GetParam();
+
+    in_mat1 = cv::Mat(sz, type);
+    cv::Scalar mean = cv::Scalar::all(127);
+    cv::Scalar stddev = cv::Scalar::all(40.f);
+    cv::randn(in_mat1, mean, stddev);
+    cv::Size sz_out = cv:: Size(saturate_cast<int>(sz.width*fx), saturate_cast<int>(sz.height*fy));
+    out_mat_gapi = cv::Mat(sz_out, type);
+    out_mat_ocv = cv::Mat(sz_out, type);
+
+    // OpenCV code ///////////////////////////////////////////////////////////
+    cv::resize(in_mat1, out_mat_ocv, sz_out, fx, fy, interp);
+
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in;
+    auto out = cv::gapi::resize(in, sz_out, fx, fy, interp);
+    cv::GComputation c(in, out);
+
+    // Warm-up graph engine:
+    auto cc = c.compile(descr_of(gin(in_mat1)),
+                        std::move(compile_args));
+    cc(gin(in_mat1), gout(out_mat_gapi));
+
+    TEST_CYCLE()
+    {
+        cc(gin(in_mat1), gout(out_mat_gapi));
+    }
+
+    // Comparison ////////////////////////////////////////////////////////////
+    {
+        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+//------------------------------------------------------------------------------
+
+PERF_TEST_P_(ResizeInSimpleGraphPerfTest, TestPerformance)
+{
+    compare_f cmpF;
+    MatType type = -1;
+    cv::Size sz;
+    double fx = 0.5;
+    double fy = 0.5;
+    cv::GCompileArgs compile_args;
+    std::tie(cmpF, type, sz, fx, fy, compile_args) = GetParam();
+
+    initMatsRandU(type, sz, type, false);
+
+    cv::Mat add_res_ocv;
+
+    cv::add(in_mat1, in_mat2, add_res_ocv);
+    cv::resize(add_res_ocv, out_mat_ocv, cv::Size(), fx, fy);
+
+    cv::GMat in1, in2;
+    cv::GMat add_res_gapi = cv::gapi::add(in1, in2);
+    cv::GMat out = cv::gapi::resize(add_res_gapi, cv::Size(), fx, fy, INTER_LINEAR);
+    cv::GComputation ac(GIn(in1, in2), GOut(out));
+
+    auto cc = ac.compile(descr_of(gin(in_mat1, in_mat2)),
+                         std::move(compile_args));
+    cc(gin(in_mat1, in_mat2), gout(out_mat_gapi));
+
+    TEST_CYCLE()
+    {
+        cc(gin(in_mat1, in_mat2), gout(out_mat_gapi));
+    }
+
+    // Comparison ////////////////////////////////////////////////////////////
+    {
+        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+//------------------------------------------------------------------------------
+
+// These test cases were created to monitor the performance results of the test scenario described here:
+// https://stackoverflow.com/questions/60629331/opencv-gapi-performance-not-good-as-expected
+
+PERF_TEST_P_(BottleneckKernelsConstInputPerfTest, TestPerformance)
+{
+    compare_f cmpF;
+    std::string fileName = "";
+    cv::GCompileArgs compile_args;
+    double fx = 0.5;
+    double fy = 0.5;
+    std::tie(cmpF, fileName, compile_args) = GetParam();
+
+    in_mat1 = cv::imread(findDataFile(fileName));
+
+    cv::Mat cvvga;
+    cv::Mat cvgray;
+    cv::Mat cvblurred;
+
+    cv::resize(in_mat1, cvvga, cv::Size(), fx, fy);
+    cv::cvtColor(cvvga, cvgray, cv::COLOR_BGR2GRAY);
+    cv::blur(cvgray, cvblurred, cv::Size(3, 3));
+    cv::Canny(cvblurred, out_mat_ocv, 32, 128, 3);
+
+    cv::GMat in;
+    cv::GMat vga = cv::gapi::resize(in, cv::Size(), fx, fy, INTER_LINEAR);
+    cv::GMat gray = cv::gapi::BGR2Gray(vga);
+    cv::GMat blurred = cv::gapi::blur(gray, cv::Size(3, 3));
+    cv::GMat out = cv::gapi::Canny(blurred, 32, 128, 3);
+    cv::GComputation ac(in, out);
+
+    auto cc = ac.compile(descr_of(gin(in_mat1)),
+        std::move(compile_args));
+    cc(gin(in_mat1), gout(out_mat_gapi));
+
+    TEST_CYCLE()
+    {
+        cc(gin(in_mat1), gout(out_mat_gapi));
+    }
+
+    // Comparison ////////////////////////////////////////////////////////////
+    {
+        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+//------------------------------------------------------------------------------
+
 }
 #endif //OPENCV_GAPI_IMGPROC_PERF_TESTS_INL_HPP
diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp
index 5323ea8f08..2f91e07e52 100644
--- a/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp
+++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp
@@ -369,38 +369,6 @@ INSTANTIATE_TEST_CASE_P(TransposePerfTestCPU, TransposePerfTest,
                     CV_8UC3, CV_16UC3, CV_16SC3, CV_32FC3),
             Values(cv::compile_args(CORE_CPU))));
 
-INSTANTIATE_TEST_CASE_P(ResizePerfTestCPU, ResizePerfTest,
-    Combine(Values(AbsExact().to_compare_f()),
-            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
-            Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
-            Values(szSmall128, szVGA, sz720p, sz1080p),
-            Values( cv::Size(64, 64),
-                    cv::Size(32, 32)),
-            Values(cv::compile_args(CORE_CPU))));
-
-INSTANTIATE_TEST_CASE_P(BottleneckKernelsPerfTestCPU, BottleneckKernelsConstInputPerfTest,
-    Combine(Values(AbsExact().to_compare_f()),
-            Values( "cv/optflow/frames/1080p_00.png", "cv/optflow/frames/720p_00.png",
-                    "cv/optflow/frames/VGA_00.png", "cv/dnn_face/recognition/Aaron_Tippin_0001.jpg"),
-            Values(cv::compile_args(CORE_CPU))));
-
-INSTANTIATE_TEST_CASE_P(ResizeInSimpleGraphPerfTestCPU, ResizeInSimpleGraphPerfTest,
-    Combine(Values(AbsExact().to_compare_f()),
-            Values(CV_8UC3),
-            Values(szSmall128, szVGA, sz720p, sz1080p),
-            Values(0.5),
-            Values(0.5),
-            Values(cv::compile_args(CORE_CPU))));
-
-INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestCPU, ResizeFxFyPerfTest,
-    Combine(Values(AbsExact().to_compare_f()),
-            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
-            Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
-            Values(szSmall128, szVGA, sz720p, sz1080p),
-            Values(0.5, 0.1),
-            Values(0.5, 0.1),
-            Values(cv::compile_args(CORE_CPU))));
-
 INSTANTIATE_TEST_CASE_P(ParseSSDBLPerfTestCPU, ParseSSDBLPerfTest,
     Combine(Values(sz720p, sz1080p),
             Values(0.3f, 0.7f),
diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp
index e25029b835..796d05101e 100644
--- a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp
+++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp
@@ -49,8 +49,8 @@ INSTANTIATE_TEST_CASE_P(SubPerfTestFluid, SubPerfTest,
 INSTANTIATE_TEST_CASE_P(SubCPerfTestFluid, SubCPerfTest,
     Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()),
             Values(szSmall128, szVGA, sz720p, sz1080p),
-            Values(CV_8UC1, CV_8UC3, CV_16SC1, CV_32FC1),
-            Values(-1, CV_8U, CV_32F),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(-1, CV_8U, CV_16U, CV_16S, CV_32F),
             Values(cv::compile_args(CORE_FLUID))));
 
 INSTANTIATE_TEST_CASE_P(SubRCPerfTestFluid, SubRCPerfTest,
@@ -78,8 +78,8 @@ INSTANTIATE_TEST_CASE_P(MulDoublePerfTestFluid, MulDoublePerfTest,
 INSTANTIATE_TEST_CASE_P(MulCPerfTestFluid, MulCPerfTest,
     Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()),
             Values(szSmall128, szVGA, sz720p, sz1080p),
-            Values(CV_8UC1, CV_8UC3, CV_16SC1, CV_32FC1),
-            Values(-1, CV_8U, CV_32F),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(-1, CV_8U, CV_16U, CV_16S, CV_32F),
             Values(cv::compile_args(CORE_FLUID))));
 
 INSTANTIATE_TEST_CASE_P(DivPerfTestFluid, DivPerfTest,
@@ -93,8 +93,8 @@ INSTANTIATE_TEST_CASE_P(DivPerfTestFluid, DivPerfTest,
 INSTANTIATE_TEST_CASE_P(DivCPerfTestFluid, DivCPerfTest,
     Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()),
             Values(szSmall128, szVGA, sz720p, sz1080p),
-            Values(CV_8UC1, CV_8UC3, CV_16SC1, CV_32FC1),
-            Values(-1, CV_8U, CV_32F),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(-1, CV_8U, CV_16U, CV_16S, CV_32F),
             Values(1.0),
             Values(cv::compile_args(CORE_FLUID))));
 
@@ -328,36 +328,4 @@ INSTANTIATE_TEST_CASE_P(ConvertToPerfTestFluid, ConvertToPerfTest,
             Values(0.0),
             Values(cv::compile_args(CORE_FLUID))));
 
-INSTANTIATE_TEST_CASE_P(ResizePerfTestFluid, ResizePerfTest,
-    Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()),
-            Values(CV_8UC3),
-            Values(cv::INTER_LINEAR),
-            Values(szSmall128, szVGA, sz720p, sz1080p),
-            Values(cv::Size(64, 64),
-                   cv::Size(30, 30)),
-            Values(cv::compile_args(CORE_FLUID))));
-
-#define IMGPROC_FLUID cv::gapi::imgproc::fluid::kernels()
-INSTANTIATE_TEST_CASE_P(BottleneckKernelsPerfTestFluid, BottleneckKernelsConstInputPerfTest,
-    Combine(Values(AbsSimilarPoints(0, 1).to_compare_f()),
-            Values("cv/optflow/frames/1080p_00.png", "cv/optflow/frames/720p_00.png",
-                   "cv/optflow/frames/VGA_00.png", "cv/dnn_face/recognition/Aaron_Tippin_0001.jpg"),
-            Values(cv::compile_args(CORE_FLUID, IMGPROC_FLUID))));
-
-INSTANTIATE_TEST_CASE_P(ResizeInSimpleGraphPerfTestFluid, ResizeInSimpleGraphPerfTest,
-    Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()),
-            Values(CV_8UC3),
-            Values(szSmall128, szVGA, sz720p, sz1080p),
-            Values(0.5),
-            Values(0.5),
-            Values(cv::compile_args(CORE_FLUID, IMGPROC_FLUID))));
-
-INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestFluid, ResizeFxFyPerfTest,
-    Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()),
-            Values(CV_8UC3),
-            Values(cv::INTER_LINEAR),
-            Values(szSmall128, szVGA, sz720p, sz1080p),
-            Values(0.5, 0.1),
-            Values(0.5, 0.1),
-            Values(cv::compile_args(CORE_FLUID))));
 } // opencv_test
diff --git a/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp b/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp
index d4c37c10af..dc4c65bf74 100644
--- a/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp
+++ b/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp
@@ -403,4 +403,36 @@ INSTANTIATE_TEST_CASE_P(RGB2YUV422PerfTestCPU, RGB2YUV422PerfTest,
         Combine(Values(ToleranceColor(1e-3).to_compare_f()),
             Values(szVGA, sz720p, sz1080p),
             Values(cv::compile_args(IMGPROC_CPU))));
+
+INSTANTIATE_TEST_CASE_P(ResizePerfTestCPU, ResizePerfTest,
+    Combine(Values(AbsExact().to_compare_f()),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
+            Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
+            Values(szSmall128, szVGA, sz720p, sz1080p),
+            Values( cv::Size(64, 64),
+                    cv::Size(32, 32)),
+            Values(cv::compile_args(IMGPROC_CPU))));
+
+INSTANTIATE_TEST_CASE_P(BottleneckKernelsPerfTestCPU, BottleneckKernelsConstInputPerfTest,
+    Combine(Values(AbsExact().to_compare_f()),
+            Values( "cv/optflow/frames/1080p_00.png", "cv/optflow/frames/720p_00.png",
+                    "cv/optflow/frames/VGA_00.png", "cv/dnn_face/recognition/Aaron_Tippin_0001.jpg"),
+            Values(cv::compile_args(IMGPROC_CPU))));
+
+INSTANTIATE_TEST_CASE_P(ResizeInSimpleGraphPerfTestCPU, ResizeInSimpleGraphPerfTest,
+    Combine(Values(AbsExact().to_compare_f()),
+            Values(CV_8UC3),
+            Values(szSmall128, szVGA, sz720p, sz1080p),
+            Values(0.5),
+            Values(0.5),
+            Values(cv::compile_args(IMGPROC_CPU))));
+
+INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestCPU, ResizeFxFyPerfTest,
+    Combine(Values(AbsExact().to_compare_f()),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
+            Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
+            Values(szSmall128, szVGA, sz720p, sz1080p),
+            Values(0.5, 0.1),
+            Values(0.5, 0.1),
+            Values(cv::compile_args(IMGPROC_CPU))));
 } // opencv_test
diff --git a/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp
index 1ccd763099..b5e72ae4ce 100644
--- a/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp
+++ b/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp
@@ -9,6 +9,7 @@
 #include "../common/gapi_imgproc_perf_tests.hpp"
 
 #define IMGPROC_FLUID cv::gapi::imgproc::fluid::kernels()
+#define CORE_FLUID cv::gapi::core::fluid::kernels()
 
 namespace opencv_test
 {
@@ -198,4 +199,36 @@ INSTANTIATE_TEST_CASE_P(RGB2LabPerfTestFluid, RGB2LabPerfTest,
             Values(szVGA, sz720p, sz1080p),
             Values(cv::compile_args(IMGPROC_FLUID))));
 
+INSTANTIATE_TEST_CASE_P(ResizePerfTestFluid, ResizePerfTest,
+    Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()),
+            Values(CV_8UC3),
+            Values(cv::INTER_LINEAR),
+            Values(szSmall128, szVGA, sz720p, sz1080p),
+            Values(cv::Size(64, 64),
+                   cv::Size(30, 30)),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+#define IMGPROC_FLUID cv::gapi::imgproc::fluid::kernels()
+INSTANTIATE_TEST_CASE_P(BottleneckKernelsPerfTestFluid, BottleneckKernelsConstInputPerfTest,
+    Combine(Values(AbsSimilarPoints(0, 1).to_compare_f()),
+            Values("cv/optflow/frames/1080p_00.png", "cv/optflow/frames/720p_00.png",
+                   "cv/optflow/frames/VGA_00.png", "cv/dnn_face/recognition/Aaron_Tippin_0001.jpg"),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(ResizeInSimpleGraphPerfTestFluid, ResizeInSimpleGraphPerfTest,
+    Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()),
+            Values(CV_8UC3),
+            Values(szSmall128, szVGA, sz720p, sz1080p),
+            Values(0.5),
+            Values(0.5),
+            Values(cv::compile_args(cv::gapi::combine(IMGPROC_FLUID, CORE_FLUID)))));
+
+INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestFluid, ResizeFxFyPerfTest,
+    Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()),
+            Values(CV_8UC3),
+            Values(cv::INTER_LINEAR),
+            Values(szSmall128, szVGA, sz720p, sz1080p),
+            Values(0.5, 0.25, 2),
+            Values(0.5, 0.25, 2),
+            Values(cv::compile_args(IMGPROC_FLUID))));
 }
diff --git a/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp b/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp
index 6aaec4d79a..8aaa304e58 100644
--- a/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp
+++ b/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp
@@ -79,7 +79,7 @@ INSTANTIATE_TEST_CASE_P(DivPerfTestGPU, DivPerfTest,
             Values(cv::compile_args(CORE_GPU))));
 
 INSTANTIATE_TEST_CASE_P(DivCPerfTestGPU, DivCPerfTest,
-    Combine(Values(AbsExact().to_compare_f()),
+    Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()),
             Values( szSmall128, szVGA, sz720p, sz1080p ),
             Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
             Values( -1, CV_8U, CV_16U, CV_32F ),
@@ -320,25 +320,7 @@ INSTANTIATE_TEST_CASE_P(TransposePerfTestGPU, TransposePerfTest,
     Combine(Values(AbsExact().to_compare_f()),
             Values(szSmall128, szVGA, sz720p, sz1080p),
             Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1,
-                    CV_8UC2, CV_16UC2, CV_16SC2, CV_32FC2,
-                    CV_8UC3, CV_16UC3, CV_16SC3, CV_32FC3),
-            Values(cv::compile_args(CORE_GPU))));
-
-INSTANTIATE_TEST_CASE_P(ResizePerfTestGPU, ResizePerfTest,
-    Combine(Values(AbsSimilarPoints(2, 0.05).to_compare_f()),
-            Values(CV_8UC1, CV_16UC1, CV_16SC1),
-            Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
-            Values( szSmall128, szVGA, sz720p, sz1080p ),
-            Values(cv::Size(64,64),
-                    cv::Size(30,30)),
-            Values(cv::compile_args(CORE_GPU))));
-
-INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestGPU, ResizeFxFyPerfTest,
-    Combine(Values(AbsSimilarPoints(2, 0.05).to_compare_f()),
-            Values(CV_8UC1, CV_16UC1, CV_16SC1),
-            Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
-            Values( szSmall128, szVGA, sz720p, sz1080p ),
-            Values(0.5, 0.1),
-            Values(0.5, 0.1),
+                   CV_8UC2, CV_16UC2, CV_16SC2, CV_32FC2,
+                   CV_8UC3, CV_16UC3, CV_16SC3, CV_32FC3),
             Values(cv::compile_args(CORE_GPU))));
 } // opencv_test
diff --git a/modules/gapi/perf/gpu/gapi_imgproc_perf_tests_gpu.cpp b/modules/gapi/perf/gpu/gapi_imgproc_perf_tests_gpu.cpp
index 1f4f3883d1..faacf4f254 100644
--- a/modules/gapi/perf/gpu/gapi_imgproc_perf_tests_gpu.cpp
+++ b/modules/gapi/perf/gpu/gapi_imgproc_perf_tests_gpu.cpp
@@ -204,4 +204,21 @@ INSTANTIATE_TEST_CASE_P(YUV2BGRPerfTestGPU, YUV2BGRPerfTest,
                         Values(szVGA, sz720p, sz1080p),
                         Values(cv::compile_args(IMGPROC_GPU))));
 
+INSTANTIATE_TEST_CASE_P(ResizePerfTestGPU, ResizePerfTest,
+                        Combine(Values(AbsSimilarPoints(2, 0.05).to_compare_f()),
+                        Values(CV_8UC1, CV_16UC1, CV_16SC1),
+                        Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
+                        Values( szSmall128, szVGA, sz720p, sz1080p ),
+                        Values(cv::Size(64,64),
+                               cv::Size(30,30)),
+                        Values(cv::compile_args(IMGPROC_GPU))));
+
+INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestGPU, ResizeFxFyPerfTest,
+                        Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()),
+                        Values(CV_8UC1, CV_16UC1, CV_16SC1),
+                        Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
+                        Values(szSmall128, szVGA, sz720p, sz1080p),
+                        Values(0.5, 0.1),
+                        Values(0.5, 0.1),
+                        Values(cv::compile_args(IMGPROC_GPU))));
 }
diff --git a/modules/gapi/samples/data/config_template.yml b/modules/gapi/samples/data/config_template.yml
new file mode 100644
index 0000000000..75012d959d
--- /dev/null
+++ b/modules/gapi/samples/data/config_template.yml
@@ -0,0 +1,192 @@
+%YAML:1.0
+
+# Application running time in milliseconds: integer.
+work_time: 2000
+
+Pipelines:
+    PL1:
+      source:
+        name: 'Src'
+        latency: 33.0
+        output:
+          dims: [1, 3, 1280, 720]
+          precision: 'U8'
+
+      nodes:
+        - name: 'PP'
+          type: 'Dummy'
+          time: 0.2
+          output:
+            dims: [1, 3, 300, 300]
+            precision: 'U8'
+
+        - name: 'Infer'
+          type: 'Infer'
+          xml: 'face-detection-retail-0004.xml'
+          bin: 'face-detection-retail-0004.bin'
+          device: 'CPU'
+          input_layers:
+            - 'data'
+          output_layers:
+            - 'detection_out'
+
+      edges:
+        - from: 'Src'
+          to: 'PP'
+        - from: 'PP'
+          to: 'Infer'
+
+      # Path to the dump file (*.dot)
+      dump: 'pl1.dot'
+
+    PL2:
+      source:
+        name: 'Src'
+        latency: 50.0
+        output:
+          dims: [1, 3, 1280, 720]
+          precision: 'U8'
+
+      nodes:
+        - name: 'M1_PP'
+          type: 'Dummy'
+          time: 0.2
+          output:
+            dims: [1, 3, 300, 300]
+            precision: 'U8'
+
+        - name: 'M1_Infer'
+          type: 'Infer'
+          xml: 'face-detection-retail-0004.xml'
+          bin: 'face-detection-retail-0004.bin'
+          device: 'CPU'
+          input_layers:
+            - 'data'
+          output_layers:
+            - 'detection_out'
+
+        - name: 'M2_PP'
+          type: 'Dummy'
+          time: 0.2
+          output:
+            dims: [1, 3, 300, 300]
+            precision: 'U8'
+
+        - name: 'M2_Infer'
+          type: 'Infer'
+          xml: 'face-detection-retail-0004.xml'
+          bin: 'face-detection-retail-0004.bin'
+          device: 'CPU'
+          input_layers:
+            - 'data'
+          output_layers:
+            - 'detection_out'
+
+        - name: 'M3_PP'
+          type: 'Dummy'
+          time: 0.2
+          output:
+            dims: [1, 3, 300, 300]
+            precision: 'U8'
+
+        - name: 'M3_Infer'
+          type: 'Infer'
+          xml: 'face-detection-retail-0004.xml'
+          bin: 'face-detection-retail-0004.bin'
+          device: 'CPU'
+          input_layers:
+            - 'data'
+          output_layers:
+            - 'detection_out'
+
+        - name: 'M4_PP'
+          type: 'Dummy'
+          time: 0.2
+          output:
+            dims: [1, 3, 300, 300]
+            precision: 'U8'
+
+        - name: 'M4_Infer'
+          type: 'Infer'
+          xml: 'face-detection-retail-0004.xml'
+          bin: 'face-detection-retail-0004.bin'
+          device: 'CPU'
+          input_layers:
+            - 'data'
+          output_layers:
+            - 'detection_out'
+
+        - name: 'M5_PP'
+          type: 'Dummy'
+          time: 0.2
+          output:
+            dims: [1, 3, 300, 300]
+            precision: 'U8'
+
+        - name: 'M5_Infer'
+          type: 'Infer'
+          xml: 'face-detection-retail-0004.xml'
+          bin: 'face-detection-retail-0004.bin'
+          device: 'CPU'
+          input_layers:
+            - 'data'
+          output_layers:
+            - 'detection_out'
+
+      edges:
+        - from: 'Src'
+          to: 'M1_PP'
+        - from: 'M1_PP'
+          to: 'M1_Infer'
+        - from: 'M1_Infer'
+          to: 'M2_PP'
+        - from: 'M2_PP'
+          to: 'M2_Infer'
+        - from: 'M2_Infer'
+          to: 'M3_PP'
+        - from: 'M3_PP'
+          to: 'M3_Infer'
+        - from: 'M3_Infer'
+          to: 'M4_PP'
+        - from: 'M4_PP'
+          to: 'M4_Infer'
+        - from: 'M4_Infer'
+          to: 'M5_PP'
+        - from: 'M5_PP'
+          to: 'M5_Infer'
+
+      dump: 'pl2.dot'
+
+    PL3:
+      source:
+        name: 'Src'
+        latency: 33.0
+        output:
+          dims: [1, 3, 1280, 720]
+          precision: 'U8'
+
+      nodes:
+        - name: 'PP'
+          type: 'Dummy'
+          time: 0.2
+          output:
+            dims: [1, 3, 300, 300]
+            precision: 'U8'
+
+        - name: 'Infer'
+          type: 'Infer'
+          xml: 'face-detection-retail-0004.xml'
+          bin: 'face-detection-retail-0004.bin'
+          device: 'CPU'
+          input_layers:
+            - 'data'
+          output_layers:
+            - 'detection_out'
+
+      edges:
+        - from: 'Src'
+          to: 'PP'
+        - from: 'PP'
+          to: 'Infer'
+
+      dump: 'pl3.dot'
diff --git a/modules/gapi/samples/oak_rgb_camera_encoding.cpp b/modules/gapi/samples/oak_rgb_camera_encoding.cpp
new file mode 100644
index 0000000000..ac6b5cc5f0
--- /dev/null
+++ b/modules/gapi/samples/oak_rgb_camera_encoding.cpp
@@ -0,0 +1,70 @@
+#include <fstream>
+
+#include <opencv2/gapi.hpp>
+#include <opencv2/gapi/core.hpp>
+#include <opencv2/gapi/gframe.hpp>
+
+#include <opencv2/gapi/oak/oak.hpp>
+#include <opencv2/gapi/streaming/format.hpp> // BGR accessor
+
+#include <opencv2/highgui.hpp> // CommandLineParser
+
+const std::string keys =
+    "{ h help  |              | Print this help message }"
+    "{ output  | output.h265  | Path to the output .h265 video file }";
+
+#ifdef HAVE_OAK
+
+int main(int argc, char *argv[]) {
+    cv::CommandLineParser cmd(argc, argv, keys);
+    if (cmd.has("help")) {
+        cmd.printMessage();
+        return 0;
+    }
+    // Sample flow: OAK color camera -> oak::encode (H.265) -> output file
+    const std::string output_name = cmd.get<std::string>("output");
+
+    cv::gapi::oak::EncoderConfig cfg;
+    cfg.profile = cv::gapi::oak::EncoderConfig::Profile::H265_MAIN;
+
+    cv::GFrame in;
+    cv::GArray<uint8_t> encoded = cv::gapi::oak::encode(in, cfg);
+
+    auto args = cv::compile_args(cv::gapi::oak::ColorCameraParams{}, cv::gapi::oak::kernels());
+
+    auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(encoded)).compileStreaming(std::move(args));
+
+    // Graph execution /////////////////////////////////////////////////////////
+    pipeline.setSource(cv::gapi::wip::make_src<cv::gapi::oak::ColorCamera>());
+    pipeline.start();
+    // Refilled by every successful pull() below
+    std::vector<uint8_t> out_h265_data;
+
+    std::ofstream out_h265_file;
+    out_h265_file.open(output_name, std::ofstream::out | std::ofstream::binary | std::ofstream::trunc);
+
+    // Pull exactly 300 frames from the camera
+    const uint32_t frames = 300;
+    uint32_t pulled = 0;
+    // NB: pre-increment, so the loop ends right after the 300th pull
+    while (pipeline.pull(cv::gout(out_h265_data))) {
+        if (out_h265_file.is_open()) {
+            out_h265_file.write(reinterpret_cast<const char*>(out_h265_data.data()),
+                                                              out_h265_data.size());
+        }
+        if (++pulled == frames) {
+            pipeline.stop();
+            break;
+        }
+    }
+
+    std::cout << "Pipeline finished: " << output_name << " file has been written." << std::endl;
+}
+#else // HAVE_OAK
+
+int main() {
+    GAPI_Assert(false && "Built without OAK support");  // always fails: the sample needs HAVE_OAK
+    return -1;
+}
+
+#endif // HAVE_OAK
diff --git a/modules/gapi/samples/oak_small_hetero_pipeline.cpp b/modules/gapi/samples/oak_small_hetero_pipeline.cpp
new file mode 100644
index 0000000000..dadb9d0f3b
--- /dev/null
+++ b/modules/gapi/samples/oak_small_hetero_pipeline.cpp
@@ -0,0 +1,69 @@
+#include <opencv2/gapi.hpp>
+#include <opencv2/gapi/core.hpp>
+#include <opencv2/gapi/cpu/core.hpp>
+#include <opencv2/gapi/gframe.hpp>
+#include <opencv2/gapi/media.hpp>
+
+#include <opencv2/gapi/oak/oak.hpp>
+#include <opencv2/gapi/streaming/format.hpp> // BGR accessor
+
+#include <opencv2/highgui.hpp> // CommandLineParser
+
+const std::string keys =
+    "{ h help  |              | Print this help message }"
+    "{ output  | output.png   | Path to the output file }";
+
+#ifdef HAVE_OAK
+
+int main(int argc, char *argv[]) {
+    cv::CommandLineParser cmd(argc, argv, keys);
+    if (cmd.has("help")) {
+        cmd.printMessage();
+        return 0;
+    }
+
+    const std::string output_name = cmd.get<std::string>("output");
+    // 3x3 kernels passed to the OAK sobelXY operation below
+    std::vector<int> h = {1, 0, -1,
+                          2, 0, -2,
+                          1, 0, -1};
+    std::vector<int> v = { 1,  2,  1,
+                           0,  0,  0,
+                          -1, -2, -1};
+    cv::Mat hk(3, 3, CV_32SC1, h.data());
+    cv::Mat vk(3, 3, CV_32SC1, v.data());
+
+    // Heterogeneous pipeline:
+    // OAK camera -> Sobel -> streaming accessor (CPU)
+    cv::GFrame in;
+    cv::GFrame sobel = cv::gapi::oak::sobelXY(in, hk, vk);
+    // Default camera and then sobel work only with nv12 format
+    cv::GMat out = cv::gapi::streaming::Y(sobel);
+
+    auto args = cv::compile_args(cv::gapi::oak::ColorCameraParams{},
+                                 cv::gapi::oak::kernels());
+
+    auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)).compileStreaming(std::move(args));
+
+    // Graph execution /////////////////////////////////////////////////////////
+    cv::Mat out_mat(1080, 1920, CV_8UC1); // NB: cv::Mat takes (rows, cols), i.e. (height, width)
+    pipeline.setSource(cv::gapi::wip::make_src<cv::gapi::oak::ColorCamera>());
+    pipeline.start();
+
+    // pull 1 frame; without it there is nothing to save
+    if (!pipeline.pull(cv::gout(out_mat))) {
+        std::cerr << "Failed to pull a frame from the camera" << std::endl;
+        return -1;
+    }
+    cv::imwrite(output_name, out_mat);
+    std::cout << "Pipeline finished: " << output_name << " file has been written." << std::endl;
+}
+
+#else // HAVE_OAK
+
+int main() {
+    GAPI_Assert(false && "Built without OAK support");  // always fails: the sample needs HAVE_OAK
+    return -1;
+}
+
+#endif // HAVE_OAK
diff --git a/modules/gapi/samples/onevpl_infer_single_roi.cpp b/modules/gapi/samples/onevpl_infer_single_roi.cpp
index 06950bcabe..80327e2d59 100644
--- a/modules/gapi/samples/onevpl_infer_single_roi.cpp
+++ b/modules/gapi/samples/onevpl_infer_single_roi.cpp
@@ -45,10 +45,15 @@ const std::string keys =
     "{ faced                        | AUTO                                      | Target device for face detection model (e.g. AUTO, GPU, VPU, ...) }"
     "{ cfg_params                   | <prop name>:<value>;<prop name>:<value>   | Semicolon separated list of oneVPL mfxVariants which is used for configuring source (see `MFXSetConfigFilterProperty` by https://spec.oneapi.io/versions/latest/elements/oneVPL/source/index.html) }"
     "{ streaming_queue_capacity     | 1                                         | Streaming executor queue capacity. Calculated automaticaly if 0 }"
-    "{ frames_pool_size             | 0                                         | OneVPL source applies this parameter as preallocated frames pool size}";
-
+    "{ frames_pool_size             | 0                                         | OneVPL source applies this parameter as preallocated frames pool size}"
+    "{ vpp_frames_pool_size         | 0                                         | OneVPL source applies this parameter as preallocated frames pool size for VPP preprocessing results}"
+    "{ source_preproc_enable        | 0                                         | Turn on OneVPL source frame preprocessing using network input description instead of IE plugin preprocessing}";
 
 namespace {
+bool is_gpu(const std::string &device_name) {  // any device string containing "GPU"
+    return std::string::npos != device_name.find("GPU");
+}
+
 std::string get_weights_path(const std::string &model_path) {
     const auto EXT_LEN = 4u;
     const auto sz = model_path.size();
@@ -123,8 +128,9 @@ using GRect       = cv::GOpaque<cv::Rect>;
 using GSize       = cv::GOpaque<cv::Size>;
 using GPrims      = cv::GArray<cv::gapi::wip::draw::Prim>;
 
-G_API_OP(LocateROI, <GRect(GSize)>, "sample.custom.locate-roi") {
-    static cv::GOpaqueDesc outMeta(const cv::GOpaqueDesc &) {
+G_API_OP(LocateROI, <GRect(GSize, std::reference_wrapper<const std::string>)>, "sample.custom.locate-roi") {
+    static cv::GOpaqueDesc outMeta(const cv::GOpaqueDesc &,
+                                   std::reference_wrapper<const std::string>) {
         return cv::empty_gopaque_desc();
     }
 };
@@ -145,18 +151,30 @@ GAPI_OCV_KERNEL(OCVLocateROI, LocateROI) {
     // but only crops the input image to square (this is
     // the most convenient aspect ratio for detectors to use)
 
-    static void run(const cv::Size& in_size, cv::Rect &out_rect) {
+    static void run(const cv::Size& in_size,
+                    std::reference_wrapper<const std::string> device_id_ref,
+                    cv::Rect &out_rect) {
 
         // Identify the central point & square size (- some padding)
-        const auto center = cv::Point{in_size.width/2, in_size.height/2};
-        auto sqside = std::min(in_size.width, in_size.height);
+        // NB: GPU plugin in InferenceEngine doesn't support ROI for now
+        if (!is_gpu(device_id_ref.get())) {
+            const auto center = cv::Point{in_size.width/2, in_size.height/2};
+            auto sqside = std::min(in_size.width, in_size.height);
 
-        // Now build the central square ROI
-        out_rect = cv::Rect{ center.x - sqside/2
-                           , center.y - sqside/2
-                           , sqside
-                           , sqside
-                           };
+            // Now build the central square ROI
+            out_rect = cv::Rect{ center.x - sqside/2
+                                , center.y - sqside/2
+                                , sqside
+                                , sqside
+                                };
+        } else {
+            // use whole frame for GPU device
+            out_rect = cv::Rect{ 0
+                                , 0
+                                , in_size.width
+                                , in_size.height
+                                };
+        }
     }
 };
 
@@ -193,11 +211,14 @@ int main(int argc, char *argv[]) {
     }
 
     // get file name
-    std::string file_path = cmd.get<std::string>("input");
-    const std::string output = cmd.get<std::string>("output");
+    const auto file_path = cmd.get<std::string>("input");
+    const auto output = cmd.get<std::string>("output");
     const auto face_model_path = cmd.get<std::string>("facem");
     const auto streaming_queue_capacity = cmd.get<uint32_t>("streaming_queue_capacity");
-    const auto source_queue_capacity = cmd.get<uint32_t>("frames_pool_size");
+    const auto source_decode_queue_capacity = cmd.get<uint32_t>("frames_pool_size");
+    const auto source_vpp_queue_capacity = cmd.get<uint32_t>("vpp_frames_pool_size");
+    const auto vpl_source_preproc_enable = cmd.get<uint32_t>("source_preproc_enable");
+    const auto device_id = cmd.get<std::string>("faced");
 
     // check ouput file extension
     if (!output.empty()) {
@@ -214,6 +235,12 @@ int main(int argc, char *argv[]) {
     try {
         std::string line;
         while (std::getline(params_list, line, ';')) {
+            if (vpl_source_preproc_enable == 0) {
+                if (line.find("vpp.") != std::string::npos) {
+                    // skip VPP preprocessing primitives if not requested
+                    continue;
+                }
+            }
             source_cfgs.push_back(cfg::create_from_string(line));
         }
     } catch (const std::exception& ex) {
@@ -221,11 +248,13 @@ int main(int argc, char *argv[]) {
         return -1;
     }
 
-    if (source_queue_capacity != 0) {
-        source_cfgs.push_back(cv::gapi::wip::onevpl::CfgParam::create_frames_pool_size(source_queue_capacity));
+    if (source_decode_queue_capacity != 0) {
+        source_cfgs.push_back(cv::gapi::wip::onevpl::CfgParam::create_frames_pool_size(source_decode_queue_capacity));
+    }
+    if (source_vpp_queue_capacity != 0) {
+        source_cfgs.push_back(cv::gapi::wip::onevpl::CfgParam::create_vpp_frames_pool_size(source_vpp_queue_capacity));
     }
 
-    const std::string& device_id = cmd.get<std::string>("faced");
     auto face_net = cv::gapi::ie::Params<custom::FaceDetector> {
         face_model_path,                 // path to topology IR
         get_weights_path(face_model_path),   // path to weights
@@ -247,7 +276,7 @@ int main(int argc, char *argv[]) {
     auto dx11_dev = createCOMPtrGuard<ID3D11Device>();
     auto dx11_ctx = createCOMPtrGuard<ID3D11DeviceContext>();
 
-    if (device_id.find("GPU") != std::string::npos) {
+    if (is_gpu(device_id)) {
         auto adapter_factory = createCOMPtrGuard<IDXGIFactory>();
         {
             IDXGIFactory* out_factory = nullptr;
@@ -294,11 +323,25 @@ int main(int argc, char *argv[]) {
 #endif // HAVE_D3D11
 #endif // HAVE_DIRECTX
     // set ctx_config for GPU device only - no need in case of CPU device type
-    if (device_id.find("GPU") != std::string::npos) {
+    if (is_gpu(device_id)) {
         InferenceEngine::ParamMap ctx_config({{"CONTEXT_TYPE", "VA_SHARED"},
                                             {"VA_DEVICE", accel_device_ptr} });
 
         face_net.cfgContextParams(ctx_config);
+        face_net.pluginConfig({{"GPU_NV12_TWO_INPUTS", "YES" }});
+
+        std::cout <<"/*******************************************************/\n"
+                    "ATTENTION: GPU Inference Engine preprocessing is not vital as expected!"
+                     " Please consider param \"source_preproc_enable=1\" and specify "
+                     " appropriated media frame transformation using oneVPL::VPP primitives"
+                     " which force onevpl::GSource to produce tranformed media frames."
+                     " For exploring list of supported transformations please find out "
+                     " vpp_* related stuff in"
+                     " gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp"
+                     " Pay attention that to obtain expected result In this case VPP "
+                     " transformation must match network input params."
+                     " Please vote/create issue about exporting network params using GAPI\n"
+                     "/******************************************************/" << std::endl;
     }
 #endif // HAVE_INF_ENGINE
 
@@ -314,7 +357,7 @@ int main(int argc, char *argv[]) {
     // Create source
     cv::Ptr<cv::gapi::wip::IStreamSource> cap;
     try {
-        if (device_id.find("GPU") != std::string::npos) {
+        if (is_gpu(device_id)) {
             cap = cv::gapi::wip::make_onevpl_src(file_path, source_cfgs,
                                                  device_id,
                                                  accel_device_ptr,
@@ -334,7 +377,7 @@ int main(int argc, char *argv[]) {
     // Now build the graph
     cv::GFrame in;
     auto size = cv::gapi::streaming::size(in);
-    auto roi = custom::LocateROI::on(size);
+    auto roi = custom::LocateROI::on(size, std::cref(device_id));
     auto blob = cv::gapi::infer<custom::FaceDetector>(roi, in);
     cv::GArray<cv::Rect> rcs = cv::gapi::parseSSD(blob, size, 0.5f, true, true);
     auto out_frame = cv::gapi::wip::draw::renderFrame(in, custom::BBoxes::on(rcs, roi));
@@ -397,6 +440,8 @@ typename cv::gapi::wip::onevpl::CfgParam create_from_string(const std::string &l
     std::string name = line.substr(0, name_endline_pos);
     std::string value = line.substr(name_endline_pos + 1);
 
-    return cv::gapi::wip::onevpl::CfgParam::create(name, value);
+    return cv::gapi::wip::onevpl::CfgParam::create(name, value,
+                                                   /* vpp params strongly optional */
+                                                   name.find("vpp.") == std::string::npos);
 }
 }
diff --git a/modules/gapi/samples/pipeline_modeling_tool.cpp b/modules/gapi/samples/pipeline_modeling_tool.cpp
new file mode 100644
index 0000000000..ca6187e1ca
--- /dev/null
+++ b/modules/gapi/samples/pipeline_modeling_tool.cpp
@@ -0,0 +1,421 @@
+#include <iostream>
+#include <fstream>
+#include <thread>
+#include <exception>
+#include <unordered_map>
+#include <vector>
+
+#include <opencv2/gapi.hpp>
+#include <opencv2/highgui.hpp> // cv::CommandLineParser
+#include <opencv2/core/utils/filesystem.hpp>
+
+#if defined(_WIN32)
+#include <windows.h>
+#endif
+
+#include "pipeline_modeling_tool/dummy_source.hpp"
+#include "pipeline_modeling_tool/utils.hpp"
+#include "pipeline_modeling_tool/pipeline_builder.hpp"
+
+enum class AppMode {
+    REALTIME,  // sources keep the latency given in the config
+    BENCHMARK  // main() forces source latency to 0.0 in this mode
+};
+
+static AppMode strToAppMode(const std::string& mode_str) {
+    // Maps the app_mode option value to AppMode; anything else is rejected.
+    if (mode_str == "realtime") {
+        return AppMode::REALTIME;
+    } else if (mode_str == "benchmark") {
+        return AppMode::BENCHMARK;
+    }
+    throw std::logic_error("Unsupported AppMode: " + mode_str +
+            "\nPlease choose between: realtime and benchmark");
+}
+
+template <typename T>
+T read(const cv::FileNode& node) {  // generic case; specialized below for project types
+    return static_cast<T>(node);  // relies on a conversion from cv::FileNode to T
+}
+
+static cv::FileNode check_and_get_fn(const cv::FileNode& fn,
+                                     const std::string&  field,
+                                     const std::string&  uplvl) {
+    // A field can only be looked up in a map node, and it must be present.
+    if (fn.isMap() && !fn[field].empty()) {
+        return fn[field];
+    }
+    throw std::logic_error(uplvl + " must contain field: " + field);
+}
+
+static cv::FileNode check_and_get_fn(const cv::FileStorage& fs,
+                                     const std::string&     field,
+                                     const std::string&     uplvl) {
+    const cv::FileNode found = fs[field];  // top-level lookup by name
+    if (!found.empty()) {
+        return found;
+    }
+    throw std::logic_error(uplvl + " must contain field: " + field);
+}
+
+template <typename T, typename FileT>
+T check_and_read(const FileT& f,
+                 const std::string& field,
+                 const std::string& uplvl) {
+    // Mandatory field: check_and_get_fn throws if it is absent.
+    return read<T>(check_and_get_fn(f, field, uplvl));
+}
+
+template <typename T>
+cv::optional<T> readOpt(const cv::FileNode& fn) {  // empty node -> empty optional
+    return !fn.empty() ? cv::optional<T>(read<T>(fn)) : cv::optional<T>();
+}
+
+template <typename T>
+std::vector<T> readList(const cv::FileNode& fn,
+                        const std::string& field,
+                        const std::string& uplvl) {
+    const cv::FileNode seq = check_and_get_fn(fn, field, uplvl);
+    if (!seq.isSeq()) {
+        throw std::logic_error(field + " in " + uplvl + " must be a sequence");
+    }
+    // Convert the sequence element by element via read<T>.
+    std::vector<T> items;
+    for (auto it = seq.begin(); it != seq.end(); ++it) {
+        items.push_back(read<T>(*it));
+    }
+    return items;
+}
+
+template <typename T>
+std::vector<T> readVec(const cv::FileNode& fn,
+                       const std::string& field,
+                       const std::string& uplvl) {
+    const cv::FileNode node = check_and_get_fn(fn, field, uplvl);
+    // Let the cv::FileNode stream operator fill the whole vector at once.
+    std::vector<T> values;
+    node >> values;
+    return values;
+}
+
+static int strToPrecision(const std::string& precision) {
+    static std::unordered_map<std::string, int> known = {  // config name -> cv depth
+        {"U8", CV_8U}, {"FP32", CV_32F}, {"FP16", CV_16F}
+    };
+    const auto found = known.find(precision);
+    if (found != known.end()) {
+        return found->second;
+    }
+    throw std::logic_error("Unsupported precision: " + precision);
+}
+
+template <>
+OutputDescr read<OutputDescr>(const cv::FileNode& fn) {  // needs both dims and precision
+    const std::vector<int> shape = readVec<int>(fn, "dims", "output");
+    const int depth = strToPrecision(check_and_read<std::string>(fn, "precision", "output"));
+    return OutputDescr{shape, depth};
+}
+
+template <>
+Edge read<Edge>(const cv::FileNode& fn) {
+    auto from = check_and_read<std::string>(fn, "from", "edge");
+    auto to   = check_and_read<std::string>(fn, "to", "edge");
+    // An endpoint is either "name" or "name:port"; without ':' the port is 0.
+    auto splitNameAndPort = [](const std::string& str) {
+        auto pos = str.find(':');
+        auto name =
+            pos == std::string::npos ? str : std::string(str.c_str(), pos);
+        size_t port =
+            pos == std::string::npos ? 0 : std::atoi(str.c_str() + pos + 1);
+        return std::make_pair(name, port);
+    };
+    // NOTE(review): atoi() gives 0 for non-numeric ports; negatives wrap in size_t.
+    auto p1 = splitNameAndPort(from);
+    auto p2 = splitNameAndPort(to);
+    return Edge{Edge::P{p1.first, p1.second}, Edge::P{p2.first, p2.second}};
+}
+
+static std::string getModelsPath() {  // evaluated on the first call, then cached
+    static const char* env_value = std::getenv("PIPELINE_MODELS_PATH");
+    static std::string cached = (env_value != nullptr) ? env_value : ".";
+    return cached;
+}
+
+template <>
+ModelPath read<ModelPath>(const cv::FileNode& fn) {
+    using cv::utils::fs::join;  // every model path is joined with getModelsPath()
+    if (!fn["xml"].empty() && !fn["bin"].empty()) {  // format 1: xml + bin pair
+        return ModelPath{LoadPath{join(getModelsPath(), fn["xml"].string()),
+                                  join(getModelsPath(), fn["bin"].string())}};
+    } else if (!fn["blob"].empty()){  // format 2: single blob file
+        return ModelPath{ImportPath{join(getModelsPath(), fn["blob"].string())}};
+    } else {
+        const std::string emsg = R""""(
+        Path to OpenVINO model must be specified in either of two formats:
+1.
+  xml: path to *.xml
+  bin: path to *.bin
+2.
+  blob: path to *.blob
+        )"""";
+        throw std::logic_error(emsg);  // neither format is present in the node
+    }
+}
+
+static PLMode strToPLMode(const std::string& mode_str) {
+    // Maps a pipeline mode string to PLMode; anything else is rejected.
+    if (mode_str == "streaming") {
+        return PLMode::STREAMING;
+    } else if (mode_str == "regular") {
+        return PLMode::REGULAR;
+    }
+    throw std::logic_error("Unsupported PLMode: " + mode_str +
+            "\nPlease choose between: streaming and regular");
+}
+
+static std::vector<std::string> parseExecList(const std::string& exec_list) {
+    // Split on ',' verbatim: no trimming, so spaces stay part of the names.
+    std::vector<std::string> names;
+    std::stringstream stream(exec_list);
+    for (std::string token; std::getline(stream, token, ','); ) {
+        names.push_back(token);
+    }
+    return names;
+}
+
+static void loadConfig(const std::string&                        filename,
+                             std::map<std::string, std::string>& config) {
+    cv::FileStorage storage(filename, cv::FileStorage::READ);
+    if (!storage.isOpened()) {
+        throw std::runtime_error("Failed to load config: " + filename);
+    }
+    // Every top-level entry must be a map; its key/value pairs are flattened
+    // into config. NB: emplace() keeps the first value of a repeated key.
+    const cv::FileNode root = storage.root();
+    for (auto section : root) {
+        if (!section.isMap()) {
+            throw std::runtime_error("Failed to parse config: " + filename);
+        }
+        for (auto entry : section) {
+            config.emplace(entry.name(), entry.string());
+        }
+    }
+}
+
+int main(int argc, char* argv[]) {
+#if defined(_WIN32)
+    timeBeginPeriod(1);  // NOTE(review): presumably requests 1 ms timer resolution - confirm
+#endif
+    try {
+        const std::string keys =
+        "{ h help      |           | Print this help message. }"
+        "{ cfg         |           | Path to the config which is either"
+                                   " YAML file or string. }"
+        "{ load_config |           | Optional. Path to XML/YAML/JSON file"
+                                   " to load custom IE parameters. }"
+        "{ cache_dir   |           | Optional. Enables caching of loaded models"
+                                   " to specified directory. }"
+        "{ log_file    |           | Optional. If file is specified, app will"
+                                   " dump expanded execution information. }"
+        "{ pl_mode     | streaming | Optional. Pipeline mode: streaming/regular"
+                                   " if it's specified will be applied for"
+                                   " every pipeline. }"
+        "{ qc          | 1         | Optional. Calculated automatically by G-API"
+                                   " if set to 0. If it's specified will be"
+                                   " applied for every pipeline. }"
+        "{ app_mode    | realtime  | Application mode (realtime/benchmark). }"
+        "{ exec_list   |           | A comma-separated list of pipelines that"
+                                   " will be executed. Spaces around commas"
+                                   " are prohibited. }";
+        // Flow: parse options -> read config -> build -> compile -> run -> report.
+        cv::CommandLineParser cmd(argc, argv, keys);
+        if (cmd.has("help")) {
+            cmd.printMessage();
+            return 0;
+        }
+
+        const auto cfg         = cmd.get<std::string>("cfg");
+        const auto load_config = cmd.get<std::string>("load_config");
+        const auto cached_dir  = cmd.get<std::string>("cache_dir");
+        const auto log_file    = cmd.get<std::string>("log_file");
+        const auto pl_mode     = strToPLMode(cmd.get<std::string>("pl_mode"));
+        const auto qc          = cmd.get<int>("qc");
+        const auto app_mode    = strToAppMode(cmd.get<std::string>("app_mode"));
+        const auto exec_str    = cmd.get<std::string>("exec_list");
+
+        cv::FileStorage fs;
+        if (cfg.empty()) {
+            throw std::logic_error("Config must be specified via --cfg option");
+        }
+        // NB: *.yml
+        if (cfg.size() < 5) {
+            throw std::logic_error("--cfg string must contain at least 5 symbols"
+                                   " to determine if it's a file (*.yml) or a string");
+        }
+        if (cfg.substr(cfg.size() - 4, cfg.size()) == ".yml") {
+            if (!fs.open(cfg, cv::FileStorage::READ)) {
+                throw std::logic_error("Failed to open config file: " + cfg);
+            }
+        } else {
+            fs = cv::FileStorage(cfg, cv::FileStorage::FORMAT_YAML |
+                                      cv::FileStorage::MEMORY);
+        }
+
+        std::map<std::string, std::string> config;
+        if (!load_config.empty()) {
+            loadConfig(load_config, config);
+        }
+        // NB: --cache_dir overrides CACHE_DIR coming from the config file,
+        if (!cached_dir.empty()) {
+            // while every other loaded parameter stays untouched.
+            config["CACHE_DIR"] = cached_dir;
+        }
+
+        const double work_time_ms =
+            check_and_read<double>(fs, "work_time", "Config");
+        if (work_time_ms < 0) {
+            throw std::logic_error("work_time must be positive");
+        }
+
+        auto pipelines_fn = check_and_get_fn(fs, "Pipelines", "Config");
+        if (!pipelines_fn.isMap()) {
+            throw std::logic_error("Pipelines field must be a map");
+        }
+
+        auto exec_list = !exec_str.empty() ? parseExecList(exec_str)
+                                           : pipelines_fn.keys();
+
+
+        std::vector<Pipeline::Ptr> pipelines;
+        pipelines.reserve(exec_list.size());
+        // NB: Build pipelines based on config information
+        PipelineBuilder builder;
+        for (const auto& name : exec_list) {
+            const auto& pl_fn = check_and_get_fn(pipelines_fn, name, "Pipelines");
+            builder.setName(name);
+            // NB: Set source
+            {
+                const auto& src_fn = check_and_get_fn(pl_fn, "source", name);
+                auto src_name =
+                    check_and_read<std::string>(src_fn, "name", "source");
+                auto latency =
+                    check_and_read<double>(src_fn, "latency", "source");
+                auto output =
+                    check_and_read<OutputDescr>(src_fn, "output", "source");
+                // NB: In case BENCHMARK mode sources work with zero latency.
+                if (app_mode == AppMode::BENCHMARK) {
+                    latency = 0.0;
+                }
+                builder.setSource(src_name, latency, output);
+            }
+
+            const auto& nodes_fn = check_and_get_fn(pl_fn, "nodes", name);
+            if (!nodes_fn.isSeq()) {
+                throw std::logic_error("nodes in " + name + " must be a sequence");
+            }
+            for (auto node_fn : nodes_fn) {
+                auto node_name =
+                    check_and_read<std::string>(node_fn, "name", "node");
+                auto node_type =
+                    check_and_read<std::string>(node_fn, "type", "node");
+                if (node_type == "Dummy") {
+                    auto time =
+                        check_and_read<double>(node_fn, "time", node_name);
+                    if (time < 0) {
+                        throw std::logic_error(node_name + " time must be positive");
+                    }
+                    auto output =
+                        check_and_read<OutputDescr>(node_fn, "output", node_name);
+                    builder.addDummy(node_name, time, output);
+                } else if (node_type == "Infer") {
+                    InferParams params;
+                    params.path   = read<ModelPath>(node_fn);
+                    params.device =
+                        check_and_read<std::string>(node_fn, "device", node_name);
+                    params.input_layers =
+                        readList<std::string>(node_fn, "input_layers", node_name);
+                    params.output_layers =
+                        readList<std::string>(node_fn, "output_layers", node_name);
+                    params.config = config;
+                    builder.addInfer(node_name, params);
+                } else {
+                    throw std::logic_error("Unsupported node type: " + node_type);
+                }
+            }
+
+            const auto edges_fn = check_and_get_fn(pl_fn, "edges", name);
+            if (!edges_fn.isSeq()) {
+                throw std::logic_error("edges in " + name + " must be a sequence");
+            }
+            for (auto edge_fn : edges_fn) {
+                auto edge = read<Edge>(edge_fn);
+                builder.addEdge(edge);
+            }
+
+            // NB: Pipeline mode from config takes priority over cmd.
+            auto mode = readOpt<std::string>(pl_fn["mode"]);
+            builder.setMode(mode.has_value() ? strToPLMode(mode.value()) : pl_mode);
+
+            // NB: Queue capacity from config takes priority over cmd.
+            auto config_qc = readOpt<int>(pl_fn["queue_capacity"]);
+            auto queue_capacity = config_qc.has_value() ? config_qc.value() : qc;
+            // NB: 0 is special constant that means
+            // queue capacity should be calculated automatically.
+            if (queue_capacity != 0) {
+                builder.setQueueCapacity(queue_capacity);
+            }
+
+            auto dump = readOpt<std::string>(pl_fn["dump"]);
+            if (dump) {
+                builder.setDumpFilePath(dump.value());
+            }
+
+            pipelines.emplace_back(builder.build());
+        }
+
+        // NB: Compile pipelines
+        for (size_t i = 0; i < pipelines.size(); ++i) {
+            pipelines[i]->compile();
+        }
+
+        // NB: Execute pipelines
+        std::vector<std::exception_ptr> eptrs(pipelines.size(), nullptr);
+        std::vector<std::thread> threads(pipelines.size());
+        for (size_t i = 0; i < pipelines.size(); ++i) {
+            threads[i] = std::thread([&, i]() {
+                try {
+                    pipelines[i]->run(work_time_ms);
+                } catch (...) {
+                    eptrs[i] = std::current_exception();
+                }
+            });
+        }
+        // NB: The log file (if requested) is opened while pipelines are running.
+        std::ofstream file;
+        if (!log_file.empty()) {
+            file.open(log_file);
+        }
+
+        for (size_t i = 0; i < threads.size(); ++i) {
+            threads[i].join();
+        }
+        // NB: Report in pipeline order; the first failed pipeline aborts reporting.
+        for (size_t i = 0; i < threads.size(); ++i) {
+            if (eptrs[i] != nullptr) {
+                try {
+                    std::rethrow_exception(eptrs[i]);
+                } catch (std::exception& e) {
+                    throw std::logic_error(pipelines[i]->name() + " failed: " + e.what());
+                }
+            }
+            if (file.is_open()) {
+                file << pipelines[i]->report().toStr(true) << std::endl;
+            }
+            std::cout << pipelines[i]->report().toStr() << std::endl;
+        }
+    } catch (const std::exception& e) {
+        std::cout << e.what() << std::endl;
+        throw;
+    }
+    return 0;
+}
diff --git a/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp b/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp
new file mode 100644
index 0000000000..1514eb2671
--- /dev/null
+++ b/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp
@@ -0,0 +1,63 @@
+#ifndef OPENCV_GAPI_PIPELINE_MODELING_TOOL_DUMMY_SOURCE_HPP
+#define OPENCV_GAPI_PIPELINE_MODELING_TOOL_DUMMY_SOURCE_HPP
+
+#include <thread>
+#include <memory>
+#include <chrono>
+
+#include <opencv2/gapi.hpp>
+#include <opencv2/gapi/streaming/cap.hpp> // cv::gapi::wip::IStreamSource
+
+#include "utils.hpp"
+
+class DummySource final: public cv::gapi::wip::IStreamSource {
+public:
+    using Ptr = std::shared_ptr<DummySource>;
+    DummySource(const double       latency,
+                const OutputDescr& output);
+    bool pull(cv::gapi::wip::Data& data) override;
+    cv::GMetaArg descr_of() const override;
+
+private:
+    double  m_latency; // NB: Target interval between two pulls, in ms.
+    cv::Mat m_mat;     // NB: Random payload created once, shared by every pull.
+    using TimePoint =
+        std::chrono::time_point<std::chrono::high_resolution_clock>;
+    cv::optional<TimePoint> m_prev_pull_tp; // NB: Empty until the first pull.
+};
+
+// NB: Allocates and fills the payload once. Inline: defined in a header.
+inline DummySource::DummySource(const double latency, const OutputDescr& output)
+    : m_latency(latency) {
+    utils::createNDMat(m_mat, output.dims, output.precision);
+    utils::generateRandom(m_mat);
+}
+
+// NB: Emits the pre-generated mat no more often than once per m_latency ms.
+inline bool DummySource::pull(cv::gapi::wip::Data& data) {
+    using namespace std::chrono;
+    using namespace cv::gapi::streaming;
+    // NB: In case it's the first pull.
+    if (!m_prev_pull_tp) {
+        m_prev_pull_tp = cv::util::make_optional(high_resolution_clock::now());
+    }
+    // NB: Just increase reference counter not to release mat memory
+    // after assigning it to the data.
+    cv::Mat mat = m_mat;
+    const auto elapsed = duration_cast<duration<double, std::milli>>(
+            high_resolution_clock::now() - *m_prev_pull_tp).count();
+    auto delta = m_latency - elapsed;
+    if (delta > 0) {
+        utils::sleep(delta);
+    }
+    data.meta[meta_tag::timestamp] = int64_t{utils::timestamp<milliseconds>()};
+    data = mat;
+    m_prev_pull_tp = cv::util::make_optional(high_resolution_clock::now());
+    return true;
+}
+
+inline cv::GMetaArg DummySource::descr_of() const { // NB: Meta of the payload.
+    return cv::GMetaArg{cv::descr_of(m_mat)};
+}
+
+#endif // OPENCV_GAPI_PIPELINE_MODELING_TOOL_DUMMY_SOURCE_HPP
diff --git a/modules/gapi/samples/pipeline_modeling_tool/pipeline.hpp b/modules/gapi/samples/pipeline_modeling_tool/pipeline.hpp
new file mode 100644
index 0000000000..2951d45610
--- /dev/null
+++ b/modules/gapi/samples/pipeline_modeling_tool/pipeline.hpp
@@ -0,0 +1,205 @@
+#ifndef OPENCV_GAPI_PIPELINE_MODELING_TOOL_PIPELINE_HPP
+#define OPENCV_GAPI_PIPELINE_MODELING_TOOL_PIPELINE_HPP
+
+struct PerfReport {
+    std::string               name;
+    double  avg_latency       = 0.0; // ms
+    double  throughput        = 0.0; // FPS
+    int64_t first_run_latency = 0;   // ms
+    int64_t elapsed           = 0;   // ms
+    int64_t compilation_time  = 0;   // ms
+    std::vector<int64_t> latencies;  // NB: Per-frame latencies, ms.
+
+    std::string toStr(bool expanded = false) const;
+};
+
+// NB: One-line summary of the report; with expand == true the totals and
+// the latency of every frame are appended. Inline: defined in a header.
+inline std::string PerfReport::toStr(bool expand) const {
+    std::stringstream ss;
+    ss << name << ": Compilation time: " << compilation_time << " ms; "
+       << "Average latency: " << avg_latency << " ms; Throughput: "
+       << throughput << " FPS; First latency: "
+       << first_run_latency << " ms";
+
+    if (expand) {
+        ss << "\nTotal processed frames: " << latencies.size()
+           << "\nTotal elapsed time: "     << elapsed << " ms" << std::endl;
+        for (size_t i = 0; i < latencies.size(); ++i) {
+            ss << "\nFrame:" << i << "\nLatency: " << latencies[i] << " ms";
+        }
+    }
+
+    return ss.str();
+}
+
+class Pipeline {
+public:
+    using Ptr = std::shared_ptr<Pipeline>;
+
+    Pipeline(std::string&&                       name,
+             cv::GComputation&&                  comp,
+             cv::gapi::wip::IStreamSource::Ptr&& src,
+             cv::GCompileArgs&&                  args,
+             const size_t                        num_outputs);
+
+    void compile();                // NB: Times _compile().
+    void run(double work_time_ms); // NB: Calls _run() and fills the report.
+    const PerfReport& report() const;
+    const std::string& name() const { return m_name;}
+
+    virtual ~Pipeline() = default;
+
+protected:
+    struct RunPerf {
+        int64_t              elapsed   = 0; // ms
+        std::vector<int64_t> latencies;     // ms, one entry per iteration
+    };
+
+    virtual void _compile() = 0;
+    virtual RunPerf _run(double work_time_ms) = 0;
+
+    std::string                       m_name;
+    cv::GComputation                  m_comp;
+    cv::gapi::wip::IStreamSource::Ptr m_src;
+    cv::GCompileArgs                  m_args;
+    size_t                            m_num_outputs;
+    PerfReport                        m_perf;
+};
+
+// NB: Takes ownership of every argument; the performance report
+// inherits the pipeline name. Inline: defined in a header.
+inline Pipeline::Pipeline(std::string&&                       name,
+                          cv::GComputation&&                  comp,
+                          cv::gapi::wip::IStreamSource::Ptr&& src,
+                          cv::GCompileArgs&&                  args,
+                          const size_t                        num_outputs)
+    : m_name(std::move(name)), m_comp(std::move(comp)),
+      m_src(std::move(src)), m_args(std::move(args)),
+      m_num_outputs(num_outputs) {
+    m_perf.name = m_name;
+}
+
+// NB: Runs the mode-specific _compile() and stores how long it took
+// in the report. Inline: defined in a header.
+inline void Pipeline::compile() {
+    m_perf.compilation_time =
+        utils::measure<std::chrono::milliseconds>([this]() { _compile(); });
+}
+
+// NB: Executes the pipeline and derives the summary statistics.
+inline void Pipeline::run(double work_time_ms) {
+    auto run_perf = _run(work_time_ms);
+    m_perf.elapsed   = run_perf.elapsed;
+    m_perf.latencies = std::move(run_perf.latencies);
+    // NB: No frames processed - averaging would divide by zero and
+    // latencies[0] would read out of bounds, so skip derived statistics.
+    if (m_perf.latencies.empty()) { return; }
+    const auto num_frames = static_cast<double>(m_perf.latencies.size());
+    m_perf.avg_latency = std::accumulate(m_perf.latencies.begin(),
+                                         m_perf.latencies.end(),
+                                         0.0) / num_frames;
+    m_perf.throughput = (num_frames / static_cast<double>(m_perf.elapsed)) * 1000;
+    m_perf.first_run_latency = m_perf.latencies.front();
+}
+
+inline const PerfReport& Pipeline::report() const { // NB: Set by compile()/run().
+    return m_perf;
+}
+
+class StreamingPipeline : public Pipeline {
+public:
+    using Pipeline::Pipeline;
+
+private:
+    // NB: Builds the streaming executable for the meta of the source.
+    void _compile() override {
+        m_compiled = m_comp.compileStreaming({m_src->descr_of()},
+                                             cv::GCompileArgs(m_args));
+    }
+
+    // NB: Pulls results until work_time_ms is reached or the stream ends.
+    Pipeline::RunPerf _run(double work_time_ms) override {
+        using namespace std::chrono;
+        // NB: The last output is the timestamp, the other N-1 are buffers.
+        std::vector<cv::Mat> buffers(m_num_outputs - 1);
+        int64_t frame_ts = -1;
+        cv::GRunArgsP outs;
+        for (auto& buf : buffers) {
+            outs += cv::gout(buf);
+        }
+        outs += cv::gout(frame_ts);
+        m_compiled.setSource(m_src);
+
+        // NB: Start execution & measure performance statistics.
+        // Latency is the moment of the pull minus the pulled timestamp.
+        Pipeline::RunPerf stats;
+        const auto begin = high_resolution_clock::now();
+        m_compiled.start();
+        while (m_compiled.pull(cv::GRunArgsP{outs})) {
+            const int64_t latency = utils::timestamp<milliseconds>() - frame_ts;
+            stats.latencies.push_back(latency);
+            stats.elapsed = duration_cast<milliseconds>(
+                    high_resolution_clock::now() - begin).count();
+
+            if (stats.elapsed >= work_time_ms) {
+                m_compiled.stop();
+                break;
+            }
+        }
+        return stats;
+    }
+
+    cv::GStreamingCompiled m_compiled;
+};
+
+class RegularPipeline : public Pipeline {
+public:
+    using Pipeline::Pipeline;
+
+private:
+    // NB: Builds the regular (non-streaming) executable for the source meta.
+    void _compile() override {
+        m_compiled = m_comp.compile({m_src->descr_of()},
+                                    cv::GCompileArgs(m_args));
+    }
+
+    // NB: Pulls a frame and runs the graph on it until work_time_ms is reached.
+    Pipeline::RunPerf _run(double work_time_ms) override {
+        using namespace std::chrono;
+        cv::gapi::wip::Data frame;
+        std::vector<cv::Mat> buffers(m_num_outputs);
+        cv::GRunArgsP outs;
+        for (auto& buf : buffers) {
+            outs += cv::gout(buf);
+        }
+
+        // NB: Start execution & measure performance statistics.
+        // Latency covers only the graph call, not the source pull.
+        Pipeline::RunPerf stats;
+        const auto begin = high_resolution_clock::now();
+        while (m_src->pull(frame)) {
+            auto in_mat = cv::util::get<cv::Mat>(frame);
+            const int64_t latency = utils::measure<milliseconds>([&]{
+                m_compiled(cv::gin(in_mat), cv::GRunArgsP{outs});
+            });
+            stats.latencies.push_back(latency);
+            stats.elapsed = duration_cast<milliseconds>(
+                    high_resolution_clock::now() - begin).count();
+
+            if (stats.elapsed >= work_time_ms) {
+                break;
+            }
+        }
+        return stats;
+    }
+
+    cv::GCompiled m_compiled;
+};
+
+enum class PLMode {
+    REGULAR,  // NB: Built as RegularPipeline (cv::GCompiled).
+    STREAMING // NB: Built as StreamingPipeline (cv::GStreamingCompiled).
+};
+
+#endif // OPENCV_GAPI_PIPELINE_MODELING_TOOL_PIPELINE_HPP
diff --git a/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp
new file mode 100644
index 0000000000..63ada28603
--- /dev/null
+++ b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp
@@ -0,0 +1,502 @@
+#ifndef OPENCV_GAPI_PIPELINE_MODELING_TOOL_PIPELINE_BUILDER_HPP
+#define OPENCV_GAPI_PIPELINE_MODELING_TOOL_PIPELINE_BUILDER_HPP
+
+#include <map>
+
+#include <opencv2/gapi/infer.hpp> // cv::gapi::GNetPackage
+#include <opencv2/gapi/streaming/cap.hpp> // cv::gapi::wip::IStreamSource
+#include <opencv2/gapi/infer/ie.hpp> // cv::gapi::ie::Params
+#include <opencv2/gapi/gcommon.hpp> // cv::gapi::GCompileArgs
+#include <opencv2/gapi/cpu/gcpukernel.hpp> // GAPI_OCV_KERNEL
+#include <opencv2/gapi/gkernel.hpp> // G_API_OP
+
+#include "pipeline.hpp"
+#include "utils.hpp"
+
+struct Edge {
+    struct P {
+        std::string name; // NB: Name the call node was registered with.
+        size_t      port; // NB: Output index for src, input index for dst.
+    };
+
+    P src;
+    P dst;
+};
+
+struct CallNode {
+    using F = std::function<void(const cv::GProtoArgs&, cv::GProtoArgs&)>;
+
+    std::string name;
+    F           run; // NB: Takes proto inputs, appends the produced proto outputs.
+};
+
+struct DataNode {
+    cv::optional<cv::GProtoArg> arg; // NB: Empty until the producer call has run.
+};
+
+struct Node {
+    using Ptr  = std::shared_ptr<Node>;
+    using WPtr = std::weak_ptr<Node>;
+    using Kind = cv::util::variant<CallNode, DataNode>;
+
+    std::vector<Node::WPtr> in_nodes;  // NB: Non-owning links to producers.
+    std::vector<Node::Ptr>  out_nodes; // NB: Owning links to consumers.
+    Kind kind;
+};
+
+struct DummyCall { // NB: Call that burns `time` ms and emits a random mat.
+    G_API_OP(GDummy,
+             <cv::GMat(cv::GMat, double, OutputDescr)>,
+             "custom.dummy") {
+        static cv::GMatDesc outMeta(const cv::GMatDesc& /* in */,
+                                    double              /* time */,
+                                    const OutputDescr& output) {
+            if (output.dims.size() == 2) {
+                // NB: Dims are [H, W] while cv::Size expects (W, H).
+                return cv::GMatDesc(output.precision, 1,
+                                    cv::Size(output.dims[1], output.dims[0]));
+            }
+            return cv::GMatDesc(output.precision, output.dims);
+        }
+    };
+
+    struct DummyState {
+        cv::Mat mat;
+    };
+
+    // NB: Generate random mat once and then
+    // copy to dst buffer on every iteration.
+    GAPI_OCV_KERNEL_ST(GCPUDummy, GDummy, DummyState) {
+        static void setup(const cv::GMatDesc&          /*in*/,
+                          double                       /*time*/,
+                          const OutputDescr&           output,
+                          std::shared_ptr<DummyState>& state,
+                          const cv::GCompileArgs&      /*args*/) {
+            state.reset(new DummyState{});
+            utils::createNDMat(state->mat, output.dims, output.precision);
+            utils::generateRandom(state->mat);
+        }
+
+        static void run(const cv::Mat&     /*in_mat*/,
+                        double             time,
+                        const OutputDescr& /*output*/,
+                        cv::Mat&           out_mat,
+                        DummyState&        state) {
+            using namespace std::chrono;
+            double total = 0;
+            auto   start = high_resolution_clock::now();
+            state.mat.copyTo(out_mat);
+            while (total < time) { // NB: Spin until `time` ms have passed.
+                total = duration_cast<duration<double, std::milli>>(
+                            high_resolution_clock::now() - start).count();
+            }
+        }
+    };
+
+    void operator()(const cv::GProtoArgs& inputs, cv::GProtoArgs& outputs);
+
+    size_t numInputs()  const { return 1; }
+    size_t numOutputs() const { return 1; }
+
+    double      time;
+    OutputDescr output;
+};
+
+// NB: Wires the single GMat input into a GDummy node and emits its output.
+inline void DummyCall::operator()(const cv::GProtoArgs& inputs,
+                                  cv::GProtoArgs&       outputs) {
+    GAPI_Assert(inputs.size() == 1u);
+    GAPI_Assert(cv::util::holds_alternative<cv::GMat>(inputs[0]));
+    GAPI_Assert(outputs.empty());
+    outputs.emplace_back(GDummy::on(cv::util::get<cv::GMat>(inputs[0]), time, output));
+}
+
+struct InferCall {
+    void operator()(const cv::GProtoArgs& inputs, cv::GProtoArgs& outputs);
+    size_t numInputs()  const { return input_layers.size();  }
+    size_t numOutputs() const { return output_layers.size(); }
+
+    std::string               tag;           // NB: Passed to cv::gapi::infer<Generic>.
+    std::vector<std::string>  input_layers;  // NB: i-th input is bound to i-th name.
+    std::vector<std::string>  output_layers; // NB: Outputs follow this order.
+};
+
+// NB: Binds the i-th input to the i-th input layer, adds a generic infer
+// node and emits its outputs in the output_layers order.
+inline void InferCall::operator()(const cv::GProtoArgs& inputs,
+                                  cv::GProtoArgs&       outputs) {
+    GAPI_Assert(inputs.size() == input_layers.size());
+    GAPI_Assert(outputs.empty());
+    cv::GInferInputs g_inputs;
+    // TODO: Add an opportunity not specify input/output layers in case
+    // there is only single layer.
+    for (size_t i = 0; i < inputs.size(); ++i) {
+        // TODO: Support GFrame as well.
+        GAPI_Assert(cv::util::holds_alternative<cv::GMat>(inputs[i]));
+        auto in = cv::util::get<cv::GMat>(inputs[i]);
+        g_inputs[input_layers[i]] = in;
+    }
+    auto g_outputs = cv::gapi::infer<cv::gapi::Generic>(tag, g_inputs);
+    for (const auto& layer : output_layers) {
+        outputs.emplace_back(g_outputs.at(layer));
+    }
+}
+
+struct SourceCall { // NB: Producer call: no inputs, a single output.
+    void operator()(const cv::GProtoArgs& inputs, cv::GProtoArgs& outputs);
+    size_t numInputs()  const { return 0; }
+    size_t numOutputs() const { return 1; }
+};
+
+inline void SourceCall::operator()(const cv::GProtoArgs& inputs,
+                                   cv::GProtoArgs&       outputs) {
+    GAPI_Assert(inputs.empty());
+    GAPI_Assert(outputs.empty());
+    // NB: Graph entry point; NV12 isn't exposed, so it always yields a GMat.
+    outputs.emplace_back(cv::GMat());
+}
+
+struct LoadPath { // NB: xml/bin pair forwarded to cv::gapi::ie::Params.
+    std::string xml;
+    std::string bin;
+};
+
+struct ImportPath {
+    std::string blob; // NB: Single path forwarded to cv::gapi::ie::Params.
+};
+
+using ModelPath = cv::util::variant<ImportPath, LoadPath>;
+
+struct InferParams {
+    std::string name;
+    ModelPath   path;   // NB: Either ImportPath or LoadPath.
+    std::string device; // NB: Forwarded to cv::gapi::ie::Params.
+    std::vector<std::string> input_layers;
+    std::vector<std::string> output_layers;
+    std::map<std::string, std::string> config; // NB: Passed to pluginConfig().
+};
+
+class PipelineBuilder {
+public:
+    PipelineBuilder();
+    void addDummy(const std::string& name,
+                  const double       time,
+                  const OutputDescr& output);
+
+    void addInfer(const std::string& name, const InferParams& params);
+
+    void setSource(const std::string& name,
+                   double latency,
+                   const OutputDescr& output);
+
+    void addEdge(const Edge& edge);
+    void setMode(PLMode mode);
+    void setDumpFilePath(const std::string& dump);
+    void setQueueCapacity(const size_t qc);
+    void setName(const std::string& name);
+
+    Pipeline::Ptr build(); // NB: Constructs the pipeline, then resets the state.
+
+private:
+    template <typename CallT>
+    void addCall(const std::string& name,
+                 CallT&&            call);
+
+    Pipeline::Ptr construct();
+
+    template <typename K, typename V>
+    using M = std::unordered_map<K, V>;
+    struct State {
+        struct NodeEdges { // NOTE(review): not used in the visible code.
+            std::vector<Edge> input_edges;
+            std::vector<Edge> output_edges;
+        };
+
+        M<std::string, Node::Ptr>         calls_map; // NB: Call name -> node.
+        std::vector<Node::Ptr>            all_calls; // NB: In registration order.
+
+        cv::gapi::GNetPackage             networks;
+        cv::gapi::GKernelPackage          kernels;
+        cv::GCompileArgs                  compile_args;
+        cv::gapi::wip::IStreamSource::Ptr src;
+        PLMode                            mode = PLMode::STREAMING;
+        std::string                       name;
+    };
+
+    std::unique_ptr<State> m_state;
+};
+
+inline PipelineBuilder::PipelineBuilder() : m_state(new State{}) { }
+
+// NB: Registers the CPU dummy kernel and adds a dummy call node `name`.
+inline void PipelineBuilder::addDummy(const std::string& name, const double time,
+                                      const OutputDescr& output) {
+    m_state->kernels.include<DummyCall::GCPUDummy>();
+    addCall(name, DummyCall{time, output});
+}
+
+// NB: Registers a call node with numInputs() empty input slots and
+// numOutputs() data nodes. Throws std::logic_error on a duplicate name.
+template <typename CallT>
+void PipelineBuilder::addCall(const std::string& name,
+                              CallT&&            call) {
+    if (m_state->calls_map.find(name) != m_state->calls_map.end()) {
+        throw std::logic_error("Node: " + name + " already exists!");
+    }
+    const size_t num_inputs  = call.numInputs();
+    const size_t num_outputs = call.numOutputs();
+    // NB: Forward instead of an unconditional move, so an lvalue call
+    // object passed by the caller isn't silently left moved-from.
+    Node::Ptr call_node(new Node{{},{},Node::Kind{CallNode{name, std::forward<CallT>(call)}}});
+    // NB: Create placeholders for inputs.
+    call_node->in_nodes.resize(num_inputs);
+    // NB: Create outputs with empty data.
+    for (size_t i = 0; i < num_outputs; ++i) {
+        call_node->out_nodes.emplace_back(
+                new Node{{call_node}, {}, Node::Kind{DataNode{}}});
+    }
+    m_state->calls_map.emplace(name, call_node);
+    m_state->all_calls.emplace_back(call_node);
+}
+
+// NB: Registers the network `name` in the package and adds its call node.
+inline void PipelineBuilder::addInfer(const std::string& name,
+                                      const InferParams& params) {
+    // NB: No default ctor for Params.
+    std::unique_ptr<cv::gapi::ie::Params<cv::gapi::Generic>> pp;
+    if (cv::util::holds_alternative<LoadPath>(params.path)) {
+        const auto& load_path = cv::util::get<LoadPath>(params.path);
+        pp.reset(new cv::gapi::ie::Params<cv::gapi::Generic>(name,
+                                                             load_path.xml,
+                                                             load_path.bin,
+                                                             params.device));
+    } else {
+        GAPI_Assert(cv::util::holds_alternative<ImportPath>(params.path));
+        const auto& import_path = cv::util::get<ImportPath>(params.path);
+        pp.reset(new cv::gapi::ie::Params<cv::gapi::Generic>(name,
+                                                             import_path.blob,
+                                                             params.device));
+    }
+    pp->pluginConfig(params.config);
+    m_state->networks += cv::gapi::networks(*pp);
+
+    addCall(name, InferCall{name, params.input_layers, params.output_layers});
+}
+
+// NB: Throws std::logic_error on unknown node, bad port or a busy input.
+inline void PipelineBuilder::addEdge(const Edge& edge) {
+    const auto src_it = m_state->calls_map.find(edge.src.name);
+    if (src_it == m_state->calls_map.end()) {
+        throw std::logic_error("Failed to find node: " + edge.src.name);
+    }
+    auto src_node = src_it->second;
+    if (src_node->out_nodes.size() <= edge.src.port) {
+        throw std::logic_error("Failed to access node: " + edge.src.name +
+                               " by out port: " + std::to_string(edge.src.port));
+    }
+
+    const auto dst_it = m_state->calls_map.find(edge.dst.name);
+    if (dst_it == m_state->calls_map.end()) {
+        throw std::logic_error("Failed to find node: " + edge.dst.name);
+    }
+    auto dst_node = dst_it->second;
+    if (dst_node->in_nodes.size() <= edge.dst.port) {
+        throw std::logic_error("Failed to access node: " + edge.dst.name +
+                               " by in port: " + std::to_string(edge.dst.port));
+    }
+    auto  out_data = src_node->out_nodes[edge.src.port];
+    auto& in_data  = dst_node->in_nodes[edge.dst.port];
+    // NB: in_data != nullptr.
+    if (!in_data.expired()) {
+        throw std::logic_error("Node: " + edge.dst.name +
+                               " already connected by in port: " +
+                               std::to_string(edge.dst.port));
+    }
+    in_data = out_data;
+    out_data->out_nodes.push_back(dst_node);
+}
+
+// NB: May be called once per pipeline: asserts that no source is set yet.
+inline void PipelineBuilder::setSource(const std::string& name, double latency,
+                                       const OutputDescr& output) {
+    GAPI_Assert(!m_state->src);
+    m_state->src = std::make_shared<DummySource>(latency, output);
+    addCall(name, SourceCall{});
+}
+
+inline void PipelineBuilder::setMode(PLMode mode) { // NB: Default is STREAMING.
+    m_state->mode = mode;
+}
+
+inline void PipelineBuilder::setDumpFilePath(const std::string& dump) {
+    m_state->compile_args.emplace_back(cv::graph_dump_path{dump});
+}
+
+inline void PipelineBuilder::setQueueCapacity(const size_t qc) {
+    m_state->compile_args.emplace_back(cv::gapi::streaming::queue_capacity{qc});
+}
+
+inline void PipelineBuilder::setName(const std::string& name) {
+    m_state->name = name;
+}
+
+// NB: Depth-first walk over producers; returns true once a cycle is found.
+static bool visit(Node::Ptr node,
+                  std::vector<Node::Ptr>& sorted,
+                  std::unordered_map<Node::Ptr, int>& visited) {
+    if (!node) {
+        throw std::logic_error("Found null node");
+    }
+    // NB: Marks: 0 - not seen, 1 - on the current path, 2 - finished.
+    visited[node] = 1;
+    for (auto weak_in : node->in_nodes) {
+        auto producer = weak_in.lock();
+        const int mark = visited[producer];
+        const bool cyclic =
+            (mark == 1) || (mark == 0 && visit(producer, sorted, visited));
+        if (cyclic) {
+            return true;
+        }
+    }
+    visited[node] = 2;
+    sorted.push_back(node);
+    return false;
+}
+
+static cv::optional<std::vector<Node::Ptr>>
+toposort(const std::vector<Node::Ptr>& nodes) { // NB: Empty result == cycle.
+    std::vector<Node::Ptr> sorted;
+    std::unordered_map<Node::Ptr, int> visited;
+    for (const auto& n : nodes) {
+        if (visited[n] == 0 && visit(n, sorted, visited)) { // NB: Skip finished.
+            return cv::optional<std::vector<Node::Ptr>>{};
+        }
+    }
+    return cv::util::make_optional(sorted);
+}
+
+inline Pipeline::Ptr PipelineBuilder::construct() {
+    // NB: Unlike G-API, pipeline_builder_tool graph always starts with CALL node
+    // (not data) that produces data, so the call node which doesn't have
+    // inputs is considered as "producer" node.
+    //
+    // Graph always starts with CALL node and ends with DATA node.
+    // Graph example: [source] -> (source:0) -> [PP] -> (PP:0)
+    //
+    // The algorithm is quite simple:
+    // 0. Verify that every call input node exists (connected).
+    // 1. Sort all nodes by visiting only call nodes,
+    // since there are no data nodes that aren't connected with any call node,
+    // it's guaranteed that every node will be visited.
+    // 2. Filter call nodes.
+    // 3. Go through every call node in the sorted (producers first) order.
+    // 4. Extract proto input from every input node.
+    // 5. Run call and get outputs.
+    // 6. If call node doesn't have inputs it means that it's "producer" node,
+    // so collect all outputs to graph_inputs vector.
+    // 7. Assign proto outputs to output data nodes,
+    // so the next calls can use them as inputs.
+    // 8. Collect data nodes without consumers to graph_outputs vector.
+    cv::GProtoArgs graph_inputs;
+    cv::GProtoArgs graph_outputs;
+    // 0. Verify that every call input node exists (connected).
+    for (const auto& call_node : m_state->all_calls) {
+        for (size_t i = 0; i < call_node->in_nodes.size(); ++i) {
+            const auto& in_data_node = call_node->in_nodes[i];
+            // NB: in_data_node == nullptr.
+            if (in_data_node.expired()) {
+                const auto& call = cv::util::get<CallNode>(call_node->kind);
+                throw std::logic_error(
+                        "Node: " + call.name + " in Pipeline: " + m_state->name +
+                        " has dangling input by in port: " + std::to_string(i));
+            }
+        }
+    }
+    // 1. Sort all nodes.
+    auto has_sorted = toposort(m_state->all_calls);
+    if (!has_sorted) {
+        throw std::logic_error(
+                "Pipeline: " + m_state->name + " has cyclic dependencies") ;
+    }
+    auto& sorted = has_sorted.value();
+    // 2. Filter call nodes.
+    std::vector<Node::Ptr> sorted_calls;
+    for (const auto& n : sorted) {
+        if (cv::util::holds_alternative<CallNode>(n->kind)) {
+            sorted_calls.push_back(n);
+        }
+    }
+    // 3. Go through every call node.
+    for (const auto& call_node : sorted_calls) {
+        cv::GProtoArgs outputs;
+        cv::GProtoArgs inputs;
+        for (size_t i = 0; i < call_node->in_nodes.size(); ++i) {
+            auto in_node = call_node->in_nodes.at(i);
+            auto in_data = cv::util::get<DataNode>(in_node.lock()->kind);
+            if (!in_data.arg.has_value()) {
+                throw std::logic_error("data hasn't been provided");
+            }
+            // 4. Extract proto input from every input node.
+            inputs.push_back(in_data.arg.value());
+        }
+        // 5. Run call and get outputs.
+        const auto& call = cv::util::get<CallNode>(call_node->kind);
+        call.run(inputs, outputs);
+        // 6. If call node doesn't have inputs
+        // it means that it's input producer node (Source).
+        if (call_node->in_nodes.empty()) {
+            for (const auto& out : outputs) {
+                graph_inputs.push_back(out);
+            }
+        }
+        // 7. Assign proto outputs to output data nodes,
+        // so the next calls can use them as inputs.
+        GAPI_Assert(outputs.size() == call_node->out_nodes.size());
+        for (size_t i = 0; i < outputs.size(); ++i) {
+            auto out_node = call_node->out_nodes[i];
+            auto& out_data = cv::util::get<DataNode>(out_node->kind);
+            out_data.arg = cv::util::make_optional(outputs[i]);
+            if (out_node->out_nodes.empty()) { // 8. No consumers: graph output.
+                graph_outputs.push_back(out_data.arg.value());
+            }
+        }
+    }
+
+    m_state->compile_args.emplace_back(m_state->networks);
+    m_state->compile_args.emplace_back(m_state->kernels);
+
+    if (m_state->mode == PLMode::STREAMING) {
+        GAPI_Assert(graph_inputs.size() == 1);
+        GAPI_Assert(cv::util::holds_alternative<cv::GMat>(graph_inputs[0]));
+        // FIXME: Handle GFrame when NV12 comes.
+        const auto& graph_input = cv::util::get<cv::GMat>(graph_inputs[0]);
+        // NB: In case streaming mode need to expose timestamp in order to
+        // calculate performance metrics.
+        graph_outputs.emplace_back(
+                cv::gapi::streaming::timestamp(graph_input).strip());
+
+        return std::make_shared<StreamingPipeline>(std::move(m_state->name),
+                                                   cv::GComputation(
+                                                       cv::GProtoInputArgs{graph_inputs},
+                                                       cv::GProtoOutputArgs{graph_outputs}),
+                                                   std::move(m_state->src),
+                                                   std::move(m_state->compile_args),
+                                                   graph_outputs.size());
+    }
+    GAPI_Assert(m_state->mode == PLMode::REGULAR);
+    return std::make_shared<RegularPipeline>(std::move(m_state->name),
+                                             cv::GComputation(
+                                                 cv::GProtoInputArgs{graph_inputs},
+                                                 cv::GProtoOutputArgs{graph_outputs}),
+                                             std::move(m_state->src),
+                                             std::move(m_state->compile_args),
+                                             graph_outputs.size());
+}
+
+inline Pipeline::Ptr PipelineBuilder::build() {
+    auto pipeline = construct();
+    m_state.reset(new State{}); // NB: Fresh state makes the builder reusable.
+    return pipeline;
+}
+
+#endif // OPENCV_GAPI_PIPELINE_MODELING_TOOL_PIPELINE_BUILDER_HPP
diff --git a/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py b/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py
new file mode 100644
index 0000000000..ef4bce6476
--- /dev/null
+++ b/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py
@@ -0,0 +1,931 @@
+import os
+import subprocess
+
+pipeline_modeling_tool = os.getenv('PIPELINE_MODELING_TOOL')
+
+def get_output(exec_str):
+    # Runs exec_str through the shell with stderr merged into stdout. A failing
+    # exit status is not an error here: the captured text is returned either way.
+    try:
+        raw = subprocess.check_output(exec_str, stderr=subprocess.STDOUT, shell=True)
+    except subprocess.CalledProcessError as exc:
+        raw = exc.output
+    return raw.strip().decode()
+
+
+def test_error_no_config_specified():
+    out = get_output(pipeline_modeling_tool)
+    assert out.startswith('Config must be specified via --cfg option')
+
+
+def test_error_no_config_exists():
+    cfg_file = 'not_existing_cfg.yml'
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert 'Failed to open config file: not_existing_cfg.yml' in out
+
+
+def test_error_no_work_time():
+    cfg_file = """\"%YAML:1.0\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('Config must contain field: work_time')
+
+
+def test_error_work_time_not_positive():
+    cfg_file = """\"%YAML:1.0
+work_time: -1\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('work_time must be positive')
+
+
+def test_error_no_pipelines():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('Config must contain field: Pipelines')
+
+
+def test_error_pipelines_node_not_map():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('Pipelines field must be a map')
+
+
+def test_error_config_not_contain_pl():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:\" """
+
+    exec_str = '{} --cfg={} --exec_list=PL2'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('Pipelines must contain field: PL2')
+
+
+def test_error_no_source():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('PL1 must contain field: source')
+
+
+def test_error_source_no_name():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('source must contain field: name')
+
+
+def test_error_source_no_latency():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('source must contain field: latency')
+
+
+def test_error_source_no_output():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('source must contain field: output')
+
+
+def test_error_source_output_no_dims():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('output must contain field: dims')
+
+
+def test_error_source_output_no_precision():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('output must contain field: precision')
+
+
+def test_error_no_nodes():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('PL1 must contain field: nodes')
+
+
+def test_error_nodes_not_sequence():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('nodes in PL1 must be a sequence')
+
+
+def test_error_node_no_name():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      -\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('node must contain field: name')
+
+
+def test_error_node_no_type():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('node must contain field: type')
+
+
+def test_error_node_unknown_type():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Unknown'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('Unsupported node type: Unknown')
+
+
+def test_error_node_dummy_no_time():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('Node0 must contain field: time')
+
+
+def test_error_node_dummy_not_positive_time():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: -0.2\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('Node0 time must be positive')
+
+
+def test_error_node_dummy_no_output():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('Node0 must contain field: output')
+
+
+def test_error_node_infer_no_model_path():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Infer'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    error_msg = """Path to OpenVINO model must be specified in either of two formats:
+1.
+  xml: path to *.xml
+  bin: path to *.bin
+2.
+  blob: path to *.blob"""
+    assert out.startswith(error_msg)
+
+
+def test_error_node_infer_no_input_layers():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Infer'
+        blob: model.blob
+        device: 'CPU'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('Node0 must contain field: input_layers')
+
+
+def test_error_node_infer_input_layers_are_empty():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Infer'
+        blob: model.blob
+        device: 'CPU'
+        input_layers:
+            \" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('input_layers in Node0 must be a sequence')
+
+
+def test_error_node_infer_no_output_layers():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Infer'
+        blob: model.blob
+        device: 'CPU'
+        input_layers:
+          - 'layer_name'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('Node0 must contain field: output_layers')
+
+
+def test_error_node_infer_output_layers_are_empty():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Infer'
+        blob: model.blob
+        device: 'CPU'
+        input_layers:
+          - 'layer_name'
+        output_layers:\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('output_layers in Node0 must be a sequence')
+
+
+def test_error_no_edges():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('PL1 must contain field: edges')
+
+
+def test_error_edges_not_sequence():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('edges in PL1 must be a sequence')
+
+
+def test_error_edges_no_from():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      -\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('edge must contain field: from')
+
+
+def test_error_edges_no_to():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Node0'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('edge must contain field: to')
+
+
+def test_error_edges_from_not_exists():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Node1'
+        to: 'Node2'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('Failed to find node: Node1')
+
+
+def test_error_edges_from_port_not_exists():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Node0:10'
+        to: 'Node2'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('Failed to access node: Node0 by out port: 10')
+
+
+def test_error_edges_to_not_exists():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Src'
+        to: 'Node2'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('Failed to find node: Node2')
+
+
+def test_error_edges_to_port_not_exists():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Src'
+        to: 'Node0:3'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('Failed to access node: Node0 by in port: 3')
+
+
+def test_error_connect_to_source():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Node0'
+        to: 'Src'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('Failed to access node: Src by in port: 0')
+
+
+def test_error_double_edge():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Src'
+        to: 'Node0'
+      - from: 'Src'
+        to: 'Node0'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('Node: Node0 already connected by in port: 0')
+
+
+def test_error_double_edge_explicit_port():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Src'
+        to: 'Node0'
+      - from: 'Src'
+        to: 'Node0:0'\" """
+
+    # The port-less 'Node0' and the explicit 'Node0:0' should name the same input.
+    out = get_output('{} --cfg={}'.format(pipeline_modeling_tool, cfg_file))
+
+    assert out.startswith('Node: Node0 already connected by in port: 0')
+
+
+def test_node_has_dangling_input():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+
+      - name: 'Node1'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Node0'
+        to: 'Node1'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+
+    assert out.startswith('Node: Node0 in Pipeline: PL1 has dangling input by in port: 0')
+
+
+def test_error_has_cycle_0():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node'
+        type: 'Infer'
+        blob: 'model.blob'
+        device: 'CPU'
+        input_layers:
+          - 'in_layer_name_0'
+          - 'in_layer_name_1'
+        output_layers:
+          - 'out_layer_name'
+    edges:
+      - from: 'Src'
+        to: 'Node:0'
+      - from: 'Node:0'
+        to: 'Node:1'\" """
+
+    exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert out.startswith('Pipeline: PL1 has cyclic dependencies')
+
+
+def test_error_has_cycle_1():
+    # Cycle spanning two nodes: Node0:0 -> Node1 -> Node0:1.
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Infer'
+        blob: 'model.blob'
+        device: 'CPU'
+        input_layers:
+          - 'in_layer_name_0'
+          - 'in_layer_name_1'
+        output_layers:
+          - 'out_layer_name'
+
+      - name: 'Node1'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Src'
+        to: 'Node0:0'
+      - from: 'Node0:0'
+        to: 'Node1:0'
+      - from: 'Node1'
+        to: 'Node0:1'\" """
+
+    out = get_output('{} --cfg={}'.format(pipeline_modeling_tool, cfg_file))
+    assert out.startswith('Pipeline: PL1 has cyclic dependencies')
+
+
+def test_error_no_load_config_exists():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Src'
+        to: 'Node0'\" """
+
+    exec_str = '{} --cfg={} --load_config=not_existing.yml'.format(pipeline_modeling_tool, cfg_file)
+    out = get_output(exec_str)
+    assert 'Failed to load config: not_existing.yml' in out
+
+
+def test_error_invalid_pl_mode():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Src'
+        to: 'Node0'\" """
+
+    # Valid config; only the --pl_mode value is wrong.
+    out = get_output('{} --cfg={} --pl_mode=unknown'.format(pipeline_modeling_tool, cfg_file))
+    assert out.startswith('Unsupported PLMode: unknown\n'
+                          'Please chose between: streaming and regular')
+
+
+def test_error_invalid_app_mode():
+    cfg_file = """\"%YAML:1.0
+work_time: 1000
+Pipelines:
+  PL1:
+    source:
+      name: 'Src'
+      latency: 20
+      output:
+        dims: [1,2,3,4]
+        precision: 'U8'
+    nodes:
+      - name: 'Node0'
+        type: 'Dummy'
+        time: 0.2
+        output:
+          dims: [1,2,3,4]
+          precision: 'U8'
+    edges:
+      - from: 'Src'
+        to: 'Node0'\" """
+
+    # Valid config; only the --app_mode value is wrong.
+    out = get_output('{} --cfg={} --app_mode=unknown'.format(pipeline_modeling_tool, cfg_file))
+    assert out.startswith('Unsupported AppMode: unknown\n'
+                          'Please chose between: realtime and benchmark')
diff --git a/modules/gapi/samples/pipeline_modeling_tool/utils.hpp b/modules/gapi/samples/pipeline_modeling_tool/utils.hpp
new file mode 100644
index 0000000000..c110bf3b47
--- /dev/null
+++ b/modules/gapi/samples/pipeline_modeling_tool/utils.hpp
@@ -0,0 +1,96 @@
+#ifndef OPENCV_GAPI_PIPELINE_MODELING_TOOL_UTILS_HPP
+#define OPENCV_GAPI_PIPELINE_MODELING_TOOL_UTILS_HPP
+
+#include <opencv2/core.hpp>
+
+#if defined(_WIN32)
+#include <windows.h>
+#endif
+
+// FIXME: It's better to place it somewhere in common.hpp
+struct OutputDescr {
+    std::vector<int> dims;
+    int              precision;
+};
+
+namespace utils {
+
+inline void createNDMat(cv::Mat& mat, const std::vector<int>& dims, int depth) {
+    GAPI_Assert(!dims.empty());
+    mat.create(dims, depth);
+    if (dims.size() == 1) {
+        //FIXME: Well-known 1D mat WA
+        mat.dims = 1;
+    }
+}
+
+inline void generateRandom(cv::Mat& out) {
+    switch (out.depth()) {
+        case CV_8U:
+            cv::randu(out, 0, 255);
+            break;
+        case CV_32F:
+            cv::randu(out, 0.f, 1.f);
+            break;
+        case CV_16F: {
+            std::vector<int> dims;
+            for (int i = 0; i < out.size.dims(); ++i) {
+                dims.push_back(out.size[i]);
+            }
+            cv::Mat fp32_mat;
+            createNDMat(fp32_mat, dims, CV_32F);
+            cv::randu(fp32_mat, 0.f, 1.f);
+            fp32_mat.convertTo(out, out.type());
+            break;
+        }
+        default:
+            throw std::logic_error("Unsupported preprocessing depth");
+    }
+}
+
+inline void sleep(double ms) {
+#if defined(_WIN32)
+    // NB: It takes portions of 100 nanoseconds.
+    int64_t ns_units = static_cast<int64_t>(ms * 1e4);
+    // FIXME: Wrap it to RAII and instance only once.
+    HANDLE timer = CreateWaitableTimer(NULL, true, NULL);
+    if (!timer) {
+        throw std::logic_error("Failed to create timer");
+    }
+
+    LARGE_INTEGER li;
+    li.QuadPart = -ns_units;
+    if(!SetWaitableTimer(timer, &li, 0, NULL, NULL, false)){
+        CloseHandle(timer);
+        throw std::logic_error("Failed to set timer");
+    }
+    if (WaitForSingleObject(timer, INFINITE) != WAIT_OBJECT_0) {
+        CloseHandle(timer);
+        throw std::logic_error("Failed to wait timer");
+    }
+    CloseHandle(timer);
+#else
+    using namespace std::chrono;
+    std::this_thread::sleep_for(duration<double, std::milli>(ms));
+#endif
+}
+
+template <typename duration_t>
+typename duration_t::rep measure(std::function<void()> f) {
+    using namespace std::chrono;
+    auto start = high_resolution_clock::now();
+    f();
+    return duration_cast<duration_t>(
+            high_resolution_clock::now() - start).count();
+}
+
+template <typename duration_t>
+typename duration_t::rep timestamp() {
+    using namespace std::chrono;
+    auto now = high_resolution_clock::now();
+    return duration_cast<duration_t>(now.time_since_epoch()).count();
+}
+
+} // namespace utils
+
+#endif // OPENCV_GAPI_PIPELINE_MODELING_TOOL_UTILS_HPP
diff --git a/modules/gapi/src/api/gframe.cpp b/modules/gapi/src/api/gframe.cpp
index 1acaa9b766..b0830b7a63 100644
--- a/modules/gapi/src/api/gframe.cpp
+++ b/modules/gapi/src/api/gframe.cpp
@@ -44,6 +44,7 @@ std::ostream& operator<<(std::ostream& os, const cv::GFrameDesc &d) {
     switch (d.fmt) {
     case MediaFormat::BGR:  os << "BGR"; break;
     case MediaFormat::NV12: os << "NV12"; break;
+    case MediaFormat::GRAY: os << "GRAY"; break;
     default: GAPI_Assert(false && "Invalid media format");
     }
     os << ' ' << d.size << ']';
diff --git a/modules/gapi/src/api/media.cpp b/modules/gapi/src/api/media.cpp
index b1c455d40a..a3643e378c 100644
--- a/modules/gapi/src/api/media.cpp
+++ b/modules/gapi/src/api/media.cpp
@@ -36,7 +36,7 @@ cv::MediaFrame::IAdapter* cv::MediaFrame::getAdapter() const {
 }
 
 void cv::MediaFrame::serialize(cv::gapi::s11n::IOStream& os) const {
-    return m->adapter->serialize(os);
+    m->adapter->serialize(os);
 }
 
 cv::MediaFrame::View::View(Ptrs&& ptrs, Strides&& strs, Callback &&cb)
diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp b/modules/gapi/src/backends/fluid/gfluidcore.cpp
index c33129a0f1..22f73e553c 100644
--- a/modules/gapi/src/backends/fluid/gfluidcore.cpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp
@@ -886,25 +886,6 @@ static void run_arithm_s(DST out[], const SRC in[], int width, int chan,
         CV_Error(cv::Error::StsBadArg, "unsupported number of channels");
 }
 
-template<typename DST, typename SRC>
-static void run_absdiffc(Buffer &dst, const View &src, const float scalar[])
-{
-    const auto *in = src.InLine<SRC>(0);
-    auto *out = dst.OutLine<DST>();
-
-    int width = dst.length();
-    int chan = dst.meta().chan;
-    const int length = width * chan;
-
-    int w = 0;
-#if CV_SIMD
-    w = absdiffc_simd(in, scalar, out, length, chan);
-#endif
-
-    for (; w < length; ++w)
-        out[w] = absdiff<DST>(in[w], scalar[w%chan]);
-}
-
 template<typename DST, typename SRC>
 CV_ALWAYS_INLINE void run_arithm_s(Buffer &dst, const View &src, const float scalar[],
                                    Arithm arithm, float scale=1)
@@ -950,11 +931,6 @@ CV_ALWAYS_INLINE void run_arithm_s(Buffer &dst, const View &src, const float sca
                 out[chan * w + c] = mul<DST>(in[chan * w + c], scalar[c], scale);
         break;
     }
-    case ARITHM_DIVIDE:
-        for (int w=0; w < width; w++)
-            for (int c=0; c < chan; c++)
-                out[chan*w + c] = div<DST>(in[chan*w + c], scalar[c], scale);
-        break;
     default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");
     }
 }
@@ -992,6 +968,14 @@ static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], A
     }
 }
 
+CV_ALWAYS_INLINE void setScratchSize(Buffer& scratch, const int buflen)
+{
+    cv::Size bufsize(buflen, 1);
+    GMatDesc bufdesc = { CV_32F, 1, bufsize };
+    Buffer buffer(bufdesc);
+    scratch = std::move(buffer);
+}
+
 CV_ALWAYS_INLINE void initScratchBuffer(Buffer& scratch)
 {
 #if CV_SIMD
@@ -1012,25 +996,47 @@ CV_ALWAYS_INLINE void initScratchBuffer(Buffer& scratch)
 #else
     constexpr int buflen = 4;
 #endif
-    cv::Size bufsize(buflen, 1);
-    GMatDesc bufdesc = { CV_32F, 1, bufsize };
-    Buffer buffer(bufdesc);
-    scratch = std::move(buffer);
+    setScratchSize(scratch, buflen);
+}
+
+CV_ALWAYS_INLINE void scalar_to_scratch(const cv::Scalar& scalar,
+                                        float scratch[], const int length, const int chan)
+{
+    for (int i = 0; i < length; ++i)
+        scratch[i] = static_cast<float>(scalar[i % chan]);
+}
+
+template<typename DST, typename SRC>
+CV_ALWAYS_INLINE void run_absdiffc(Buffer& dst, const View& src, const float scalar[])
+{
+    const auto* in = src.InLine<SRC>(0);
+    auto* out = dst.OutLine<DST>();
+
+    int width = dst.length();
+    int chan = dst.meta().chan;
+    const int length = width * chan;
+
+    int w = 0;
+#if CV_SIMD
+    w = absdiffc_simd(in, scalar, out, length, chan);
+#endif
+
+    for (; w < length; ++w)
+        out[w] = absdiff<DST>(in[w], scalar[w % chan]);
 }
 
 GAPI_FLUID_KERNEL(GFluidAbsDiffC, cv::gapi::core::GAbsDiffC, true)
 {
     static const int Window = 1;
 
-    static void run(const View &src, const cv::Scalar& _scalar, Buffer &dst, Buffer& scratch)
+    static void run(const View& src, const cv::Scalar& _scalar, Buffer& dst, Buffer& scratch)
     {
         if (dst.y() == 0)
         {
             const int chan = src.meta().chan;
-            float* sc = scratch.OutLine<float>();
+            float* _scratch = scratch.OutLine<float>();
 
-            for (int i = 0; i < scratch.length(); ++i)
-                sc[i] = static_cast<float>(_scalar[i % chan]);
+            scalar_to_scratch(_scalar, _scratch, scratch.length(), chan);
         }
 
         const float* scalar = scratch.OutLine<float>();
@@ -1058,17 +1064,16 @@ GAPI_FLUID_KERNEL(GFluidAddC, cv::gapi::core::GAddC, true)
 {
     static const int Window = 1;
 
-    static void run(const View &src, const cv::Scalar &_scalar, int /*dtype*/, Buffer &dst, Buffer &scratch)
+    static void run(const View& src, const cv::Scalar& _scalar, int /*dtype*/, Buffer& dst, Buffer& scratch)
     {
         GAPI_Assert(src.meta().chan <= 4);
 
         if (dst.y() == 0)
         {
             const int chan = src.meta().chan;
-            float* sc = scratch.OutLine<float>();
+            float* _scratch = scratch.OutLine<float>();
 
-            for (int i = 0; i < scratch.length(); ++i)
-                sc[i] = static_cast<float>(_scalar[i % chan]);
+            scalar_to_scratch(_scalar, _scratch, scratch.length(), chan);
         }
 
         const float* scalar = scratch.OutLine<float>();
@@ -1115,10 +1120,9 @@ GAPI_FLUID_KERNEL(GFluidSubC, cv::gapi::core::GSubC, true)
         if (dst.y() == 0)
         {
             const int chan = src.meta().chan;
-            float* sc = scratch.OutLine<float>();
+            float* _scratch = scratch.OutLine<float>();
 
-            for (int i = 0; i < scratch.length(); ++i)
-                sc[i] = static_cast<float>(_scalar[i % chan]);
+            scalar_to_scratch(_scalar, _scratch, scratch.length(), chan);
         }
 
         const float* scalar = scratch.OutLine<float>();
@@ -1165,10 +1169,9 @@ GAPI_FLUID_KERNEL(GFluidSubRC, cv::gapi::core::GSubRC, true)
         if (dst.y() == 0)
         {
             const int chan = src.meta().chan;
-            float* sc = scratch.OutLine<float>();
+            float* _scratch = scratch.OutLine<float>();
 
-            for (int i = 0; i < scratch.length(); ++i)
-                sc[i] = static_cast<float>(_scalar[i % chan]);
+            scalar_to_scratch(_scalar, _scratch, scratch.length(), chan);
         }
 
         const float* scalar = scratch.OutLine<float>();
@@ -1216,10 +1219,9 @@ GAPI_FLUID_KERNEL(GFluidMulC, cv::gapi::core::GMulC, true)
         if (dst.y() == 0)
         {
             const int chan = src.meta().chan;
-            float* sc = scratch.OutLine<float>();
+            float* _scratch = scratch.OutLine<float>();
 
-            for (int i = 0; i < scratch.length(); ++i)
-                sc[i] = static_cast<float>(_scalar[i % chan]);
+            scalar_to_scratch(_scalar, _scratch, scratch.length(), chan);
         }
         const float* scalar = scratch.OutLine<float>();
         const float scale = 1.0;
@@ -1259,7 +1261,7 @@ GAPI_FLUID_KERNEL(GFluidMulCOld, cv::gapi::core::GMulCOld, true)
 {
     static const int Window = 1;
 
-    static void run(const View &src, double _scalar, int /*dtype*/, Buffer &dst, Buffer& scratch)
+    static void run(const View& src, double _scalar, int /*dtype*/, Buffer& dst, Buffer& scratch)
     {
         GAPI_Assert(src.meta().chan <= 4);
 
@@ -1295,32 +1297,109 @@ GAPI_FLUID_KERNEL(GFluidMulCOld, cv::gapi::core::GMulCOld, true)
     }
 };
 
-GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, false)
+template<typename DST, typename SRC>
+CV_ALWAYS_INLINE void run_divc(Buffer& dst, const View& src, Buffer& scratch,
+                               float scale)
+{
+    const auto* in = src.InLine<SRC>(0);
+    auto* out = dst.OutLine<DST>();
+    const float* scalar = scratch.OutLine<float>();
+
+    int width = dst.length();
+    int chan = dst.meta().chan;
+    const int length = width * chan;
+
+    int w = 0;
+#if CV_SIMD
+    int scratch_length = scratch.length();
+    int indicator_offset = scratch_length - 1;
+    const int set_mask_indicator = static_cast<int>(*(scratch.OutLine<float>() + (indicator_offset)));
+
+    w = divc_simd(in, scalar, out, length, chan, scale, set_mask_indicator);
+#endif
+
+    for (; w < length; ++w)
+        out[w] = div<DST>(in[w], scalar[w % chan], scale);
+}
+
+GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true)
 {
     static const int Window = 1;
 
-    static void run(const View &src, const cv::Scalar &_scalar, double _scale, int /*dtype*/,
-                    Buffer &dst)
+    static void run(const View& src, const cv::Scalar& _scalar, double _scale, int /*dtype*/,
+                    Buffer& dst, Buffer& scratch)
     {
-        const float scalar[4] = {
-            static_cast<float>(_scalar[0]),
-            static_cast<float>(_scalar[1]),
-            static_cast<float>(_scalar[2]),
-            static_cast<float>(_scalar[3])
-        };
-        const float scale = static_cast<float>(_scale);
+        GAPI_Assert(src.meta().chan <= 4);
+
+        if (dst.y() == 0)
+        {
+            const int chan = src.meta().chan;
+            float* _scratch = scratch.OutLine<float>();
+            int scratch_length = scratch.length();
+
+            scalar_to_scratch(_scalar, _scratch, scratch_length - 1, chan);
+
+            _scratch[scratch_length - 1] = 0.0;
+            for (int j = 0; j < chan; ++j)
+            {
+                if (std::fabs(static_cast<float>(_scalar[j])) <= FLT_EPSILON)
+                {
+                    _scratch[scratch_length - 1] = 1.0;
+                    break;
+                }
+            }
+        }
+
+        float scale = static_cast<float>(_scale);
 
         //     DST     SRC     OP            __VA_ARGS__
-        UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
-        UNARY_(uchar ,  short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
-        UNARY_(uchar ,  float, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
-        UNARY_( short,  short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
-        UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
-        UNARY_( float,  short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
-        UNARY_( float,  float, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
+        UNARY_(uchar,  uchar,  run_divc, dst, src, scratch, scale);
+        UNARY_(uchar,  ushort, run_divc, dst, src, scratch, scale);
+        UNARY_(uchar,  short,  run_divc, dst, src, scratch, scale);
+        UNARY_(uchar,  float,  run_divc, dst, src, scratch, scale);
+        UNARY_(ushort, ushort, run_divc, dst, src, scratch, scale);
+        UNARY_(ushort, uchar,  run_divc, dst, src, scratch, scale);
+        UNARY_(ushort, short,  run_divc, dst, src, scratch, scale);
+        UNARY_(ushort, float,  run_divc, dst, src, scratch, scale);
+        UNARY_(short,  short,  run_divc, dst, src, scratch, scale);
+        UNARY_(short,  ushort, run_divc, dst, src, scratch, scale);
+        UNARY_(short,  uchar,  run_divc, dst, src, scratch, scale);
+        UNARY_(short,  float,  run_divc, dst, src, scratch, scale);
+        UNARY_(float,  uchar,  run_divc, dst, src, scratch, scale);
+        UNARY_(float,  short,  run_divc, dst, src, scratch, scale);
+        UNARY_(float,  ushort, run_divc, dst, src, scratch, scale);
+        UNARY_(float,  float,  run_divc, dst, src, scratch, scale);
 
         CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
     }
+
+    static void initScratch(const GMatDesc&, const GScalarDesc&, double, int, Buffer& scratch)
+    {
+            // Scratch layout: per-channel scalars repeated cyclically, followed by one
+            // trailing slot where run() stores the zero-divisor flag.
+            constexpr int zero_scalar_elem_indicator = 1;
+#if CV_SIMD
+            // 512 bits / 32 bits = 16 float32 elements fit into one AVX512 SIMD vector.
+            constexpr int maxNlanes = 16;
+            // +2 extra elements are needed to load the coefficients from the scalar array
+            // into SIMD vectors for the 3-channel case.
+            // Scalar array looks like: scalar[] = {C1, C2, C3, C1, C2, C3, ...}
+            // The first scalar SIMD vector should look like: C1 C2 C3 C1
+            // The second: C2 C3 C1 C2; the third: C3 C1 C2 C3
+            constexpr int offset = 2;
+            constexpr int buflen = maxNlanes + offset + zero_scalar_elem_indicator;
+#else
+            // run() accepts up to 4 channels and the fallback loop reads scalar[w % chan],
+            // so the flag needs its own slot instead of overwriting the 4th scalar.
+            constexpr int maxChannels = 4;
+            constexpr int buflen = maxChannels + zero_scalar_elem_indicator;
+#endif
+            setScratchSize(scratch, buflen);
+    }
+
+    static void resetScratch(Buffer& /*scratch*/)
+    {
+    }
 };
 
 GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, false)
@@ -2509,26 +2588,18 @@ GAPI_FLUID_KERNEL(GFluidSplit3, cv::gapi::core::GSplit3, false)
 
     static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3)
     {
-        const auto *in   =  src.InLine<uchar>(0);
+        const auto *in   = src.InLine<uchar>(0);
               auto *out1 = dst1.OutLine<uchar>();
               auto *out2 = dst2.OutLine<uchar>();
               auto *out3 = dst3.OutLine<uchar>();
 
         GAPI_Assert(3 == src.meta().chan);
         int width = src.length();
+        int w = 0;
 
-        int w = 0; // cycle counter
-
-    #if CV_SIMD128
-        for (; w <= width-16; w+=16)
-        {
-            v_uint8x16 a, b, c;
-            v_load_deinterleave(&in[3*w], a, b, c);
-            v_store(&out1[w], a);
-            v_store(&out2[w], b);
-            v_store(&out3[w], c);
-        }
-    #endif
+#if CV_SIMD
+        w = split3_simd(in, out1, out2, out3, width);
+#endif
 
         for (; w < width; w++)
         {
@@ -2545,7 +2616,7 @@ GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false)
 
     static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3, Buffer &dst4)
     {
-        const auto *in   =  src.InLine<uchar>(0);
+        const auto *in   = src.InLine<uchar>(0);
               auto *out1 = dst1.OutLine<uchar>();
               auto *out2 = dst2.OutLine<uchar>();
               auto *out3 = dst3.OutLine<uchar>();
@@ -2553,19 +2624,10 @@ GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false)
 
         GAPI_Assert(4 == src.meta().chan);
         int width = src.length();
+        int w = 0;
 
-        int w = 0; // cycle counter
-
-    #if CV_SIMD128
-        for (; w <= width-16; w+=16)
-        {
-            v_uint8x16 a, b, c, d;
-            v_load_deinterleave(&in[4*w], a, b, c, d);
-            v_store(&out1[w], a);
-            v_store(&out2[w], b);
-            v_store(&out3[w], c);
-            v_store(&out4[w], d);
-        }
+    #if CV_SIMD
+        w = split4_simd(in, out1, out2, out3, out4, width);
     #endif
 
         for (; w < width; w++)
@@ -2591,18 +2653,10 @@ GAPI_FLUID_KERNEL(GFluidMerge3, cv::gapi::core::GMerge3, false)
 
         GAPI_Assert(3 == dst.meta().chan);
         int width = dst.length();
+        int w = 0;
 
-        int w = 0; // cycle counter
-
-    #if CV_SIMD128
-        for (; w <= width-16; w+=16)
-        {
-            v_uint8x16 a, b, c;
-            a = v_load(&in1[w]);
-            b = v_load(&in2[w]);
-            c = v_load(&in3[w]);
-            v_store_interleave(&out[3*w], a, b, c);
-        }
+    #if CV_SIMD
+        w = merge3_simd(in1, in2, in3, out, width);
     #endif
 
         for (; w < width; w++)
@@ -2632,16 +2686,8 @@ GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false)
 
         int w = 0; // cycle counter
 
-    #if CV_SIMD128
-        for (; w <= width-16; w+=16)
-        {
-            v_uint8x16 a, b, c, d;
-            a = v_load(&in1[w]);
-            b = v_load(&in2[w]);
-            c = v_load(&in3[w]);
-            d = v_load(&in4[w]);
-            v_store_interleave(&out[4*w], a, b, c, d);
-        }
+    #if CV_SIMD
+        w = merge4_simd(in1, in2, in3, in4, out, width);
     #endif
 
         for (; w < width; w++)
diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
index 348c00ed12..9afac9ceb4 100644
--- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp
@@ -192,6 +192,34 @@ MULC_SIMD(float, float)
 
 #undef MULC_SIMD
 
+#define DIVC_SIMD(SRC, DST)                                                              \
+int divc_simd(const SRC in[], const float scalar[], DST out[],                           \
+              const int length, const int chan, const float scale,                       \
+              const int set_mask_flag)                                                   \
+{                                                                                        \
+    CV_CPU_DISPATCH(divc_simd, (in, scalar, out, length, chan, scale, set_mask_flag),    \
+                    CV_CPU_DISPATCH_MODES_ALL);                                          \
+}
+
+DIVC_SIMD(uchar, uchar)
+DIVC_SIMD(ushort, uchar)
+DIVC_SIMD(short, uchar)
+DIVC_SIMD(float, uchar)
+DIVC_SIMD(short, short)
+DIVC_SIMD(ushort, short)
+DIVC_SIMD(uchar, short)
+DIVC_SIMD(float, short)
+DIVC_SIMD(ushort, ushort)
+DIVC_SIMD(uchar, ushort)
+DIVC_SIMD(short, ushort)
+DIVC_SIMD(float, ushort)
+DIVC_SIMD(uchar, float)
+DIVC_SIMD(ushort, float)
+DIVC_SIMD(short, float)
+DIVC_SIMD(float, float)
+
+#undef DIVC_SIMD
+
 #define ABSDIFFC_SIMD(SRC)                                               \
 int absdiffc_simd(const SRC in[], const float scalar[], SRC out[],       \
                   const int length, const int chan)                      \
@@ -207,6 +235,34 @@ ABSDIFFC_SIMD(float)
 
 #undef ABSDIFFC_SIMD
 
+int split3_simd(const uchar in[], uchar out1[], uchar out2[],
+                uchar out3[], const int width)
+{
+    CV_CPU_DISPATCH(split3_simd, (in, out1, out2, out3, width),
+                    CV_CPU_DISPATCH_MODES_ALL);
+}
+
+int split4_simd(const uchar in[], uchar out1[], uchar out2[],
+                uchar out3[], uchar out4[], const int width)
+{
+    CV_CPU_DISPATCH(split4_simd, (in, out1, out2, out3, out4, width),
+                    CV_CPU_DISPATCH_MODES_ALL);
+}
+
+int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+                uchar out[], const int width)
+{
+    CV_CPU_DISPATCH(merge3_simd, (in1, in2, in3, out, width),
+                    CV_CPU_DISPATCH_MODES_ALL);
+}
+
+int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+                const uchar in4[], uchar out[], const int width)
+{
+    CV_CPU_DISPATCH(merge4_simd, (in1, in2, in3, in4, out, width),
+                    CV_CPU_DISPATCH_MODES_ALL);
+}
+
 } // namespace fluid
 } // namespace gapi
 } // namespace cv
diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp
index 6023a879d9..868923932d 100644
--- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp
@@ -152,6 +152,30 @@ MULC_SIMD(float, float)
 
 #undef MULC_SIMD
 
+#define DIVC_SIMD(SRC, DST)                                                              \
+int divc_simd(const SRC in[], const float scalar[], DST out[],                           \
+              const int length, const int chan, const float scale,                       \
+              const int set_mask_flag);
+
+DIVC_SIMD(uchar, uchar)
+DIVC_SIMD(ushort, uchar)
+DIVC_SIMD(short, uchar)
+DIVC_SIMD(float, uchar)
+DIVC_SIMD(short, short)
+DIVC_SIMD(ushort, short)
+DIVC_SIMD(uchar, short)
+DIVC_SIMD(float, short)
+DIVC_SIMD(ushort, ushort)
+DIVC_SIMD(uchar, ushort)
+DIVC_SIMD(short, ushort)
+DIVC_SIMD(float, ushort)
+DIVC_SIMD(uchar, float)
+DIVC_SIMD(ushort, float)
+DIVC_SIMD(short, float)
+DIVC_SIMD(float, float)
+
+#undef DIVC_SIMD
+
 #define ABSDIFFC_SIMD(T)                                            \
 int absdiffc_simd(const T in[], const float scalar[], T out[],      \
                   const int length, const int chan);
@@ -163,6 +187,18 @@ ABSDIFFC_SIMD(float)
 
 #undef ABSDIFFC_SIMD
 
+int split3_simd(const uchar in[], uchar out1[], uchar out2[],
+                uchar out3[], const int width);
+
+int split4_simd(const uchar in[], uchar out1[], uchar out2[],
+                uchar out3[], uchar out4[], const int width);
+
+int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+               uchar out[], const int width);
+
+int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+                const uchar in4[], uchar out[], const int width);
+
 }  // namespace fluid
 }  // namespace gapi
 }  // namespace cv
diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp
index 38c47072f4..2424a57677 100644
--- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp
@@ -173,6 +173,30 @@ MULC_SIMD(float, float)
 
 #undef MULC_SIMD
 
+#define DIVC_SIMD(SRC, DST)                                                              \
+int divc_simd(const SRC in[], const float scalar[], DST out[],                           \
+              const int length, const int chan, const float scale,                       \
+              const int set_mask_flag);
+
+DIVC_SIMD(uchar, uchar)
+DIVC_SIMD(ushort, uchar)
+DIVC_SIMD(short, uchar)
+DIVC_SIMD(float, uchar)
+DIVC_SIMD(short, short)
+DIVC_SIMD(ushort, short)
+DIVC_SIMD(uchar, short)
+DIVC_SIMD(float, short)
+DIVC_SIMD(ushort, ushort)
+DIVC_SIMD(uchar, ushort)
+DIVC_SIMD(short, ushort)
+DIVC_SIMD(float, ushort)
+DIVC_SIMD(uchar, float)
+DIVC_SIMD(ushort, float)
+DIVC_SIMD(short, float)
+DIVC_SIMD(float, float)
+
+#undef DIVC_SIMD
+
 #define ABSDIFFC_SIMD(T)                                            \
 int absdiffc_simd(const T in[], const float scalar[], T out[],      \
                   const int length, const int chan);
@@ -184,6 +208,18 @@ ABSDIFFC_SIMD(float)
 
 #undef ABSDIFFC_SIMD
 
+int split3_simd(const uchar in[], uchar out1[], uchar out2[],
+                uchar out3[], const int width);
+
+int split4_simd(const uchar in[], uchar out1[], uchar out2[],
+                uchar out3[], uchar out4[], const int width);
+
+int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+                uchar out[], const int width);
+
+int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+                const uchar in4[], uchar out[], const int width);
+
 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
 
 struct scale_tag {};
@@ -935,6 +971,7 @@ struct add_tag {};
 struct sub_tag {};
 struct subr_tag {};
 struct mul_tag {};
+struct div_tag {};
 struct absdiff_tag {};
 
 CV_ALWAYS_INLINE void arithmOpScalar_pack_store_c3(short* outx,       const v_int32& c1,
@@ -979,6 +1016,21 @@ CV_ALWAYS_INLINE v_float32 oper(mul_tag, const v_float32& a, const v_float32& sc
     return a * sc;
 }
 
+CV_ALWAYS_INLINE v_float32 oper_scaled(mul_tag, const v_float32& a, const v_float32& v_scalar, const v_float32& v_scale)
+{
+    return v_scale * a * v_scalar;
+}
+
+CV_ALWAYS_INLINE v_float32 oper(div_tag, const v_float32& a, const v_float32& sc)
+{
+    return a / sc;
+}
+
+CV_ALWAYS_INLINE v_float32 oper_scaled(div_tag, const v_float32& a, const v_float32& v_scalar, const v_float32& v_scale)
+{
+    return a*v_scale / v_scalar;
+}
+
 CV_ALWAYS_INLINE v_float32 oper(absdiff_tag, const v_float32& a, const v_float32& sc)
 {
     return v_absdiff(a, sc);
@@ -1288,16 +1340,17 @@ SUBRC_SIMD(float, float)
 
 //-------------------------
 //
-// Fluid kernels: MulC
+// Fluid kernels: MulC, DivC
 //
 //-------------------------
 
-template<typename SRC, typename DST>
+template<typename oper_tag, typename SRC, typename DST>  // op: tag choosing the oper_scaled() overload
 CV_ALWAYS_INLINE
 typename std::enable_if<std::is_same<DST, short>::value ||
                         std::is_same<DST, ushort>::value, void>::type
-mulc_scale_simd_c3_impl(const SRC* inx, DST* outx, const v_float32& s1, const v_float32& s2,
-                        const v_float32& s3, const v_float32& scale, const int nlanes)
+arithmOpScalarScaled_simd_c3_impl(oper_tag op, const SRC* inx, DST* outx, const v_float32& s1,
+                                  const v_float32& s2, const v_float32& s3,
+                                  const v_float32& v_scale, const int nlanes)
 {
     v_float32 a1 = vg_load_f32(inx);
     v_float32 a2 = vg_load_f32(&inx[nlanes / 2]);
@@ -1306,62 +1359,64 @@ mulc_scale_simd_c3_impl(const SRC* inx, DST* outx, const v_float32& s1, const v_
     v_float32 a5 = vg_load_f32(&inx[2 * nlanes]);
     v_float32 a6 = vg_load_f32(&inx[5 * nlanes / 2]);
 
-    arithmOpScalar_pack_store_c3(outx, v_round(scale*a1*s1),
-                                       v_round(scale*a2*s2),
-                                       v_round(scale*a3*s3),
-                                       v_round(scale*a4*s1),
-                                       v_round(scale*a5*s2),
-                                       v_round(scale*a6*s3));
+    arithmOpScalar_pack_store_c3(outx, v_round(oper_scaled(op, a1, s1, v_scale)),
+                                       v_round(oper_scaled(op, a2, s2, v_scale)),
+                                       v_round(oper_scaled(op, a3, s3, v_scale)),
+                                       v_round(oper_scaled(op, a4, s1, v_scale)),
+                                       v_round(oper_scaled(op, a5, s2, v_scale)),
+                                       v_round(oper_scaled(op, a6, s3, v_scale)));
 }
 
 //-------------------------------------------------------------------------------------------------
 
-template<typename SRC>
-CV_ALWAYS_INLINE void mulc_scale_simd_c3_impl(const SRC* inx, uchar* outx,
-                                              const v_float32& s1, const v_float32& s2,
-                                              const v_float32& s3, const v_float32& scale, const int nlanes)
+template<typename oper_tag, typename SRC>
+CV_ALWAYS_INLINE void arithmOpScalarScaled_simd_c3_impl(oper_tag op, const SRC* inx, uchar* outx,
+                                                        const v_float32& s1, const v_float32& s2,
+                                                        const v_float32& s3, const v_float32& v_scale,
+                                                        const int nlanes)
 {
     vx_store(outx,
-               v_pack_u(v_pack(v_round(scale * vg_load_f32(inx)* s1),
-                               v_round(scale * vg_load_f32(&inx[nlanes/4])* s2)),
-                        v_pack(v_round(scale * vg_load_f32(&inx[nlanes/2])* s3),
-                               v_round(scale * vg_load_f32(&inx[3*nlanes/4])* s1))));
+               v_pack_u(v_pack(v_round(oper_scaled(op, vg_load_f32(inx), s1, v_scale)),
+                               v_round(oper_scaled(op, vg_load_f32(&inx[nlanes/4]), s2, v_scale))),
+                        v_pack(v_round(oper_scaled(op, vg_load_f32(&inx[nlanes/2]), s3, v_scale)),
+                               v_round(oper_scaled(op, vg_load_f32(&inx[3*nlanes/4]), s1, v_scale)))));
 
     vx_store(&outx[nlanes],
-                v_pack_u(v_pack(v_round(scale * vg_load_f32(&inx[nlanes])* s2),
-                                v_round(scale * vg_load_f32(&inx[5*nlanes/4])* s3)),
-                         v_pack(v_round(scale * vg_load_f32(&inx[3*nlanes/2])* s1),
-                                v_round(scale * vg_load_f32(&inx[7*nlanes/4])* s2))));
+                v_pack_u(v_pack(v_round(oper_scaled(op, vg_load_f32(&inx[nlanes]), s2, v_scale)),
+                                v_round(oper_scaled(op, vg_load_f32(&inx[5*nlanes/4]), s3, v_scale))),
+                         v_pack(v_round(oper_scaled(op, vg_load_f32(&inx[3*nlanes/2]), s1, v_scale)),
+                                v_round(oper_scaled(op, vg_load_f32(&inx[7*nlanes/4]), s2, v_scale)))));
 
     vx_store(&outx[2 * nlanes],
-                v_pack_u(v_pack(v_round(scale * vg_load_f32(&inx[2*nlanes])* s3),
-                                v_round(scale * vg_load_f32(&inx[9*nlanes/4])* s1)),
-                         v_pack(v_round(scale * vg_load_f32(&inx[5*nlanes/2])* s2),
-                                v_round(scale * vg_load_f32(&inx[11*nlanes/4])* s3))));
+                v_pack_u(v_pack(v_round(oper_scaled(op, vg_load_f32(&inx[2*nlanes]), s3, v_scale)),
+                                v_round(oper_scaled(op, vg_load_f32(&inx[9*nlanes/4]), s1, v_scale))),
+                         v_pack(v_round(oper_scaled(op, vg_load_f32(&inx[5*nlanes/2]), s2, v_scale)),
+                                v_round(oper_scaled(op, vg_load_f32(&inx[11*nlanes/4]), s3, v_scale)))));
 }
 
 //-------------------------------------------------------------------------------------------------
 
-template<typename SRC>
-CV_ALWAYS_INLINE void mulc_scale_simd_c3_impl(const SRC* in, float* out,
-                                        const v_float32& s1, const v_float32& s2,
-                                        const v_float32& s3, const v_float32& scale, const int nlanes)
+template<typename oper_tag, typename SRC>
+CV_ALWAYS_INLINE void arithmOpScalarScaled_simd_c3_impl(oper_tag op, const SRC* in, float* out,
+                                                        const v_float32& s1, const v_float32& s2,
+                                                        const v_float32& s3, const v_float32& v_scale,
+                                                        const int nlanes)
 {
     v_float32 a1 = vg_load_f32(in);
     v_float32 a2 = vg_load_f32(&in[nlanes]);
     v_float32 a3 = vg_load_f32(&in[2*nlanes]);
 
-    vx_store(out, scale * a1* s1);
-    vx_store(&out[nlanes], scale * a2* s2);
-    vx_store(&out[2*nlanes], scale * a3* s3);
+    vx_store(out, oper_scaled(op, a1, s1, v_scale));
+    vx_store(&out[nlanes], oper_scaled(op, a2, s2, v_scale));
+    vx_store(&out[2*nlanes], oper_scaled(op, a3, s3, v_scale));
 }
 
 //-------------------------------------------------------------------------------------------------
 
-template<typename SRC, typename DST>
-CV_ALWAYS_INLINE int mulc_scale_simd_c3(const SRC in[],
-                                        const float scalar[], DST out[],
-                                        const int length, const float _scale)
+template<typename oper_tag, typename SRC, typename DST>
+CV_ALWAYS_INLINE int arithmOpScalarScaled_simd_c3(oper_tag op, const SRC in[],
+                                                  const float scalar[], DST out[],
+                                                  const int length, const float scale)
 {
     constexpr int chan = 3;
     constexpr int nlanes = vector_type_of_t<DST>::nlanes;
@@ -1370,7 +1425,7 @@ CV_ALWAYS_INLINE int mulc_scale_simd_c3(const SRC in[],
     if (length < lanes)
         return 0;
 
-    v_float32 scale = vx_setall_f32(_scale);
+    v_float32 v_scale = vx_setall_f32(scale);
 
     v_float32 s1 = vx_load(scalar);
 #if CV_SIMD_WIDTH == 32
@@ -1386,7 +1441,7 @@ CV_ALWAYS_INLINE int mulc_scale_simd_c3(const SRC in[],
     {
         for (; x <= length - lanes; x += lanes)
         {
-            mulc_scale_simd_c3_impl(&in[x], &out[x], s1, s2, s3, scale, nlanes);
+            arithmOpScalarScaled_simd_c3_impl(op, &in[x], &out[x], s1, s2, s3, v_scale, nlanes);
         }
 
         if (x < length)
@@ -1401,70 +1456,70 @@ CV_ALWAYS_INLINE int mulc_scale_simd_c3(const SRC in[],
 
 //-------------------------------------------------------------------------------------------------
 
-template<typename SRC, typename DST>
+template<typename oper_tag, typename SRC, typename DST>
 CV_ALWAYS_INLINE
 typename std::enable_if<(std::is_same<DST, ushort>::value ||
                          std::is_same<DST, short>::value), void>::type
-mulc_scale_simd_common_impl(const SRC* inx, DST* outx,
-                            const v_float32& sc, const v_float32& scale,
-                            const int nlanes)
+arithmOpScalarScaled_simd_common_impl(oper_tag op, const SRC* inx, DST* outx,
+                                      const v_float32& v_scalar, const v_float32& v_scale,
+                                      const int nlanes)
 {
     v_float32 a1 = vg_load_f32(inx);
     v_float32 a2 = vg_load_f32(&inx[nlanes/2]);
 
-    v_store_i16(outx, v_round(scale * a1* sc), v_round(scale * a2* sc));
+    v_store_i16(outx, v_round(oper_scaled(op, a1, v_scalar, v_scale)), v_round(oper_scaled(op, a2, v_scalar, v_scale)));
 }
 
 //-------------------------------------------------------------------------------------------------
 
-template<typename SRC>
-CV_ALWAYS_INLINE void mulc_scale_simd_common_impl(const SRC* inx,
-                                                  uchar* outx, const v_float32& sc,
-                                                  const v_float32& scale, const int nlanes)
+template<typename oper_tag, typename SRC>
+CV_ALWAYS_INLINE void arithmOpScalarScaled_simd_common_impl(oper_tag op, const SRC* inx,
+                                                            uchar* outx, const v_float32& v_scalar,
+                                                            const v_float32& v_scale, const int nlanes)
 {
     v_float32 a1 = vg_load_f32(inx);
     v_float32 a2 = vg_load_f32(&inx[nlanes/4]);
     v_float32 a3 = vg_load_f32(&inx[nlanes/2]);
     v_float32 a4 = vg_load_f32(&inx[3 * nlanes/4]);
 
-    vx_store(outx, v_pack_u(v_pack(v_round(scale * a1* sc),
-                                   v_round(scale * a2* sc)),
-                            v_pack(v_round(scale * a3* sc),
-                                   v_round(scale * a4* sc))));
+    vx_store(outx, v_pack_u(v_pack(v_round(oper_scaled(op, a1, v_scalar, v_scale)),
+                                   v_round(oper_scaled(op, a2, v_scalar, v_scale))),
+                            v_pack(v_round(oper_scaled(op, a3, v_scalar, v_scale)),
+                                   v_round(oper_scaled(op, a4, v_scalar, v_scale)))));
 }
 
 //-------------------------------------------------------------------------------------------------
 
-template<typename SRC>
-CV_ALWAYS_INLINE void mulc_scale_simd_common_impl(const SRC* inx,
-                                                  float* outx, const v_float32& sc,
-                                                  const v_float32& scale, const int)
+template<typename oper_tag, typename SRC>
+CV_ALWAYS_INLINE void arithmOpScalarScaled_simd_common_impl(oper_tag op, const SRC* inx,
+                                                            float* outx, const v_float32& v_scalar,
+                                                            const v_float32& v_scale, const int)
 {
-    v_float32 a1 = vg_load_f32(inx);
-    vx_store(outx, scale * a1* sc);
+    v_float32 a = vg_load_f32(inx);
+    vx_store(outx, oper_scaled(op, a, v_scalar, v_scale));
 }
 
 //-------------------------------------------------------------------------------------------------
 
-template<typename SRC, typename DST>
-CV_ALWAYS_INLINE int mulc_scale_simd_common(const SRC in[],
-                                            const float scalar[], DST out[],
-                                            const int length, const float _scale)
+template<typename oper_tag, typename SRC, typename DST>
+CV_ALWAYS_INLINE int arithmOpScalarScaled_simd_common(oper_tag op, const SRC in[],
+                                                      const float scalar[], DST out[],
+                                                      const int length, const float scale)
 {
     constexpr int nlanes = vector_type_of_t<DST>::nlanes;
 
     if (length < nlanes)
         return 0;
 
-    v_float32 _scalar = vx_load(scalar);
-    v_float32 scale = vx_setall_f32(_scale);
+    v_float32 v_scalar = vx_load(scalar);
+    v_float32 v_scale = vx_setall_f32(scale);
 
     int x = 0;
     for (;;)
     {
         for (; x <= length - nlanes; x += nlanes)
         {
-            mulc_scale_simd_common_impl(&in[x], &out[x], _scalar, scale, nlanes);
+            arithmOpScalarScaled_simd_common_impl(op, &in[x], &out[x], v_scalar, v_scale, nlanes);
         }
 
         if (x < length)
@@ -1477,6 +1532,8 @@ CV_ALWAYS_INLINE int mulc_scale_simd_common(const SRC in[],
     return x;
 }
 
+//-------------------------------------------------------------------------------------------------
+
 #define MULC_SIMD(SRC, DST)                                                    \
 int mulc_simd(const SRC in[], const float scalar[], DST out[],                 \
               const int length, const int chan, const float scale)             \
@@ -1495,7 +1552,8 @@ int mulc_simd(const SRC in[], const float scalar[], DST out[],                 \
         }                                                                      \
         else                                                                   \
         {                                                                      \
-            return mulc_scale_simd_common(in, scalar, out, length, scale);     \
+            return arithmOpScalarScaled_simd_common(op_t, in, scalar, out,     \
+                   length, scale);                                             \
         }                                                                      \
     }                                                                          \
     case 3:                                                                    \
@@ -1507,7 +1565,8 @@ int mulc_simd(const SRC in[], const float scalar[], DST out[],                 \
         }                                                                      \
         else                                                                   \
         {                                                                      \
-            return mulc_scale_simd_c3(in, scalar, out, length, scale);         \
+            return arithmOpScalarScaled_simd_c3(op_t, in, scalar, out,         \
+                                                length, scale);                \
         }                                                                      \
     }                                                                          \
     default:                                                                   \
@@ -1536,6 +1595,355 @@ MULC_SIMD(float, float)
 
 #undef MULC_SIMD
 
+//-------------------------------------------------------------------------------------------------
+
+// 16-bit (short/ushort) output: rounds div_op(in, v_scalar, v_scale), forcing 0 where v_scalar == 0.
+template<typename scale_tag_t, typename SRC, typename DST>
+CV_ALWAYS_INLINE
+typename std::enable_if<(std::is_same<DST, ushort>::value ||
+                         std::is_same<DST, short>::value), int>::type
+divc_simd_common_impl(scale_tag_t s_tag, const SRC in[], DST out[],
+                      const v_float32& v_scalar, const v_float32& v_scale,
+                      const int length)
+{
+    constexpr int nlanes = vector_type_of_t<DST>::nlanes;
+    constexpr int half = nlanes / 2;
+
+    const v_float32 v_zero = vx_setzero_f32();
+    const v_float32 v_mask = (v_scalar == v_zero);  // lanes whose divisor is zero
+
+    int x = 0;
+    while (true)
+    {
+        for (; x <= length - nlanes; x += nlanes)
+        {
+            v_float32 q_lo = div_op(s_tag, vg_load_f32(&in[x]), v_scalar, v_scale);
+            v_float32 q_hi = div_op(s_tag, vg_load_f32(&in[x + half]), v_scalar, v_scale);
+
+            v_store_i16(&out[x], v_round(v_select(v_mask, v_zero, q_lo)),
+                                 v_round(v_select(v_mask, v_zero, q_hi)));
+        }
+
+        if (x >= length)
+            break;
+        // Unaligned tail: step back so that one more full vector ends exactly at `length`.
+        x = length - nlanes;
+    }
+    return x;
+}
+
+//-------------------------------------------------------------------------------------------------
+
+// 8-bit output: four float vectors are divided via div_op, zeroed where v_scalar == 0,
+// rounded and packed (v_pack, then v_pack_u) into a single uchar vector per iteration.
+template<typename scale_tag_t, typename SRC>
+CV_ALWAYS_INLINE int divc_simd_common_impl(scale_tag_t s_tag, const SRC in[],
+                                           uchar out[], const v_float32& v_scalar,
+                                           const v_float32& v_scale, const int length)
+{
+    constexpr int nlanes = v_uint8::nlanes;
+    constexpr int quarter = nlanes / 4;
+
+    const v_float32 v_zero = vx_setzero_f32();
+    const v_float32 v_mask = (v_scalar == v_zero);
+
+    int x = 0;
+    while (true)
+    {
+        for (; x <= length - nlanes; x += nlanes)
+        {
+            v_float32 q1 = v_select(v_mask, v_zero, div_op(s_tag, vg_load_f32(&in[x]), v_scalar, v_scale));
+            v_float32 q2 = v_select(v_mask, v_zero, div_op(s_tag, vg_load_f32(&in[x + quarter]), v_scalar, v_scale));
+            v_float32 q3 = v_select(v_mask, v_zero, div_op(s_tag, vg_load_f32(&in[x + 2 * quarter]), v_scalar, v_scale));
+            v_float32 q4 = v_select(v_mask, v_zero, div_op(s_tag, vg_load_f32(&in[x + 3 * quarter]), v_scalar, v_scale));
+
+            vx_store(&out[x], v_pack_u(v_pack(v_round(q1), v_round(q2)),
+                                       v_pack(v_round(q3), v_round(q4))));
+        }
+
+        if (x >= length)
+            break;
+
+        // Unaligned tail: step back so that one more full vector ends exactly at `length`.
+        x = length - nlanes;
+    }
+    return x;
+}
+
+//-------------------------------------------------------------------------------------------------
+
+// Float output: stores div_op(in, v_scalar, v_scale) directly, with no zero-divisor masking.
+template<typename scale_tag_t, typename SRC>
+CV_ALWAYS_INLINE int divc_simd_common_impl(scale_tag_t s_tag, const SRC in[],
+                                           float out[], const v_float32& v_scalar,
+                                           const v_float32& v_scale, const int length)
+{
+    constexpr int nlanes = v_float32::nlanes;
+
+    int x = 0;
+    while (true)
+    {
+        for (; x <= length - nlanes; x += nlanes)
+        {
+            vx_store(&out[x], div_op(s_tag, vg_load_f32(&in[x]), v_scalar, v_scale));
+        }
+
+        if (x >= length)
+            break;
+
+        // Unaligned tail: rewind so the last full vector ends exactly at `length`.
+        x = length - nlanes;
+    }
+    return x;
+}
+
+//-------------------------------------------------------------------------------------------------
+
+// Entry point of the zero-masking DivC path; returns 0 when a row is shorter than one vector.
+template<typename scale_tag_t, typename SRC, typename DST>
+CV_ALWAYS_INLINE int divc_mask_simd_common(scale_tag_t tag, const SRC in[],
+                                           const float scalar[], DST out[],
+                                           const int length, const float scale)
+{
+    constexpr int min_length = vector_type_of_t<DST>::nlanes;
+    if (length < min_length)
+        return 0;  // nothing was vectorized
+
+    const v_float32 divisor = vx_load(scalar);
+    const v_float32 factor = vx_setall_f32(scale);
+    return divc_simd_common_impl(tag, in, out, divisor, factor, length);
+}
+
+//-------------------------------------------------------------------------------------------------
+
+// 3-channel, 16-bit (short/ushort) output. s1/s2/s3 are the divisor vectors prepared by the
+// caller; lanes whose divisor is zero are forced to 0. Advances by `lanes` per iteration.
+template<typename scale_tag_t, typename SRC, typename DST>
+CV_ALWAYS_INLINE
+typename std::enable_if<std::is_same<DST, short>::value ||
+                        std::is_same<DST, ushort>::value, int>::type
+divc_simd_c3_impl(scale_tag_t s_tag, const SRC in[], DST out[], const v_float32& s1,
+                  const v_float32& s2, const v_float32& s3,
+                  const v_float32& v_scale, const int length,
+                  const int nlanes, const int lanes)
+{
+    v_float32 v_zero = vx_setzero_f32();
+    v_float32 v_mask1 = (s1 == v_zero);
+    v_float32 v_mask2 = (s2 == v_zero);
+    v_float32 v_mask3 = (s3 == v_zero);
+
+    int x = 0;
+    for (;;)
+    {
+        for (; x <= length - lanes; x += lanes)
+        {
+            v_float32 a1 = vg_load_f32(&in[x]);
+            v_float32 a2 = vg_load_f32(&in[x + nlanes / 2]);
+            v_float32 a3 = vg_load_f32(&in[x + nlanes]);
+            v_float32 a4 = vg_load_f32(&in[x + 3 * nlanes / 2]);
+            v_float32 a5 = vg_load_f32(&in[x + 2 * nlanes]);
+            v_float32 a6 = vg_load_f32(&in[x + 5 * nlanes / 2]);
+
+            arithmOpScalar_pack_store_c3(&out[x], v_round(v_select(v_mask1, v_zero, div_op(s_tag, a1, s1, v_scale))),
+                                               v_round(v_select(v_mask2, v_zero, div_op(s_tag, a2, s2, v_scale))),
+                                               v_round(v_select(v_mask3, v_zero, div_op(s_tag, a3, s3, v_scale))),
+                                               v_round(v_select(v_mask1, v_zero, div_op(s_tag, a4, s1, v_scale))),
+                                               v_round(v_select(v_mask2, v_zero, div_op(s_tag, a5, s2, v_scale))),
+                                               v_round(v_select(v_mask3, v_zero, div_op(s_tag, a6, s3, v_scale))));
+        }
+
+        if (x >= length)
+            break;
+
+        x = length - lanes;  // process unaligned tail: redo the last full group ending at `length`
+    }
+    return x;
+}
+
+//-------------------------------------------------------------------------------------------------
+// 3-channel, 8-bit output: every iteration stores three uchar vectors; lanes whose divisor is zero become 0.
+template<typename scale_tag_t, typename SRC>
+CV_ALWAYS_INLINE int divc_simd_c3_impl(scale_tag_t s_tag, const SRC* in, uchar* out,
+                                       const v_float32& s1, const v_float32& s2,
+                                       const v_float32& s3, const v_float32& v_scale,
+                                       const int length, const int nlanes, const int lanes)
+{
+    v_float32 v_zero = vx_setzero_f32();
+    v_float32 v_mask1 = (s1 == v_zero);  // set in the lanes where the s1 divisor is zero
+    v_float32 v_mask2 = (s2 == v_zero);  // same for s2
+    v_float32 v_mask3 = (s3 == v_zero);  // same for s3
+
+    int x = 0;
+    for (;;)
+    {
+        for (; x <= length - lanes; x += lanes)
+        {
+            vx_store(&out[x],  // first vector: divisors cycle s1, s2, s3, s1
+                       v_pack_u(v_pack(v_round(v_select(v_mask1, v_zero, div_op(s_tag, vg_load_f32(&in[x]), s1, v_scale))),
+                                       v_round(v_select(v_mask2, v_zero, div_op(s_tag, vg_load_f32(&in[x + nlanes/4]), s2, v_scale)))),
+                                v_pack(v_round(v_select(v_mask3, v_zero, div_op(s_tag, vg_load_f32(&in[x + nlanes/2]), s3, v_scale))),
+                                       v_round(v_select(v_mask1, v_zero, div_op(s_tag, vg_load_f32(&in[x + 3*nlanes/4]), s1, v_scale))))));
+
+            vx_store(&out[x + nlanes],  // second vector: the cycle continues with s2, s3, s1, s2
+                        v_pack_u(v_pack(v_round(v_select(v_mask2, v_zero, div_op(s_tag, vg_load_f32(&in[x + nlanes]), s2, v_scale))),
+                                        v_round(v_select(v_mask3, v_zero, div_op(s_tag, vg_load_f32(&in[x + 5*nlanes/4]), s3, v_scale)))),
+                                 v_pack(v_round(v_select(v_mask1, v_zero, div_op(s_tag, vg_load_f32(&in[x + 3*nlanes/2]), s1, v_scale))),
+                                        v_round(v_select(v_mask2, v_zero, div_op(s_tag, vg_load_f32(&in[x + 7*nlanes/4]), s2, v_scale))))));
+
+            vx_store(&out[x + 2 * nlanes],  // third vector: the cycle ends with s3, s1, s2, s3
+                        v_pack_u(v_pack(v_round(v_select(v_mask3, v_zero, div_op(s_tag, vg_load_f32(&in[x + 2*nlanes]), s3, v_scale))),
+                                        v_round(v_select(v_mask1, v_zero, div_op(s_tag, vg_load_f32(&in[x + 9*nlanes/4]), s1, v_scale)))),
+                                 v_pack(v_round(v_select(v_mask2, v_zero, div_op(s_tag, vg_load_f32(&in[x + 5*nlanes/2]), s2, v_scale))),
+                                        v_round(v_select(v_mask3, v_zero, div_op(s_tag, vg_load_f32(&in[x + 11*nlanes/4]), s3, v_scale))))));
+        }
+
+        if (x < length)
+        {
+            x = length - lanes;
+            continue;  // process unaligned tail
+        }
+        break;
+    }
+    return x;
+}
+
+//-------------------------------------------------------------------------------------------------
+
+// 3-channel float output: three consecutive vectors per iteration, divided by s1, s2 and s3
+// respectively; no zero-divisor masking is applied here.
+template<typename scale_tag_t, typename SRC>
+CV_ALWAYS_INLINE int divc_simd_c3_impl(scale_tag_t s_tag, const SRC* in, float* out,
+                                       const v_float32& s1, const v_float32& s2,
+                                       const v_float32& s3, const v_float32& v_scale, const int length,
+                                       const int nlanes, const int lanes)
+{
+    int x = 0;
+    while (true)
+    {
+        for (; x <= length - lanes; x += lanes)
+        {
+            const SRC* src = &in[x];
+            float* dst = &out[x];
+
+            vx_store(dst, div_op(s_tag, vg_load_f32(src), s1, v_scale));
+            vx_store(dst + nlanes, div_op(s_tag, vg_load_f32(src + nlanes), s2, v_scale));
+            vx_store(dst + 2*nlanes, div_op(s_tag, vg_load_f32(src + 2*nlanes), s3, v_scale));
+        }
+
+        if (x >= length)
+            break;
+
+        // Unaligned tail: rewind so that the final group of `lanes` values ends at `length`.
+        x = length - lanes;
+    }
+    return x;
+}
+
+//-------------------------------------------------------------------------------------------------
+
+// 3-channel entry point of the zero-masking DivC path. The offsets into `scalar` used for
+// the second and third divisor vectors are swapped when CV_SIMD_WIDTH == 32.
+template<typename scale_tag_t, typename SRC, typename DST>
+CV_ALWAYS_INLINE int divc_mask_simd_c3(scale_tag_t s_tag, const SRC in[],
+                                       const float scalar[], DST out[],
+                                       const int length, const float scale)
+{
+    constexpr int nlanes = vector_type_of_t<DST>::nlanes;
+    constexpr int lanes = 3 * nlanes;  // three channels per pixel
+#if CV_SIMD_WIDTH == 32
+    constexpr int ofs2 = 2, ofs3 = 1;
+#else
+    constexpr int ofs2 = 1, ofs3 = 2;
+#endif
+
+    if (length < lanes)
+        return 0;
+
+    const v_float32 v_scale = vx_setall_f32(scale);
+    const v_float32 s1 = vx_load(scalar);
+    const v_float32 s2 = vx_load(&scalar[ofs2]);
+    const v_float32 s3 = vx_load(&scalar[ofs3]);
+    return divc_simd_c3_impl(s_tag, in, out, s1, s2, s3, v_scale, length, nlanes, lanes);
+}
+
+//-------------------------------------------------------------------------------------------------
+// Defines divc_simd(SRC -> DST): dispatches on channel count, on scale == 1 and on set_mask_flag.
+#define DIVC_SIMD(SRC, DST)                                                    \
+int divc_simd(const SRC in[], const float scalar[], DST out[],                 \
+              const int length, const int chan, const float scale,             \
+              const int set_mask_flag)                                         \
+{                                                                              \
+    switch (chan)                                                              \
+    {                                                                          \
+    case 1:                                                                    \
+    case 2:                                                                    \
+    case 4:                                                                    \
+    {                                                                          \
+        if (std::fabs(scale - 1.0f) <= FLT_EPSILON)                            \
+        {                                                                      \
+            if (set_mask_flag == 1)                                            \
+                return divc_mask_simd_common(not_scale_tag{}, in, scalar,      \
+                                             out, length, scale);              \
+            else                                                               \
+                return arithmOpScalar_simd_common(div_tag{}, in, scalar,       \
+                                                  out, length);                \
+        }                                                                      \
+        else                                                                   \
+        {   if (set_mask_flag == 1)                                            \
+                return divc_mask_simd_common(scale_tag{}, in, scalar,          \
+                                             out, length, scale);              \
+            else                                                               \
+                return arithmOpScalarScaled_simd_common(div_tag{}, in, scalar, \
+                                                        out, length, scale);   \
+        }                                                                      \
+    }                                                                          \
+    case 3:                                                                    \
+    {                                                                          \
+        if (std::fabs(scale - 1.0f) <= FLT_EPSILON)                            \
+        {                                                                      \
+            if (set_mask_flag == 1)                                            \
+                return divc_mask_simd_c3(not_scale_tag{}, in, scalar,          \
+                                             out, length, scale);              \
+            else                                                               \
+                return arithmOpScalar_simd_c3(div_tag{}, in, scalar,           \
+                                              out, length);                    \
+        }                                                                      \
+        else                                                                   \
+        {                                                                      \
+            if (set_mask_flag == 1)                                            \
+                return divc_mask_simd_c3(scale_tag{}, in, scalar,              \
+                                         out, length, scale);                  \
+            else                                                               \
+                return arithmOpScalarScaled_simd_c3(div_tag{}, in, scalar, out,\
+                                                    length, scale);            \
+        }                                                                      \
+    }                                                                          \
+    default:                                                                   \
+        GAPI_Assert(chan <= 4);                                                \
+        break;                                                                 \
+    }                                                                          \
+    return 0;                                                                  \
+}
+// Instantiate divc_simd for each (SRC, DST) pair listed below.
+DIVC_SIMD(uchar, uchar)
+DIVC_SIMD(ushort, uchar)
+DIVC_SIMD(short, uchar)
+DIVC_SIMD(float, uchar)
+DIVC_SIMD(short, short)
+DIVC_SIMD(ushort, short)
+DIVC_SIMD(uchar, short)
+DIVC_SIMD(float, short)
+DIVC_SIMD(ushort, ushort)
+DIVC_SIMD(uchar, ushort)
+DIVC_SIMD(short, ushort)
+DIVC_SIMD(float, ushort)
+DIVC_SIMD(uchar, float)
+DIVC_SIMD(ushort, float)
+DIVC_SIMD(short, float)
+DIVC_SIMD(float, float)
+
+#undef DIVC_SIMD
+
 //-------------------------
 //
 // Fluid kernels: AbsDiffC
@@ -1544,7 +1952,7 @@ MULC_SIMD(float, float)
 
 #define ABSDIFFC_SIMD(SRC)                                                          \
 int absdiffc_simd(const SRC in[], const float scalar[], SRC out[],                  \
-              const int length, const int chan)                                     \
+                  const int length, const int chan)                                 \
 {                                                                                   \
     switch (chan)                                                                   \
     {                                                                               \
@@ -1568,6 +1976,144 @@ ABSDIFFC_SIMD(float)
 
 #undef ABSDIFFC_SIMD
 
+//-------------------------
+//
+// Fluid kernels: Split3
+//
+//-------------------------
+
+int split3_simd(const uchar in[], uchar out1[], uchar out2[],
+                uchar out3[], const int width)
+{
+    // Deinterleaves 3-channel pixels from `in` into three planes, one uchar vector per
+    // plane at a time. Returns the number of pixels handled (0 if width < one vector).
+    constexpr int nlanes = v_uint8::nlanes;
+    if (width < nlanes)
+        return 0;
+
+    int x = 0;
+    while (true)
+    {
+        for (; x <= width - nlanes; x += nlanes)
+        {
+            v_uint8 ch0, ch1, ch2;
+            v_load_deinterleave(&in[3 * x], ch0, ch1, ch2);
+            vx_store(&out1[x], ch0);
+            vx_store(&out2[x], ch1);
+            vx_store(&out3[x], ch2);
+        }
+
+        if (x >= width)
+            break;
+        x = width - nlanes;  // tail: redo the last full vector so that it ends at `width`
+    }
+    return x;
+}
+
+//-------------------------
+//
+// Fluid kernels: Split4
+//
+//-------------------------
+
+int split4_simd(const uchar in[], uchar out1[], uchar out2[],
+                uchar out3[], uchar out4[], const int width)
+{
+    // Deinterleaves 4-channel pixels from `in` into four planes, one uchar vector per
+    // plane at a time. Returns the number of pixels handled (0 if width < one vector).
+    constexpr int nlanes = v_uint8::nlanes;
+    if (width < nlanes)
+        return 0;
+
+    int x = 0;
+    while (true)
+    {
+        for (; x <= width - nlanes; x += nlanes)
+        {
+            v_uint8 ch0, ch1, ch2, ch3;
+            v_load_deinterleave(&in[4 * x], ch0, ch1, ch2, ch3);
+            vx_store(&out1[x], ch0);
+            vx_store(&out2[x], ch1);
+            vx_store(&out3[x], ch2);
+            vx_store(&out4[x], ch3);
+        }
+
+        if (x >= width)
+            break;
+        x = width - nlanes;  // tail: redo the last full vector so that it ends at `width`
+    }
+    return x;
+}
+
+//-------------------------
+//
+// Fluid kernels: Merge3
+//
+//-------------------------
+
+int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+                uchar out[], const int width)
+{
+    // Interleaves three planes into 3-channel pixels in `out`, one uchar vector per plane
+    // at a time. Returns the number of pixels handled (0 if width < one vector).
+    constexpr int nlanes = v_uint8::nlanes;
+    if (width < nlanes)
+        return 0;
+
+    int x = 0;
+    while (true)
+    {
+        for (; x <= width - nlanes; x += nlanes)
+        {
+            const v_uint8 p0 = vx_load(&in1[x]);
+            const v_uint8 p1 = vx_load(&in2[x]);
+            const v_uint8 p2 = vx_load(&in3[x]);
+            v_store_interleave(&out[3 * x], p0, p1, p2);
+        }
+
+        if (x >= width)
+            break;
+
+        x = width - nlanes;  // tail: redo the last full vector so that it ends at `width`
+    }
+    return x;
+}
+
+//-------------------------
+//
+// Fluid kernels: Merge4
+//
+//-------------------------
+
+int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[],
+                const uchar in4[], uchar out[], const int width)
+{
+    // Interleaves four planes into 4-channel pixels in `out`, one uchar vector per plane
+    // at a time. Returns the number of pixels handled (0 if width < one vector).
+    constexpr int nlanes = v_uint8::nlanes;
+    if (width < nlanes)
+        return 0;
+
+    int x = 0;
+    while (true)
+    {
+        for (; x <= width - nlanes; x += nlanes)
+        {
+            const v_uint8 p0 = vx_load(&in1[x]);
+            const v_uint8 p1 = vx_load(&in2[x]);
+            const v_uint8 p2 = vx_load(&in3[x]);
+            const v_uint8 p3 = vx_load(&in4[x]);
+            v_store_interleave(&out[4 * x], p0, p1, p2, p3);
+        }
+
+        if (x >= width)
+            break;
+
+        x = width - nlanes;  // tail: redo the last full vector so that it ends at `width`
+    }
+    return x;
+}
+
 #endif  // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
 
 CV_CPU_OPTIMIZATION_NAMESPACE_END
diff --git a/modules/gapi/src/backends/fluid/gfluidcore_simd_sse41.hpp b/modules/gapi/src/backends/fluid/gfluidcore_simd_sse41.hpp
index 02fff30977..3f2012807e 100644
--- a/modules/gapi/src/backends/fluid/gfluidcore_simd_sse41.hpp
+++ b/modules/gapi/src/backends/fluid/gfluidcore_simd_sse41.hpp
@@ -28,7 +28,7 @@
 namespace cv {
 namespace gapi {
 namespace fluid {
-namespace sse42 {
+namespace sse41 {
 
 CV_ALWAYS_INLINE void v_gather_pixel_map(v_uint8x16& vec, const uchar src[], const short* index, const int pos)
 {
@@ -216,8 +216,8 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_<3>(uint8_t* dst[],
                                               const int      lpi) {
     bool xRatioEq = inSz.width == outSz.width;
     bool yRatioEq = inSz.height == outSz.height;
-    constexpr int nlanes = 16;
-    constexpr int half_nlanes = 16 / 2;
+    constexpr int nlanes = 16; // number of 8-bit integers that fit into a 128-bit SIMD vector.
+    constexpr int half_nlanes = nlanes / 2;
     constexpr int chanNum = 3;
 
     if (!xRatioEq && !yRatioEq) {
@@ -235,7 +235,7 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_<3>(uint8_t* dst[],
 
             for (int w = 0; w < inSz.width * chanNum; ) {
                 for (; w <= inSz.width * chanNum - half_nlanes && w >= 0; w += half_nlanes) {
-#ifdef __i386__
+#if defined(__i386__) || defined(_M_IX86)
                     __m128i val0lo = _mm_castpd_si128(_mm_loadh_pd(
                                                       _mm_load_sd(reinterpret_cast<const double*>(&src0[0][w])),
                                                                   reinterpret_cast<const double*>(&src0[1][w])));
@@ -298,84 +298,36 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_<3>(uint8_t* dst[],
 
             // horizontal pass
             __m128i horizontal_shuf_mask = _mm_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
-
-            for (int x = 0; outSz.width >= nlanes; )
+            __m128i horizontal_shuf_mask1 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15);
+            constexpr int nproc_pixels = 5;
+            for (int x = 0; ; )
             {
-                for (; x <= outSz.width - nlanes; x += nlanes)
+                for (; x <= outSz.width - (nproc_pixels + 1); x += nproc_pixels)
                 {
-#ifdef _WIN64
+#ifdef _MSC_VER
                     __m128i a00 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * x]), *reinterpret_cast<const int64_t*>(&clone[4 * x]));
-                    __m128i a01 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * x]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 1)]));
-                    __m128i a11 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 1)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 1)]));
-                    __m128i a22 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 2)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 2)]));
-                    __m128i a23 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 2)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 3)]));
-                    __m128i a33 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 3)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 3)]));
-                    __m128i a44 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 4)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 4)]));
-                    __m128i a45 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 4)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 5)]));
-                    __m128i a55 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 5)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 5)]));
-                    __m128i a66 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 6)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 6)]));
-                    __m128i a67 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 6)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 7)]));
-                    __m128i a77 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 7)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 7)]));
-                    __m128i a88 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 8)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 8)]));
-                    __m128i a89 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 8)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 9)]));
-                    __m128i a99 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 9)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 9)]));
-                    __m128i a1010 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 10)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 10)]));
-                    __m128i a1011 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 10)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 11)]));
-                    __m128i a1111 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 11)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 11)]));
-                    __m128i a1212 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 12)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 12)]));
-                    __m128i a1213 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 12)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 13)]));
-                    __m128i a1313 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 13)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 13)]));
-                    __m128i a1414 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 14)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 14)]));
-                    __m128i a1415 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 14)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 15)]));
-                    __m128i a1515 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 15)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 15)]));
 #else
                     __m128i a00 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * x]), *reinterpret_cast<const __m64*>(&clone[4 * x]));
-                    __m128i a01 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * x]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 1)]));
-                    __m128i a11 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 1)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 1)]));
-                    __m128i a22 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 2)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 2)]));
-                    __m128i a23 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 2)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 3)]));
-                    __m128i a33 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 3)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 3)]));
-                    __m128i a44 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 4)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 4)]));
-                    __m128i a45 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 4)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 5)]));
-                    __m128i a55 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 5)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 5)]));
-                    __m128i a66 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 6)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 6)]));
-                    __m128i a67 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 6)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 7)]));
-                    __m128i a77 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 7)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 7)]));
-                    __m128i a88 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 8)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 8)]));
-                    __m128i a89 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 8)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 9)]));
-                    __m128i a99 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 9)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 9)]));
-                    __m128i a1010 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 10)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 10)]));
-                    __m128i a1011 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 10)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 11)]));
-                    __m128i a1111 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 11)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 11)]));
-                    __m128i a1212 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 12)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 12)]));
-                    __m128i a1213 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 12)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 13)]));
-                    __m128i a1313 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 13)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 13)]));
-                    __m128i a1414 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 14)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 14)]));
-                    __m128i a1415 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 14)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 15)]));
-                    __m128i a1515 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 15)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 15)]));
+#endif
+                    __m128i pix1 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x])]));
+                    __m128i pix2 = _mm_setzero_si128();
+#if defined(__i386__) || defined(_M_IX86)
+                    pix2 = _mm_castpd_si128(_mm_load_sd(reinterpret_cast<const double*>(&tmp[4 * (chanNum * (mapsx[x] + 1))])));
+#else
+                    pix2 = _mm_insert_epi64(pix2, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x] + 1))]), 0);
 #endif
 
-                    // load 3 channels of first pixel from first pair of 4-couple scope
-                    __m128i pix1 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x])]));
-                    // insert first channel from next couple of pixels to completely fill the simd vector
-                    pix1 = _mm_insert_epi32(pix1, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * mapsx[x + 1])]), 3);
-
-                    // load 3 channels of neighbor pixel from first pair of 4-couple scope
-                    __m128i pix2 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * (mapsx[x] + 1))]));
-                    // insert first channel from next couple of pixels to completely fill the simd vector
-                    pix2 = _mm_insert_epi32(pix2, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + 1] + 1))]), 3);
+                    pix2 = _mm_insert_epi32(pix2, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x] + 1)) + 8]), 2);
 
                     // expand 8-bit data to 16-bit
                     __m128i val_0 = _mm_unpacklo_epi8(pix1, zero);
                     __m128i val_1 = _mm_unpacklo_epi8(pix2, zero);
-
-                    // expand 8-bit data to 16-bit
                     __m128i val_2 = _mm_unpackhi_epi8(pix1, zero);
                     __m128i val_3 = _mm_unpackhi_epi8(pix2, zero);
 
                     // the main calculations
                     __m128i t0_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a00);
-                    __m128i t1_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a01);
+                    __m128i t1_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a00);
                     __m128i r0_0 = _mm_add_epi16(val_1, t0_0);
                     __m128i r1_0 = _mm_add_epi16(val_3, t1_0);
 
@@ -384,111 +336,129 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_<3>(uint8_t* dst[],
                     // gather data from the same lines together
                     __m128i res1 = _mm_shuffle_epi8(q0_0, horizontal_shuf_mask);
 
-                    val_0 = _mm_unpacklo_epi8(_mm_insert_epi64(val_0, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * mapsx[x + 1] + 1)]), 0), zero);
-                    val_1 = _mm_unpacklo_epi8(_mm_insert_epi64(val_1, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x + 1] + 1) + 1)]), 0), zero);
+#ifdef _MSC_VER
+                    __m128i a11 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 1)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 1)]));
+#else
+                    __m128i a11 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 1)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 1)]));
+#endif
 
-                    val_2 = _mm_insert_epi64(val_2, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * mapsx[x + 2])]), 0);
-                    val_3 = _mm_insert_epi64(val_3, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x + 2] + 1))]), 0);
-
-                    val_2 = _mm_unpacklo_epi8(val_2, zero);
-                    val_3 = _mm_unpacklo_epi8(val_3, zero);
-
-                    __m128i t0_1 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a11);
-                    __m128i t1_1 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a22);
-                    __m128i r0_1 = _mm_add_epi16(val_1, t0_1);
-                    __m128i r1_1 = _mm_add_epi16(val_3, t1_1);
-
-                    __m128i q0_1 = _mm_packus_epi16(r0_1, r1_1);
-                    __m128i res2 = _mm_shuffle_epi8(q0_1, horizontal_shuf_mask);
-
-                    __m128i pix7 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * (mapsx[x + 3] - 1) + 2)]));
-                    pix7 = _mm_insert_epi32(pix7, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * mapsx[x + 2] + 2)]), 0);
-
-                    __m128i pix8 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x + 3] + 2)]));
-                    pix8 = _mm_insert_epi32(pix8, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + 2] + 1) + 2)]), 0);
-
-                    val_0 = _mm_unpacklo_epi8(pix7, zero);
-                    val_1 = _mm_unpacklo_epi8(pix8, zero);
-
-                    val_2 = _mm_unpackhi_epi8(pix7, zero);
-                    val_3 = _mm_unpackhi_epi8(pix8, zero);
-
-                    // the main calculations
-                    __m128i t0_2 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a23);
-                    __m128i t1_2 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a33);
-                    __m128i r0_2 = _mm_add_epi16(val_1, t0_2);
-                    __m128i r1_2 = _mm_add_epi16(val_3, t1_2);
-
-                    // pack 16-bit data to 8-bit
-                    __m128i q0_2 = _mm_packus_epi16(r0_2, r1_2);
-                    __m128i res3 = _mm_shuffle_epi8(q0_2, horizontal_shuf_mask);
-
-                    __m128i pix9 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x + 4])]));
-                    // insert first channel from next couple of pixels to completely fill the simd vector
-                    pix9 = _mm_insert_epi32(pix9, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * mapsx[x + 5])]), 3);
-
-                    // load 3 channels of neighbor pixel from first pair of 4-couple scope
-                    __m128i pix10 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * (mapsx[x + 4] + 1))]));
-                    // insert first channel from next couple of pixels to completely fill the simd vector
-                    pix10 = _mm_insert_epi32(pix10, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + 5] + 1))]), 3);
+                    pix1 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x + 1])]));
+#if defined(__i386__) || defined(_M_IX86)
+                    pix2 = _mm_castpd_si128(_mm_load_sd(reinterpret_cast<const double*>(&tmp[4 * (chanNum * (mapsx[x + 1] + 1))])));
+#else
+                    pix2 = _mm_insert_epi64(pix2, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x + 1] + 1))]), 0);
+#endif
+                    pix2 = _mm_insert_epi32(pix2, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + 1] + 1)) + 8]), 2);
 
                     // expand 8-bit data to 16-bit
-                    val_0 = _mm_unpacklo_epi8(pix9, zero);
-                    val_1 = _mm_unpacklo_epi8(pix10, zero);
-
-                    // expand 8-bit data to 16-bit
-                    val_2 = _mm_unpackhi_epi8(pix9, zero);
-                    val_3 = _mm_unpackhi_epi8(pix10, zero);
+                    val_0 = _mm_unpacklo_epi8(pix1, zero);
+                    val_1 = _mm_unpacklo_epi8(pix2, zero);
+                    val_2 = _mm_unpackhi_epi8(pix1, zero);
+                    val_3 = _mm_unpackhi_epi8(pix2, zero);
 
                     // the main calculations
-                    __m128i t0_3 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a44);
-                    __m128i t1_3 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a45);
-                    __m128i r0_3 = _mm_add_epi16(val_1, t0_3);
-                    __m128i r1_3 = _mm_add_epi16(val_3, t1_3);
+                    t0_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a11);
+                    t1_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a11);
+                    r0_0 = _mm_add_epi16(val_1, t0_0);
+                    r1_0 = _mm_add_epi16(val_3, t1_0);
 
                     // pack 16-bit data to 8-bit
-                    __m128i q0_3 = _mm_packus_epi16(r0_3, r1_3);
+                    q0_0 = _mm_packus_epi16(r0_0, r1_0);
                     // gather data from the same lines together
-                    __m128i res4 = _mm_shuffle_epi8(q0_3, horizontal_shuf_mask);
+                    __m128i res2 = _mm_shuffle_epi8(q0_0, horizontal_shuf_mask);
 
-                    val_0 = _mm_unpacklo_epi8(_mm_insert_epi64(val_0, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum *  mapsx[x + 5]      + 1)]), 0), zero);
-                    val_1 = _mm_unpacklo_epi8(_mm_insert_epi64(val_1, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x + 5] + 1) + 1)]), 0), zero);
+#ifdef _MSC_VER
+                    __m128i a22 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 2)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 2)]));
+#else
+                    __m128i a22 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 2)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 2)]));
+#endif
 
-                    val_2 = _mm_insert_epi64(val_2, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * mapsx[x + 6])]), 0);
-                    val_3 = _mm_insert_epi64(val_3, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x + 6] + 1))]), 0);
+                    pix1 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x + 2])]));
+#if defined(__i386__) || defined(_M_IX86)
+                    pix2 = _mm_castpd_si128(_mm_load_sd(reinterpret_cast<const double*>(&tmp[4 * (chanNum * (mapsx[x + 2] + 1))])));
+#else
+                    pix2 = _mm_insert_epi64(pix2, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x + 2] + 1))]), 0);
+#endif
+                    pix2 = _mm_insert_epi32(pix2, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + 2] + 1)) + 8]), 2);
 
-                    val_2 = _mm_unpacklo_epi8(val_2, zero);
-                    val_3 = _mm_unpacklo_epi8(val_3, zero);
-
-                    __m128i t0_4 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a55);
-                    __m128i t1_4 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a66);
-                    __m128i r0_4 = _mm_add_epi16(val_1, t0_4);
-                    __m128i r1_4 = _mm_add_epi16(val_3, t1_4);
-
-                    __m128i q0_4 = _mm_packus_epi16(r0_4, r1_4);
-                    __m128i res5 = _mm_shuffle_epi8(q0_4, horizontal_shuf_mask);
-
-                    __m128i pix15 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * (mapsx[x + 7] - 1) + 2)]));
-                    pix15 = _mm_insert_epi32(pix15, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * mapsx[x + 6] + 2)]), 0);
-
-                    __m128i pix16 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x + 7]   + 2)]));
-                    pix16 = _mm_insert_epi32(pix16, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + 6] + 1) + 2)]), 0);
-
-                    val_0 = _mm_unpacklo_epi8(pix15, zero);
-                    val_1 = _mm_unpacklo_epi8(pix16, zero);
-
-                    val_2 = _mm_unpackhi_epi8(pix15, zero);
-                    val_3 = _mm_unpackhi_epi8(pix16, zero);
+                    // expand 8-bit data to 16-bit
+                    val_0 = _mm_unpacklo_epi8(pix1, zero);
+                    val_1 = _mm_unpacklo_epi8(pix2, zero);
+                    val_2 = _mm_unpackhi_epi8(pix1, zero);
+                    val_3 = _mm_unpackhi_epi8(pix2, zero);
 
                     // the main calculations
-                    __m128i t0_5 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a67);
-                    __m128i t1_5 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a77);
-                    __m128i r0_5 = _mm_add_epi16(val_1, t0_5);
-                    __m128i r1_5 = _mm_add_epi16(val_3, t1_5);
+                    t0_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a22);
+                    t1_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a22);
+                    r0_0 = _mm_add_epi16(val_1, t0_0);
+                    r1_0 = _mm_add_epi16(val_3, t1_0);
 
                     // pack 16-bit data to 8-bit
-                    __m128i q0_5 = _mm_packus_epi16(r0_5, r1_5);
-                    __m128i res6 = _mm_shuffle_epi8(q0_5, horizontal_shuf_mask);
+                    q0_0 = _mm_packus_epi16(r0_0, r1_0);
+                    // gather data from the same lines together
+                    __m128i res3 = _mm_shuffle_epi8(q0_0, horizontal_shuf_mask);
+
+#ifdef _MSC_VER
+                    __m128i a33 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 3)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 3)]));
+#else
+                    __m128i a33 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 3)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 3)]));
+#endif
+
+                    pix1 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x + 3])]));
+#if defined(__i386__) || defined(_M_IX86)
+                    pix2 = _mm_castpd_si128(_mm_load_sd(reinterpret_cast<const double*>(&tmp[4 * (chanNum * (mapsx[x + 3] + 1))])));
+#else
+                    pix2 = _mm_insert_epi64(pix2, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x + 3] + 1))]), 0);
+#endif
+                    pix2 = _mm_insert_epi32(pix2, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + 3] + 1)) + 8]), 2);
+
+                    // expand 8-bit data to 16-bit
+                    val_0 = _mm_unpacklo_epi8(pix1, zero);
+                    val_1 = _mm_unpacklo_epi8(pix2, zero);
+                    val_2 = _mm_unpackhi_epi8(pix1, zero);
+                    val_3 = _mm_unpackhi_epi8(pix2, zero);
+
+                    // the main calculations
+                    t0_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a33);
+                    t1_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a33);
+                    r0_0 = _mm_add_epi16(val_1, t0_0);
+                    r1_0 = _mm_add_epi16(val_3, t1_0);
+
+                    // pack 16-bit data to 8-bit
+                    q0_0 = _mm_packus_epi16(r0_0, r1_0);
+                    // gather data from the same lines together
+                    __m128i res4 = _mm_shuffle_epi8(q0_0, horizontal_shuf_mask);
+
+#ifdef _MSC_VER
+                    __m128i a44 = _mm_setr_epi64x(*reinterpret_cast<const int64_t*>(&clone[4 * (x + 4)]), *reinterpret_cast<const int64_t*>(&clone[4 * (x + 4)]));
+#else
+                    __m128i a44 = _mm_setr_epi64(*reinterpret_cast<const __m64*>(&clone[4 * (x + 4)]), *reinterpret_cast<const __m64*>(&clone[4 * (x + 4)]));
+#endif
+
+                    pix1 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x + 4])]));
+#if defined(__i386__) || defined(_M_IX86)
+                    pix2 = _mm_castpd_si128(_mm_load_sd(reinterpret_cast<const double*>(&tmp[4 * (chanNum * (mapsx[x + 4] + 1))])));
+#else
+                    pix2 = _mm_insert_epi64(pix2, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x + 4] + 1))]), 0);
+#endif
+                    pix2 = _mm_insert_epi32(pix2, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + 4] + 1)) + 8]), 2);
+
+                    // expand 8-bit data to 16-bit
+                    val_0 = _mm_unpacklo_epi8(pix1, zero);
+                    val_1 = _mm_unpacklo_epi8(pix2, zero);
+                    val_2 = _mm_unpackhi_epi8(pix1, zero);
+                    val_3 = _mm_unpackhi_epi8(pix2, zero);
+
+                    // the main calculations
+                    t0_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a44);
+                    t1_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a44);
+                    r0_0 = _mm_add_epi16(val_1, t0_0);
+                    r1_0 = _mm_add_epi16(val_3, t1_0);
+
+                    // pack 16-bit data to 8-bit
+                    q0_0 = _mm_packus_epi16(r0_0, r1_0);
+                    // gather data from the same lines together
+                    __m128i res5 = _mm_shuffle_epi8(q0_0, horizontal_shuf_mask);
 
                     __m128i bl1 = _mm_blend_epi16(res1, _mm_slli_si128(res2, 4), 0xCC /*0b11001100*/);
                     __m128i bl2 = _mm_blend_epi16(_mm_srli_si128(res1, 4), res2, 0xCC /*0b11001100*/);
@@ -496,189 +466,47 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_<3>(uint8_t* dst[],
                     __m128i bl3 = _mm_blend_epi16(res3, _mm_slli_si128(res4, 4), 0xCC /*0b11001100*/);
                     __m128i bl4 = _mm_blend_epi16(_mm_srli_si128(res3, 4), res4, 0xCC /*0b11001100*/);
 
-                    __m128i bl5 = _mm_blend_epi16(res5, _mm_slli_si128(res6, 4), 0xCC /*0b11001100*/);
-                    __m128i bl6 = _mm_blend_epi16(_mm_srli_si128(res5, 4), res6, 0xCC /*0b11001100*/);
-
                     __m128i bl13 = _mm_blend_epi16(bl1, _mm_slli_si128(bl3, 8), 0xF0 /*0b11110000*/);
                     __m128i bl31 = _mm_blend_epi16(_mm_srli_si128(bl1, 8), bl3, 0xF0 /*0b11110000*/);
 
                     __m128i bl24 = _mm_blend_epi16(bl2, _mm_slli_si128(bl4, 8), 0xF0 /*0b11110000*/);
                     __m128i bl42 = _mm_blend_epi16(_mm_srli_si128(bl2, 8), bl4, 0xF0 /*0b11110000*/);
 
-                    // load 3 channels of first pixel from first pair of 4-couple scope
-                    __m128i pix17 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x + 8])]));
-                    // insert first channel from next couple of pixels to completely fill the simd vector
-                    pix17 = _mm_insert_epi32(pix17, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * mapsx[x + 9])]), 3);
+                    bl1 = _mm_blend_epi16(_mm_shuffle_epi8(bl13, horizontal_shuf_mask1),
+                                          _mm_slli_si128(res5, 12), 192 /*0b11000000*/);
+                    bl2 = _mm_blend_epi16(_mm_shuffle_epi8(bl24, horizontal_shuf_mask1),
+                                          _mm_slli_si128(res5, 8), 192 /*0b11000000*/);
+                    bl3 = _mm_blend_epi16(_mm_shuffle_epi8(bl31, horizontal_shuf_mask1),
+                                          _mm_slli_si128(res5, 4), 192 /*0b11000000*/);
+                    bl4 = _mm_blend_epi16(_mm_shuffle_epi8(bl42, horizontal_shuf_mask1),
+                                          res5, 192 /*0b11000000*/);
 
-                    // load 3 channels of neighbor pixel from first pair of 4-couple scope
-                    __m128i pix18 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * (mapsx[x + 8] + 1))]));
-                    // insert first channel from next couple of pixels to completely fill the simd vector
-                    pix18 = _mm_insert_epi32(pix18, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + 9] + 1))]), 3);
-
-                    // expand 8-bit data to 16-bit
-                    val_0 = _mm_unpacklo_epi8(pix17, zero);
-                    val_1 = _mm_unpacklo_epi8(pix18, zero);
-
-                    // expand 8-bit data to 16-bit
-                    val_2 = _mm_unpackhi_epi8(pix17, zero);
-                    val_3 = _mm_unpackhi_epi8(pix18, zero);
-
-                    // the main calculations
-                    __m128i t0_6 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a88);
-                    __m128i t1_6 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a89);
-                    __m128i r0_6 = _mm_add_epi16(val_1, t0_6);
-                    __m128i r1_6 = _mm_add_epi16(val_3, t1_6);
-
-                    // pack 16-bit data to 8-bit
-                    __m128i q0_6 = _mm_packus_epi16(r0_6, r1_6);
-                    // gather data from the same lines together
-                    __m128i res7 = _mm_shuffle_epi8(q0_6, horizontal_shuf_mask);
-
-                    val_0 = _mm_unpacklo_epi8(_mm_insert_epi64(val_0, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * mapsx[x + 9] + 1)]), 0), zero);
-                    val_1 = _mm_unpacklo_epi8(_mm_insert_epi64(val_1, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x + 9] + 1) + 1)]), 0), zero);
-
-                    val_2 = _mm_insert_epi64(val_2, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * mapsx[x + 10])]), 0);
-                    val_3 = _mm_insert_epi64(val_3, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x + 10] + 1))]), 0);
-
-                    val_2 = _mm_unpacklo_epi8(val_2, zero);
-                    val_3 = _mm_unpacklo_epi8(val_3, zero);
-
-                    __m128i t0_7 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a99);
-                    __m128i t1_7 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a1010);
-                    __m128i r0_7 = _mm_add_epi16(val_1, t0_7);
-                    __m128i r1_7 = _mm_add_epi16(val_3, t1_7);
-
-                    __m128i q0_7 = _mm_packus_epi16(r0_7, r1_7);
-                    __m128i res8 = _mm_shuffle_epi8(q0_7, horizontal_shuf_mask);
-
-                    __m128i pix21 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * (mapsx[x + 11] - 1) + 2)]));
-                    pix21 = _mm_insert_epi32(pix21, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * mapsx[x + 10] + 2)]), 0);
-
-                    __m128i pix22 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x + 11] + 2)]));
-                    pix22 = _mm_insert_epi32(pix22, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + 10] + 1) + 2)]), 0);
-
-                    val_0 = _mm_unpacklo_epi8(pix21, zero);
-                    val_1 = _mm_unpacklo_epi8(pix22, zero);
-
-                    val_2 = _mm_unpackhi_epi8(pix21, zero);
-                    val_3 = _mm_unpackhi_epi8(pix22, zero);
-
-                    // the main calculations
-                    __m128i t0_8 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a1011);
-                    __m128i t1_8 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a1111);
-                    __m128i r0_8 = _mm_add_epi16(val_1, t0_8);
-                    __m128i r1_8 = _mm_add_epi16(val_3, t1_8);
-
-                    // pack 16-bit data to 8-bit
-                    __m128i q0_8 = _mm_packus_epi16(r0_8, r1_8);
-                    __m128i res9 = _mm_shuffle_epi8(q0_8, horizontal_shuf_mask);
-
-                    __m128i pix23 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x + 12])]));
-                    // insert first channel from next couple of pixels to completely fill the simd vector
-                    pix23 = _mm_insert_epi32(pix23, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * mapsx[x + 13])]), 3);
-
-                    // load 3 channels of neighbor pixel from first pair of 4-couple scope
-                    __m128i pix24 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * (mapsx[x + 12] + 1))]));
-                    // insert first channel from next couple of pixels to completely fill the simd vector
-                    pix24 = _mm_insert_epi32(pix24, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + 13] + 1))]), 3);
-
-                    // expand 8-bit data to 16-bit
-                    val_0 = _mm_unpacklo_epi8(pix23, zero);
-                    val_1 = _mm_unpacklo_epi8(pix24, zero);
-
-                    // expand 8-bit data to 16-bit
-                    val_2 = _mm_unpackhi_epi8(pix23, zero);
-                    val_3 = _mm_unpackhi_epi8(pix24, zero);
-
-                    // the main calculations
-                    __m128i t0_9 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a1212);
-                    __m128i t1_9 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a1213);
-                    __m128i r0_9 = _mm_add_epi16(val_1, t0_9);
-                    __m128i r1_9 = _mm_add_epi16(val_3, t1_9);
-
-                    // pack 16-bit data to 8-bit
-                    __m128i q0_9 = _mm_packus_epi16(r0_9, r1_9);
-                    // gather data from the same lines together
-                    __m128i res10 = _mm_shuffle_epi8(q0_9, horizontal_shuf_mask);
-
-                    val_0 = _mm_unpacklo_epi8(_mm_insert_epi64(val_0, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * mapsx[x + 13] + 1)]), 0), zero);
-                    val_1 = _mm_unpacklo_epi8(_mm_insert_epi64(val_1, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x + 13] + 1) + 1)]), 0), zero);
-
-                    val_2 = _mm_insert_epi64(val_2, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * mapsx[x + 14])]), 0);
-                    val_3 = _mm_insert_epi64(val_3, *reinterpret_cast<const int64_t*>(&tmp[4 * (chanNum * (mapsx[x + 14] + 1))]), 0);
-
-                    val_2 = _mm_unpacklo_epi8(val_2, zero);
-                    val_3 = _mm_unpacklo_epi8(val_3, zero);
-
-                    __m128i t0_10 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a1313);
-                    __m128i t1_10 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a1414);
-                    __m128i r0_10 = _mm_add_epi16(val_1, t0_10);
-                    __m128i r1_10 = _mm_add_epi16(val_3, t1_10);
-
-                    __m128i q0_10 = _mm_packus_epi16(r0_10, r1_10);
-                    __m128i res11 = _mm_shuffle_epi8(q0_10, horizontal_shuf_mask);
-
-                    __m128i pix27 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * (mapsx[x + 15] - 1) + 2)]));
-                    pix27 = _mm_insert_epi32(pix27, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * mapsx[x + 14] + 2)]), 0);
-
-                    __m128i pix28 = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(&tmp[4 * (chanNum * mapsx[x + 15] + 2)]));
-                    pix28 = _mm_insert_epi32(pix28, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + 14] + 1) + 2)]), 0);
-
-                    val_0 = _mm_unpacklo_epi8(pix27, zero);
-                    val_1 = _mm_unpacklo_epi8(pix28, zero);
-
-                    val_2 = _mm_unpackhi_epi8(pix27, zero);
-                    val_3 = _mm_unpackhi_epi8(pix28, zero);
-
-                    // the main calculations
-                    __m128i t0_11 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a1415);
-                    __m128i t1_11 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a1515);
-                    __m128i r0_11 = _mm_add_epi16(val_1, t0_11);
-                    __m128i r1_11 = _mm_add_epi16(val_3, t1_11);
-
-                    // pack 16-bit data to 8-bit
-                    __m128i q0_11 = _mm_packus_epi16(r0_11, r1_11);
-                    __m128i res12 = _mm_shuffle_epi8(q0_11, horizontal_shuf_mask);
-
-                    __m128i bl7 = _mm_blend_epi16(res7, _mm_slli_si128(res8, 4), 0xCC /*0b11001100*/);
-                    __m128i bl8 = _mm_blend_epi16(_mm_srli_si128(res7, 4), res8, 0xCC /*0b11001100*/);
-
-                    __m128i bl9 = _mm_blend_epi16(res9, _mm_slli_si128(res10, 4), 0xCC /*0b11001100*/);
-                    __m128i bl10 = _mm_blend_epi16(_mm_srli_si128(res9, 4), res10, 0xCC /*0b11001100*/);
-
-                    __m128i bl11 = _mm_blend_epi16(res11, _mm_slli_si128(res12, 4), 0xCC /*0b11001100*/);
-                    __m128i bl12 = _mm_blend_epi16(_mm_srli_si128(res11, 4), res12, 0xCC /*0b11001100*/);
-
-                    __m128i bl57 = _mm_blend_epi16(bl5, _mm_slli_si128(bl7, 8), 0xF0 /*0b11110000*/);
-                    __m128i bl75 = _mm_blend_epi16(_mm_srli_si128(bl5, 8), bl7, 0xF0 /*0b11110000*/);
-
-                    __m128i bl68 = _mm_blend_epi16(bl6, _mm_slli_si128(bl8, 8), 0xF0 /*0b11110000*/);
-                    __m128i bl86 = _mm_blend_epi16(_mm_srli_si128(bl6, 8), bl8, 0xF0 /*0b11110000*/);
-
-                    __m128i bl911 = _mm_blend_epi16(bl9, _mm_slli_si128(bl11, 8), 0xF0 /*0b11110000*/);
-                    __m128i bl119 = _mm_blend_epi16(_mm_srli_si128(bl9, 8), bl11, 0xF0 /*0b11110000*/);
-
-                    __m128i bl1012 = _mm_blend_epi16(bl10, _mm_slli_si128(bl12, 8), 0xF0 /*0b11110000*/);
-                    __m128i bl1210 = _mm_blend_epi16(_mm_srli_si128(bl10, 8), bl12, 0xF0 /*0b11110000*/);
-
-                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[0][3 * x]), bl13);
-                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[1][3 * x]), bl24);
-                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[2][3 * x]), bl31);
-                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[3][3 * x]), bl42);
-                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[0][3 * x + 16]), bl57);
-                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[1][3 * x + 16]), bl68);
-                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[2][3 * x + 16]), bl75);
-                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[3][3 * x + 16]), bl86);
-                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[0][3 * x + 32]), bl911);
-                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[1][3 * x + 32]), bl1012);
-                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[2][3 * x + 32]), bl119);
-                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[3][3 * x + 32]), bl1210);
+                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[0][chanNum * x]), bl1);
+                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[1][chanNum * x]), bl2);
+                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[2][chanNum * x]), bl3);
+                    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[3][chanNum * x]), bl4);
                 }
 
-                if (x < outSz.width) {
-                    x = outSz.width - nlanes;
-                    continue;
+                for (; x < outSz.width; ++x)  // x is not re-initialised: continues from wherever the preceding SIMD block left it
+                {
+                    constexpr static const int ONE = 1 << 15;   // fixed-point unity: weights are scaled by 2^15 (undone by the >> 15 below)
+                    constexpr static const int half = 1 << 14;  // rounding bias (0.5 at the 2^15 scale), added before the shift
+                    auto alpha0 = alpha[x];                     // weight applied to the sample at mapsx[x]
+                    auto alpha1 = saturate_cast<short>(ONE - alpha[x]);  // complementary weight for the sample at mapsx[x] + 1
+                    // tmp is indexed as 4 * (chanNum * column + c) + l: offsets +0..+3 feed dst[0]..dst[3] respectively
+                    for (int c = 0; c < chanNum; ++c)
+                    {
+                        dst[0][chanNum * x + c] = (tmp[4 * (chanNum *  mapsx[x]      + c)    ] * alpha0 +
+                                                   tmp[4 * (chanNum * (mapsx[x] + 1) + c)    ] * alpha1 + half) >> 15;
+                        dst[1][chanNum * x + c] = (tmp[4 * (chanNum *  mapsx[x]      + c) + 1] * alpha0 +
+                                                   tmp[4 * (chanNum * (mapsx[x] + 1) + c) + 1] * alpha1 + half) >> 15;
+                        dst[2][chanNum * x + c] = (tmp[4 * (chanNum *  mapsx[x]      + c) + 2] * alpha0 +
+                                                   tmp[4 * (chanNum * (mapsx[x] + 1) + c) + 2] * alpha1 + half) >> 15;
+                        dst[3][chanNum * x + c] = (tmp[4 * (chanNum *  mapsx[x]      + c) + 3] * alpha0 +
+                                                   tmp[4 * (chanNum * (mapsx[x] + 1) + c) + 3] * alpha1 + half) >> 15;
+                    }
                 }
+
                 break;
             }
         }
diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp
index df44e57259..aca2dcca6f 100644
--- a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp
+++ b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp
@@ -1026,8 +1026,8 @@ GAPI_FLUID_KERNEL(GFluidSobel, cv::gapi::imgproc::GSobel, true)
         auto *kx = scratch.OutLine<float>();
         auto *ky = kx + ksz;
 
-        Mat kxmat(1, ksize, CV_32FC1, kx);
-        Mat kymat(ksize, 1, CV_32FC1, ky);
+        Mat kxmat(1, ksz, CV_32FC1, kx);
+        Mat kymat(ksz, 1, CV_32FC1, ky);
         getDerivKernels(kxmat, kymat, dx, dy, ksize);
     }
 
@@ -1185,12 +1185,12 @@ GAPI_FLUID_KERNEL(GFluidSobelXY, cv::gapi::imgproc::GSobelXY, true)
         auto *kx_dy = buf_helper.kx_dy;
         auto *ky_dy = buf_helper.ky_dy;
 
-        Mat kxmatX(1, ksize, CV_32FC1, kx_dx);
-        Mat kymatX(ksize, 1, CV_32FC1, ky_dx);
+        Mat kxmatX(1, ksz, CV_32FC1, kx_dx);
+        Mat kymatX(ksz, 1, CV_32FC1, ky_dx);
         getDerivKernels(kxmatX, kymatX, order, 0, ksize);
 
-        Mat kxmatY(1, ksize, CV_32FC1, kx_dy);
-        Mat kymatY(ksize, 1, CV_32FC1, ky_dy);
+        Mat kxmatY(1, ksz, CV_32FC1, kx_dy);
+        Mat kymatY(ksz, 1, CV_32FC1, ky_dy);
         getDerivKernels(kxmatY, kymatY, 0, order, ksize);
     }
 
@@ -2017,14 +2017,13 @@ static void calcRowLinearC(const cv::gapi::fluid::View  & in,
         dst[l] = out.OutLine<T>(l);
     }
 
-#if 0 // Disabling SSE4.1 path due to Valgrind issues: https://github.com/opencv/opencv/issues/21097
 #if CV_SSE4_1
     const auto* clone = scr.clone;
     auto* tmp = scr.tmp;
 
     if (inSz.width >= 16 && outSz.width >= 16)
     {
-        sse42::calcRowLinear_8UC_Impl_<numChan>(reinterpret_cast<uint8_t**>(dst),
+        sse41::calcRowLinear_8UC_Impl_<numChan>(reinterpret_cast<uint8_t**>(dst),
                                                 reinterpret_cast<const uint8_t**>(src0),
                                                 reinterpret_cast<const uint8_t**>(src1),
                                                 reinterpret_cast<const short*>(alpha),
@@ -2037,7 +2036,6 @@ static void calcRowLinearC(const cv::gapi::fluid::View  & in,
         return;
     }
 #endif // CV_SSE4_1
-#endif
     int length = out.length();
     for (int l = 0; l < lpi; l++) {
         constexpr static const auto unity = Mapper::unity;
@@ -2080,8 +2078,8 @@ GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::imgproc::GResize, true)
        int outSz_h;
        if (outSz.width == 0 || outSz.height == 0)
        {
-           outSz_w = static_cast<int>(round(in.size.width * fx));
-           outSz_h = static_cast<int>(round(in.size.height * fy));
+           outSz_w = saturate_cast<int>(in.size.width * fx);
+           outSz_h = saturate_cast<int>(in.size.height * fy);
        }
        else
        {
diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp
index 929a18f721..b155ff0aea 100644
--- a/modules/gapi/src/backends/ie/giebackend.cpp
+++ b/modules/gapi/src/backends/ie/giebackend.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018-2021 Intel Corporation
+// Copyright (C) 2018-2022 Intel Corporation
 
 #include "precomp.hpp"
 
@@ -19,6 +19,8 @@
 #include <functional>
 #include <unordered_set>
 #include <atomic>
+#include <tuple>
+
 
 #include <ade/util/algorithm.hpp>
 
@@ -180,6 +182,10 @@ inline IE::Blob::Ptr wrapIE(const cv::MediaFrame::View& view,
             auto uv_plane = cv::Mat(desc.size / 2, CV_8UC2, view.ptr[1], view.stride[1]);
             return cv::gapi::ie::util::to_ie(y_plane, uv_plane);
         }
+        case cv::MediaFormat::GRAY: {
+            auto gray = cv::Mat(desc.size, CV_8UC1, view.ptr[0], view.stride[0]);
+            return wrapIE(gray, cv::gapi::ie::TraitAs::IMAGE);
+        }
         default:
             GAPI_Assert(false && "Unsupported media format for IE backend");
     }
@@ -210,6 +216,39 @@ inline void copyFromIE(const IE::Blob::Ptr &blob, MatType &mat) {
     }
 }
 
+template <typename MapT>
+void checkLayerNames(const MapT&                     network_map,
+                     const std::vector<std::string>& layer_names,
+                     const std::string&              layer_type) {
+    for (const auto& layer_name : layer_names) {
+        const auto it = network_map.find(layer_name);
+        if (it == network_map.end()) {
+            std::stringstream ss;
+            ss << "Failed to find " << layer_type << " layer with name: "
+               << "\"" << layer_name << "\"" << std::endl;
+            ss << "Network " << layer_type << " layers: " << std::endl;
+            for (const auto& p : network_map) {
+                const auto& desc = p.second->getTensorDesc();
+                ss << p.first << " : " << desc.getPrecision()
+                   << " / " << desc.getLayout() << std::endl;
+            }
+            throw std::logic_error(ss.str());
+        }
+    }
+}
+
+template <typename MapT>
+void checkInputLayerNames(const MapT&                     network_map,
+                          const std::vector<std::string>& layer_names) {
+    checkLayerNames(network_map, layer_names, "input");
+}
+
+template <typename MapT>
+void checkOutputLayerNames(const MapT&                     network_map,
+                          const std::vector<std::string>& layer_names) {
+    checkLayerNames(network_map, layer_names, "output");
+}
+
 // IE-specific metadata, represents a network with its parameters
 struct IEUnit {
     static const char *name() { return "IEModelConfig"; }
@@ -287,6 +326,16 @@ struct IEUnit {
                          params.num_in &&
                         "Number of layers to reshape must be less than or equal to number of inputs");
         }
+
+        if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) {
+            checkInputLayerNames(net.getInputsInfo(), params.input_names);
+            checkOutputLayerNames(net.getOutputsInfo(), params.output_names);
+        } else if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import) {
+            checkInputLayerNames(this_network.GetInputsInfo(), params.input_names);
+            checkOutputLayerNames(this_network.GetOutputsInfo(), params.output_names);
+        } else {
+            cv::util::throw_error(std::logic_error("Unsupported ParamDesc::Kind"));
+        }
     }
 
     // This method is [supposed to be] called at Island compilation stage
@@ -505,20 +554,27 @@ inline IE::Blob::Ptr extractRemoteBlob(IECallContext& ctx, std::size_t i) {
                 "Remote blob is supported for MediaFrame only");
 
     cv::util::any any_blob_params = ctx.inFrame(i).blobParams();
-    auto ie_core = cv::gimpl::ie::wrap::getCore();
 
-    using ParamType = std::pair<InferenceEngine::TensorDesc,
-                                InferenceEngine::ParamMap>;
+    using ParamType = std::pair<InferenceEngine::TensorDesc, InferenceEngine::ParamMap>;
+    using NV12ParamType = std::pair<ParamType, ParamType>;
 
-    ParamType* blob_params = cv::util::any_cast<ParamType>(&any_blob_params);
+    NV12ParamType* blob_params = cv::util::any_cast<NV12ParamType>(&any_blob_params);
     if (blob_params == nullptr) {
-        GAPI_Assert(false && "Incorrect type of blobParams: "
-                              "expected std::pair<InferenceEngine::TensorDesc,"
-                                                 "InferenceEngine::ParamMap>");
+        GAPI_Assert(false && "Incorrect type of blobParams: "
+                             "expected std::pair<ParamType, ParamType>, "
+                             "with ParamType std::pair<InferenceEngine::TensorDesc, "
+                                                      "InferenceEngine::ParamMap>");
     }
 
-    return ctx.uu.rctx->CreateBlob(blob_params->first,
-                                   blob_params->second);
+    // The parameters are TensorDesc and ParamMap for both y and uv blobs
+    auto y_blob = ctx.uu.rctx->CreateBlob(blob_params->first.first, blob_params->first.second);
+    auto uv_blob = ctx.uu.rctx->CreateBlob(blob_params->second.first, blob_params->second.second);
+
+#if INF_ENGINE_RELEASE >= 2021010000
+    return IE::make_shared_blob<IE::NV12Blob>(y_blob, uv_blob);
+#else
+    return IE::make_shared_blob<InferenceEngine::NV12Blob>(y_blob, uv_blob);
+#endif
 }
 
 inline IE::Blob::Ptr extractBlob(IECallContext& ctx,
@@ -560,6 +616,19 @@ static void setBlob(InferenceEngine::InferRequest& req,
     }
 }
 
+static void setROIBlob(InferenceEngine::InferRequest& req,
+                       const std::string&             layer_name,
+                       const IE::Blob::Ptr&           blob,
+                       const cv::Rect &roi,
+                       const IECallContext&           ctx) {
+    if (ctx.uu.params.device_id.find("GPU") != std::string::npos) {
+        GAPI_LOG_DEBUG(nullptr, "Skip ROI blob creation for device_id: " <<
+                       ctx.uu.params.device_id << ", layer: " << layer_name);
+        setBlob(req, layer_name, blob, ctx);
+    } else {
+        setBlob(req, layer_name, IE::make_shared_blob(blob, toIE(roi)), ctx);
+    }
+}
 } // anonymous namespace
 
 std::vector<InferenceEngine::InferRequest> cv::gimpl::ie::IECompiled::createInferRequests() {
@@ -601,7 +670,10 @@ public:
     void waitAll();
 
 private:
-    void callback(Task task, InferenceEngine::InferRequest& request, size_t id);
+    void callback(Task task,
+                  size_t id,
+                  IE::InferRequest request,
+                  IE::StatusCode code);
     void setup();
 
     QueueClass<size_t>                         m_idle_ids;
@@ -626,21 +698,38 @@ void cv::gimpl::ie::RequestPool::execute(cv::gimpl::ie::RequestPool::Task&& t) {
 
     auto& request = m_requests[id];
 
+    using namespace std::placeholders;
+    using callback_t = std::function<void(IE::InferRequest, IE::StatusCode)>;
     request.SetCompletionCallback(
-            std::bind(&cv::gimpl::ie::RequestPool::callback, this, t, std::ref(request), id));
+            static_cast<callback_t>(
+                std::bind(&cv::gimpl::ie::RequestPool::callback, this,
+                          t, id, _1, _2)));
     t.run(request);
 }
 
 void cv::gimpl::ie::RequestPool::callback(cv::gimpl::ie::RequestPool::Task task,
-                                          InferenceEngine::InferRequest& request,
-                                          size_t id) {
-    task.callback(request);
-    // NB: IE::InferRequest keeps the callback until the new one is set.
-    // Since user's callback might keep resources that should be released,
-    // need to destroy its after execution.
-    // Let's set the empty one to cause the destruction of a callback.
-    request.SetCompletionCallback([](){});
-    m_idle_ids.push(id);
+                                          size_t id,
+                                          IE::InferRequest request,
+                                          IE::StatusCode code) {
+    // FIXME: Any exception that arises here must not leave this callback,
+    // because it won't be handled.
+    try {
+        if (code != IE::StatusCode::OK) {
+            throw std::logic_error("IE::InferRequest finished with not OK status");
+        }
+        task.callback(request);
+        // NB: IE::InferRequest keeps the callback until a new one is set.
+        // Since the user's callback might hold resources that should be released,
+        // it needs to be destroyed after execution.
+        // Let's set an empty one to cause the destruction of the callback.
+        request.SetCompletionCallback([](){});
+        m_idle_ids.push(id);
+    } catch (const std::exception& e) {
+        GAPI_LOG_FATAL(NULL, "Callback failed with error: " << e.what());
+        // FIXME: The exception CAN'T be rethrown here, since this callback runs
+        // in a separate IE thread and such scenarios aren't handled properly in
+        // G-API so far.
+    }
 }
 
 // NB: Not thread-safe.
@@ -815,6 +904,9 @@ static void configureInputInfo(const IE::InputInfo::Ptr& ii, const cv::GMetaArg
                 case cv::MediaFormat::BGR:
                     // NB: Do nothing
                     break;
+                case cv::MediaFormat::GRAY:
+                    // NB: Do nothing
+                    break;
                 default:
                     GAPI_Assert(false && "Unsupported media format for IE backend");
             }
@@ -826,6 +918,13 @@ static void configureInputInfo(const IE::InputInfo::Ptr& ii, const cv::GMetaArg
     }
 }
 
+static bool isApplicableForResize(const IE::TensorDesc& desc) {
+    const auto layout = desc.getLayout();
+    const auto prec   = desc.getPrecision();
+    return (layout == IE::Layout::NCHW || layout == IE::Layout::NHWC) &&
+           (prec == IE::Precision::FP32 || prec == IE::Precision::U8);
+}
+
 static IE::PreProcessInfo configurePreProcInfo(const IE::InputInfo::CPtr& ii,
                                                const cv::GMetaArg&        mm) {
     IE::PreProcessInfo info;
@@ -835,9 +934,7 @@ static IE::PreProcessInfo configurePreProcInfo(const IE::InputInfo::CPtr& ii,
             info.setColorFormat(IE::ColorFormat::NV12);
         }
     }
-    const auto layout = ii->getTensorDesc().getLayout();
-    if (layout == IE::Layout::NCHW ||
-        layout == IE::Layout::NHWC) {
+    if (isApplicableForResize(ii->getTensorDesc())) {
         info.setResizeAlgorithm(IE::RESIZE_BILINEAR);
     }
     return info;
@@ -957,11 +1054,7 @@ struct Infer: public cv::detail::KernelTag {
                         configureInputReshapeByImage(ii, mm, input_reshape_table);
                     }
 
-                    // NB: Configure resize only for NCHW/NHWC layout,
-                    // since it isn't supposed to work with others.
-                    auto layout = ii->getTensorDesc().getLayout();
-                    if (layout == IE::Layout::NCHW ||
-                        layout == IE::Layout::NHWC) {
+                    if (isApplicableForResize(ii->getTensorDesc())) {
                         ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR);
                     }
             }
@@ -1066,7 +1159,9 @@ struct InferROI: public cv::detail::KernelTag {
                 uu.params.layer_names_to_reshape.end()) {
                 configureInputReshapeByImage(ii, mm, input_reshape_table);
             }
-            ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR);
+            if (isApplicableForResize(ii->getTensorDesc())) {
+                ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR);
+            }
 
             // FIXME: This isn't the best place to call reshape function.
             // Сorrect solution would be to do this in compile() method of network,
@@ -1114,10 +1209,9 @@ struct InferROI: public cv::detail::KernelTag {
                         // it should be treated as image
                         IE::Blob::Ptr this_blob =
                             extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE);
-                        setBlob(req,
-                                *(ctx->uu.params.input_names.begin()),
-                                IE::make_shared_blob(this_blob, toIE(this_roi)),
-                                *ctx);
+                        setROIBlob(req,
+                                   *(ctx->uu.params.input_names.begin()),
+                                   this_blob, this_roi, *ctx);
                         // FIXME: Should it be done by kernel ?
                         // What about to do that in RequestPool ?
                         req.StartAsync();
@@ -1165,7 +1259,9 @@ struct InferList: public cv::detail::KernelTag {
                     uu.params.layer_names_to_reshape.end()) {
                     configureInputReshapeByImage(ii, mm, input_reshape_table);
                 }
-                ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR);
+                if (isApplicableForResize(ii->getTensorDesc())) {
+                    ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR);
+                }
             }
 
             // FIXME: This isn't the best place to call reshape function.
@@ -1318,7 +1414,9 @@ struct InferList2: public cv::detail::KernelTag {
                         uu.params.layer_names_to_reshape.end()) {
                         configureInputReshapeByImage(ii, mm_0, input_reshape_table);
                     }
-                    ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR);
+                    if (isApplicableForResize(ii->getTensorDesc())) {
+                        ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR);
+                    }
 
                     // FIXME: This isn't the best place to call reshape function.
                     // Сorrect solution would be to do this in compile() method of network,
diff --git a/modules/gapi/src/backends/oak/goak.cpp b/modules/gapi/src/backends/oak/goak.cpp
new file mode 100644
index 0000000000..6d9044aefa
--- /dev/null
+++ b/modules/gapi/src/backends/oak/goak.cpp
@@ -0,0 +1,47 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2021 Intel Corporation
+
+#include <opencv2/gapi/oak/oak.hpp>
+#include <opencv2/gapi/cpu/gcpukernel.hpp>
+
+#include "oak_media_adapter.hpp"
+
+#include <thread>
+#include <chrono>
+
+namespace cv {
+namespace gapi {
+namespace oak {
+
+GArray<uint8_t> encode(const GFrame& in, const EncoderConfig& cfg) {
+    return GEncFrame::on(in, cfg);
+}
+
+GFrame sobelXY(const GFrame& in, const cv::Mat& hk, const cv::Mat& vk) {
+    return GSobelXY::on(in, hk, vk);
+}
+
+// This is a dummy oak::ColorCamera class that just makes our pipelining
+// machinery work. The real data comes from the physical camera which
+// is handled by DepthAI library.
+ColorCamera::ColorCamera()
+    : m_dummy(cv::MediaFrame::Create<cv::gapi::oak::OAKMediaAdapter>()) {
+}
+
+bool ColorCamera::pull(cv::gapi::wip::Data &data) {
+    // FIXME: Avoid passing this formal frame to the pipeline
+    std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    data = m_dummy;
+    return true;
+}
+
+cv::GMetaArg ColorCamera::descr_of() const {
+    return cv::GMetaArg{cv::descr_of(m_dummy)};
+}
+
+} // namespace oak
+} // namespace gapi
+} // namespace cv
diff --git a/modules/gapi/src/backends/oak/goak_media_adapter.cpp b/modules/gapi/src/backends/oak/goak_media_adapter.cpp
new file mode 100644
index 0000000000..c8e6bbb59b
--- /dev/null
+++ b/modules/gapi/src/backends/oak/goak_media_adapter.cpp
@@ -0,0 +1,34 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2021 Intel Corporation
+
+#include "oak_media_adapter.hpp"
+
+#include <utility> // std::move
+
+namespace cv {
+namespace gapi {
+namespace oak {
+
+OAKMediaAdapter::OAKMediaAdapter(cv::Size sz, cv::MediaFormat fmt, std::vector<uint8_t>&& buffer) {
+    GAPI_Assert(fmt == cv::MediaFormat::NV12 && "OAKMediaAdapter only supports NV12 format for now");
+    m_sz = sz;
+    m_fmt = fmt;
+    m_buffer = std::move(buffer); // a named rvalue reference is an lvalue: move explicitly, plain assignment would copy
+}
+
+MediaFrame::View OAKMediaAdapter::OAKMediaAdapter::access(MediaFrame::Access) {
+    uint8_t* y_ptr = m_buffer.data();
+    uint8_t* uv_ptr = m_buffer.data() + static_cast<long>(m_buffer.size() / 3 * 2);
+    return MediaFrame::View{cv::MediaFrame::View::Ptrs{y_ptr, uv_ptr},
+                            cv::MediaFrame::View::Strides{static_cast<long unsigned int>(m_sz.width),
+                                                          static_cast<long unsigned int>(m_sz.width)}};
+}
+
+cv::GFrameDesc OAKMediaAdapter::OAKMediaAdapter::meta() const { return {m_fmt, m_sz}; }
+
+} // namespace oak
+} // namespace gapi
+} // namespace cv
diff --git a/modules/gapi/src/backends/oak/goakbackend.cpp b/modules/gapi/src/backends/oak/goakbackend.cpp
new file mode 100644
index 0000000000..dc0daaead3
--- /dev/null
+++ b/modules/gapi/src/backends/oak/goakbackend.cpp
@@ -0,0 +1,711 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2021 Intel Corporation
+
+#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+
+#ifdef HAVE_OAK
+
+#include <cstring>
+#include <unordered_set>
+#include <algorithm> // any_of
+#include <functional> // reference_wrapper
+
+#include <ade/util/zip_range.hpp>
+
+#include <api/gbackend_priv.hpp>
+#include <backends/common/gbackend.hpp>
+
+#include <opencv2/gapi/streaming/meta.hpp> // streaming::meta_tag
+
+#include "depthai/depthai.hpp"
+
+#include <opencv2/gapi/oak/oak.hpp>
+#include "oak_media_adapter.hpp"
+
+namespace cv { namespace gimpl {
+
+// Forward declaration
+class GOAKContext;
+struct OAKNodeInfo;
+
+class GOAKExecutable final: public GIslandExecutable {
+    friend class GOAKContext;
+    virtual void run(std::vector<InObj>&&,
+                     std::vector<OutObj>&&) override {
+        GAPI_Assert(false && "Not implemented");
+    }
+
+    virtual void run(GIslandExecutable::IInput &in,
+                     GIslandExecutable::IOutput &out) override;
+
+    void LinkToParents(ade::NodeHandle handle);
+
+    class ExtractTypeHelper : protected dai::Node {
+    public:
+        using Input = dai::Node::Input;
+        using Output = dai::Node::Output;
+        using InputPtr = dai::Node::Input*;
+        using OutputPtr = dai::Node::Output*;
+    };
+
+    struct OAKNodeInfo {
+        std::shared_ptr<dai::Node> node = nullptr;
+        std::vector<ExtractTypeHelper::InputPtr> inputs = {};
+        std::vector<ExtractTypeHelper::OutputPtr> outputs = {};
+    };
+
+    struct OAKOutQueueInfo {
+        std::shared_ptr<dai::node::XLinkOut> xlink_output;
+        std::shared_ptr<dai::DataOutputQueue> out_queue;
+        std::string out_queue_name;
+    };
+
+    cv::GArg packInArg(const GArg &arg, std::vector<ExtractTypeHelper::InputPtr>& oak_ins);
+    void packOutArg(const RcDesc &rc, std::vector<ExtractTypeHelper::OutputPtr>& oak_outs);
+
+    const ade::Graph& m_g;
+    GModel::ConstGraph m_gm;
+    cv::GCompileArgs m_args;
+
+    std::unordered_map<ade::NodeHandle,
+                       OAKNodeInfo,
+                       ade::HandleHasher<ade::Node>> m_oak_nodes;
+
+    // Will be reworked later when XLinkIn is introduced as input
+    std::shared_ptr<dai::node::ColorCamera> m_camera_input;
+    cv::Size m_camera_size;
+
+    // Backend outputs
+    std::vector<OAKOutQueueInfo> m_out_queues;
+
+    // Backend inputs
+    std::vector<std::pair<std::string, dai::Buffer>> m_in_queues;
+
+    // Note: dai::Pipeline should be the only one for the whole pipeline,
+    // so there is no way to insert any non-OAK node in graph between other OAK nodes.
+    // The only heterogeneous case possible is if we insert other backends after or before
+    // OAK island.
+    std::unique_ptr<dai::Device> m_device;
+    std::unique_ptr<dai::Pipeline> m_pipeline;
+
+public:
+    GOAKExecutable(const ade::Graph& g,
+                   const cv::GCompileArgs& args,
+                   const std::vector<ade::NodeHandle>& nodes,
+                   const std::vector<cv::gimpl::Data>& ins_data,
+                   const std::vector<cv::gimpl::Data>& outs_data);
+    ~GOAKExecutable() = default;
+
+    // FIXME: could it reshape?
+    virtual bool canReshape() const override { return false; }
+    virtual void reshape(ade::Graph&, const GCompileArgs&) override {
+        GAPI_Assert(false && "GOAKExecutable::reshape() is not supported");
+    }
+
+    virtual void handleNewStream() override;
+    virtual void handleStopStream() override;
+};
+
+class GOAKContext {
+public:
+    // FIXME: make private?
+    using Input = GOAKExecutable::ExtractTypeHelper::Input;
+    using Output = GOAKExecutable::ExtractTypeHelper::Output;
+    using InputPtr = GOAKExecutable::ExtractTypeHelper::Input*;
+    using OutputPtr = GOAKExecutable::ExtractTypeHelper::Output*;
+
+    GOAKContext(const std::unique_ptr<dai::Pipeline>& pipeline,
+                const cv::Size& camera_size,
+                std::vector<cv::GArg>& args,
+                std::vector<OutputPtr>& results);
+
+    // Generic accessor API
+    template<typename T>
+    T& inArg(int input) { return m_args.at(input).get<T>(); }
+
+    // FIXME: consider not using raw pointers
+    InputPtr& in(int input);
+    OutputPtr& out(int output);
+
+    const std::unique_ptr<dai::Pipeline>& pipeline();
+    const cv::Size& camera_size() const;
+
+private:
+    const std::unique_ptr<dai::Pipeline>& m_pipeline;
+    const cv::Size& m_camera_size;
+    std::vector<cv::GArg>& m_args;
+    std::vector<OutputPtr>& m_outputs;
+};
+
+GOAKContext::GOAKContext(const std::unique_ptr<dai::Pipeline>& pipeline,
+                         const cv::Size& camera_size,
+                         std::vector<cv::GArg>& args,
+                         std::vector<OutputPtr>& results)
+    : m_pipeline(pipeline), m_camera_size(camera_size), m_args(args), m_outputs(results) {}
+
+const std::unique_ptr<dai::Pipeline>& GOAKContext::pipeline() {
+    return m_pipeline;
+}
+
+const cv::Size& GOAKContext::camera_size() const {
+    return m_camera_size;
+}
+
+GOAKContext::InputPtr& GOAKContext::in(int input) {
+    return inArg<std::reference_wrapper<GOAKContext::InputPtr>>(input).get();
+}
+
+GOAKContext::OutputPtr& GOAKContext::out(int output) {
+    return m_outputs.at(output);
+}
+
+namespace detail {
+template<class T> struct get_in;
+template<> struct get_in<cv::GFrame> {
+    static GOAKContext::InputPtr& get(GOAKContext &ctx, int idx) { return ctx.in(idx); }
+};
+template<class T> struct get_in {
+    static T get(GOAKContext &ctx, int idx) { return ctx.inArg<T>(idx); }
+};
+// FIXME: add support of other types
+
+template<class T> struct get_out;
+template<> struct get_out<cv::GFrame> {
+    static GOAKContext::OutputPtr& get(GOAKContext &ctx, int idx) { return ctx.out(idx); }
+};
+template<typename U> struct get_out<cv::GArray<U>> {
+    static GOAKContext::OutputPtr& get(GOAKContext &ctx, int idx) { return ctx.out(idx); }
+};
+// FIXME: add support of other types
+
+struct OAKKernelParams {
+    const std::unique_ptr<dai::Pipeline>& pipeline;
+    const cv::Size& camera_size;
+    std::vector<std::pair<std::string, dai::Buffer>>& m_in_queues;
+};
+
+template<typename, typename, typename>
+struct OAKCallHelper;
+
+template<typename Impl, typename... Ins, typename... Outs>
+struct OAKCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> > {
+    template<int... IIs, int... OIs>
+    static std::shared_ptr<dai::Node> construct_impl(  GOAKContext &ctx
+                                                     , std::vector<std::pair<std::string,
+                                                                             dai::Buffer>>& in_queues_params
+                                                     , cv::detail::Seq<IIs...>
+                                                     , cv::detail::Seq<OIs...>) {
+        return Impl::put(OAKKernelParams{ctx.pipeline(),
+                                         ctx.camera_size(),
+                                         in_queues_params},
+                         get_in<Ins>::get(ctx, IIs)...,
+                         get_out<Outs>::get(ctx, OIs)...);
+    }
+
+    static std::shared_ptr<dai::Node> construct(GOAKContext &ctx,
+                                                std::vector<std::pair<std::string,
+                                                                      dai::Buffer>>& in_queues_params) {
+        return construct_impl(ctx,
+                              in_queues_params,
+                              typename cv::detail::MkSeq<sizeof...(Ins)>::type(),
+                              typename cv::detail::MkSeq<sizeof...(Outs)>::type());
+    }
+};
+
+} // namespace detail
+
+struct GOAKKernel {
+    using F = std::function<std::shared_ptr<dai::Node>(GOAKContext&,
+                                                       std::vector<std::pair<std::string, dai::Buffer>>&)>;
+    explicit GOAKKernel(const F& f) : m_put_f(f) {}
+    const F m_put_f;
+};
+
+struct OAKComponent
+{
+    static const char *name() { return "OAK Component"; }
+    GOAKKernel k;
+};
+
+}} // namespace gimpl // namespace cv
+
+using OAKGraph = ade::TypedGraph
+    < cv::gimpl::OAKComponent
+    // FIXME: extend
+    >;
+
+using ConstOAKGraph = ade::ConstTypedGraph
+    < cv::gimpl::OAKComponent
+    // FIXME: extend
+    >;
+
+// This function links dai operation nodes - parent's outputs to child's inputs.
+// It uses the G-API graph to find, for the given operation node, its preceding operation,
+// then links the two in the dai graph.
+void cv::gimpl::GOAKExecutable::LinkToParents(ade::NodeHandle handle)
+{
+    ade::NodeHandle parent;
+    for (const auto& data_nh : handle.get()->inNodes()) {
+        // Data node has only 1 input
+        GAPI_Assert(data_nh.get()->inNodes().size() == 1);
+        parent = data_nh.get()->inNodes().front();
+
+        // Assuming that OAK nodes are aligned for linking.
+        // FIXME: potential rework might be needed when
+        //        a counterexample is found.
+        GAPI_Assert(m_oak_nodes.at(handle).inputs.size() ==
+                    m_oak_nodes.at(parent).outputs.size() &&
+                    "Internal OAK nodes are not aligned for linking");
+        for (auto && it : ade::util::zip(ade::util::toRange(m_oak_nodes.at(parent).outputs),
+                                         ade::util::toRange(m_oak_nodes.at(handle).inputs)))
+        {
+            auto &out = std::get<0>(it);
+            auto &in = std::get<1>(it);
+            out->link(*in);
+        }
+    }
+}
+
+cv::GArg
+cv::gimpl::GOAKExecutable::packInArg(const GArg &arg,
+                                     std::vector<ExtractTypeHelper::InputPtr>& oak_ins) {
+    if (arg.kind != cv::detail::ArgKind::GOBJREF) {
+        GAPI_Assert(   arg.kind != cv::detail::ArgKind::GMAT
+                    && arg.kind != cv::detail::ArgKind::GSCALAR
+                    && arg.kind != cv::detail::ArgKind::GARRAY
+                    && arg.kind != cv::detail::ArgKind::GOPAQUE
+                    && arg.kind != cv::detail::ArgKind::GFRAME);
+        // All other cases - pass as-is, with no transformations to
+        // GArg contents.
+        return const_cast<cv::GArg&>(arg);
+    }
+    const cv::gimpl::RcDesc &ref = arg.get<cv::gimpl::RcDesc>();
+    switch (ref.shape) {
+    case GShape::GFRAME:
+        oak_ins.push_back(nullptr);
+        return GArg(std::reference_wrapper<ExtractTypeHelper::InputPtr>(oak_ins.back()));
+        break;
+    default:
+        util::throw_error(std::logic_error("Unsupported GShape type in OAK backend"));
+        break;
+    }
+}
+
+void cv::gimpl::GOAKExecutable::packOutArg(const RcDesc &rc,
+                                           std::vector<ExtractTypeHelper::OutputPtr>& oak_outs) {
+    switch (rc.shape) {
+    case GShape::GFRAME:
+        oak_outs.push_back(nullptr);
+        break;
+    case GShape::GARRAY:
+        oak_outs.push_back(nullptr);
+        break;
+    default:
+        util::throw_error(std::logic_error("Unsupported GShape type in OAK backend"));
+        break;
+    }
+}
+
+cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g,
+                                          const cv::GCompileArgs &args,
+                                          const std::vector<ade::NodeHandle>& nodes,
+                                          const std::vector<cv::gimpl::Data>& ins_data,
+                                          const std::vector<cv::gimpl::Data>& outs_data)
+    : m_g(g), m_gm(m_g), m_args(args),
+      m_device(nullptr), m_pipeline(new dai::Pipeline)
+    {
+        // FIXME: currently OAK backend only works with camera as input,
+        //        so it must be a single object
+        GAPI_Assert(ins_data.size() == 1);
+
+        // Check that there is only one OAK island in graph since there
+        // can only be one instance of dai::Pipeline in the application
+        auto isl_graph = m_gm.metadata().get<IslandModel>().model;
+        GIslandModel::Graph gim(*isl_graph);
+        size_t oak_islands = 0;
+
+        for (const auto& nh : gim.nodes())
+        {
+            if (gim.metadata(nh).get<NodeKind>().k == NodeKind::ISLAND)
+            {
+                const auto isl = gim.metadata(nh).get<FusedIsland>().object;
+                if (isl->backend() == cv::gapi::oak::backend())
+                {
+                    ++oak_islands;
+                }
+                if (oak_islands > 1) {
+                    util::throw_error
+                        (std::logic_error
+                            ("There can only be one OAK island in graph"));
+                }
+            }
+        }
+
+        // FIXME: change the hard-coded behavior (XLinkIn path)
+        auto camRgb = m_pipeline->create<dai::node::ColorCamera>();
+        // FIXME: extract camera compile arguments here and properly convert them for dai
+        camRgb->setBoardSocket(dai::CameraBoardSocket::RGB);
+        camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
+
+        // Set camera output. Fixme: consider working with other camera outputs
+        m_camera_input = camRgb;
+        // FIXME: change when other camera sensors are introduced
+        std::tuple<int, int> video_size = camRgb->getVideoSize();
+        m_camera_size = cv::Size{std::get<0>(video_size), std::get<1>(video_size)};
+
+        // Prepare XLinkOut nodes for each output object in graph
+        for (size_t i = 0; i < outs_data.size(); ++i) {
+            auto xout = m_pipeline->create<dai::node::XLinkOut>();
+            std::string xout_name = "xout" + std::to_string(i);
+            xout->setStreamName(xout_name);
+            m_out_queues.push_back({xout, nullptr, xout_name});
+        }
+
+        // Create OAK node for each node in this backend
+        for (const auto& nh : nodes) {
+            if (m_gm.metadata(nh).get<NodeType>().t == NodeType::OP) {
+                const auto& op = m_gm.metadata(nh).get<Op>();
+                const auto &u = ConstOAKGraph(m_g).metadata(nh).get<OAKComponent>();
+                // pass kernel input args and compile args to prepare OAK node and
+                // store it to link later
+                m_oak_nodes[nh] = {};
+                m_oak_nodes.at(nh).inputs.reserve(op.args.size());
+                m_oak_nodes.at(nh).outputs.reserve(op.outs.size());
+
+                std::vector<cv::GArg> in_ctx_args;
+                in_ctx_args.reserve(op.args.size());
+                for (auto &op_arg : op.args) in_ctx_args.push_back(packInArg(op_arg,
+                                                                           m_oak_nodes.at(nh).inputs));
+                for (auto &&op_out : op.outs) packOutArg(op_out, m_oak_nodes.at(nh).outputs);
+                GAPI_Assert(!m_oak_nodes.at(nh).inputs.empty());
+                GAPI_Assert(!m_oak_nodes.at(nh).outputs.empty());
+
+                GOAKContext ctx(m_pipeline, m_camera_size, in_ctx_args, m_oak_nodes.at(nh).outputs);
+                m_oak_nodes.at(nh).node = u.k.m_put_f(ctx, m_in_queues);
+                GAPI_Assert(m_oak_nodes.at(nh).node != nullptr);
+
+                // Check that all inputs and outputs are properly filled after constructing kernels
+                // to then link it together
+                // FIXME: add more logging
+                const auto& node = m_oak_nodes.at(nh);
+                if (std::any_of(node.inputs.cbegin(), node.inputs.cend(),
+                                [](ExtractTypeHelper::InputPtr ptr) {
+                        return ptr == nullptr;
+                    })) {
+                    GAPI_Assert(false && "DAI input are not set");
+                }
+                if (std::any_of(node.outputs.cbegin(), node.outputs.cend(),
+                                [](ExtractTypeHelper::OutputPtr ptr) {
+                        return ptr == nullptr;
+                    })) {
+                    GAPI_Assert(false && "DAI outputs are not set");
+                }
+            }
+        }
+
+        // Prepare nodes for linking
+        std::unordered_set<ade::NodeHandle,
+                           ade::HandleHasher<ade::Node>> in_nodes;
+        std::unordered_set<ade::NodeHandle,
+                           ade::HandleHasher<ade::Node>> out_nodes;
+        std::unordered_set<ade::NodeHandle,
+                           ade::HandleHasher<ade::Node>> inter_nodes;
+
+        // Sort every OAK op node into the input / output / internal sets. TODO: optimize this loop
+        for (const auto& node : m_oak_nodes) {
+            auto nh = node.first;
+            // Input op nodes: ops reading a data node whose rc matches an entry of ins_data
+            for (const auto& d : ins_data) {
+                for (const auto& indata : nh.get()->inNodes()) {
+                    auto rc = m_gm.metadata(indata).get<cv::gimpl::Data>().rc;
+                    if (rc == d.rc) {
+                        in_nodes.insert(nh);
+                    }
+                }
+            }
+            // Output op nodes: ops writing a data node whose rc matches an entry of outs_data
+            for (const auto& d : outs_data) {
+                for (const auto& outdata : nh.get()->outNodes()) {
+                    auto rc = m_gm.metadata(outdata).get<cv::gimpl::Data>().rc;
+                    if (rc == d.rc) {
+                        out_nodes.insert(nh);
+                    }
+                }
+            }
+            // Internal op nodes: found in neither set (each find() is checked against its own end())
+            if (in_nodes.find(nh) == in_nodes.end() &&
+                out_nodes.find(nh) == out_nodes.end()) {
+                inter_nodes.insert(nh);
+            }
+        }
+
+        // Properly link all nodes
+        // 1. Link input nodes to camera
+        for (const auto& nh : in_nodes) {
+            GAPI_Assert(m_oak_nodes.at(nh).inputs.size() == 1);
+            // FIXME: covert other camera outputs
+            m_camera_input->video.link(*(m_oak_nodes.at(nh).inputs[0]));
+        }
+
+        // 2. Link output nodes to XLinkOut nodes
+        size_t out_counter = 0;
+        for (const auto& nh : out_nodes) {
+            GAPI_Assert(out_counter + m_oak_nodes.at(nh).outputs.size() <= m_out_queues.size());
+            for (const auto& out : m_oak_nodes.at(nh).outputs) {
+                out->link(m_out_queues[out_counter++].xlink_output->input);
+            }
+            // Input nodes in OAK don't have a parent operation - just the camera (for now)
+            if (in_nodes.find(nh) == in_nodes.end()) {
+                LinkToParents(nh);
+            }
+        }
+
+        // 3. Link internal nodes to their parents
+        for (const auto& nh : inter_nodes) {
+            // Input nodes in OAK don't have a parent operation - just the camera (for now)
+            if (in_nodes.find(nh) == in_nodes.end()) {
+                LinkToParents(nh);
+            }
+        }
+
+        m_device = std::unique_ptr<dai::Device>(new dai::Device(*m_pipeline));
+
+        // Prepare OAK output queues
+        GAPI_Assert(m_out_queues.size() == outs_data.size());
+        for (const auto out_it : ade::util::indexed(outs_data))
+        {
+            auto& q = m_out_queues[ade::util::index(out_it)];
+            GAPI_Assert(q.out_queue == nullptr); // shouldn't be not filled till this point
+            // FIXME: add queue parameters
+            // Currently: 30 - max DAI queue capacity, true - blocking queue
+            q.out_queue = m_device->getOutputQueue(q.out_queue_name, 30, true);
+        }
+    }
+
+void cv::gimpl::GOAKExecutable::handleNewStream() {
+    // do nothing: the OAK executable takes no action in this handler
+}
+
+void cv::gimpl::GOAKExecutable::handleStopStream() {
+    // do nothing: the OAK executable takes no action in this handler
+}
+
+// One streaming step: forward stored input messages to the device, then post every output
+void cv::gimpl::GOAKExecutable::run(GIslandExecutable::IInput  &in,
+                                    GIslandExecutable::IOutput &out) {
+    const auto in_msg = in.get();
+    if (cv::util::holds_alternative<cv::gimpl::EndOfStream>(in_msg)) {
+        // Pass the end-of-stream marker downstream; nothing else to do
+        out.post(cv::gimpl::EndOfStream{});
+        return;
+    }
+
+    // Send each stored (queue name, message) pair to the matching device input queue
+    for (const auto& named_msg : m_in_queues) {
+        m_device->getInputQueue(named_msg.first)->send(named_msg.second);
+    }
+
+    std::size_t out_idx = 0u;
+    for (const auto& out_info : m_out_queues) {
+        // TODO: support other DAI types if needed
+        // Note: we utilize getData() method that returns std::vector of data
+        //       on which we gain ownership
+        auto oak_frame = out_info.out_queue->get<dai::ImgFrame>();
+        auto out_arg   = out.get(out_idx++);
+
+        const auto arg_kind = out_arg.index();
+        if (arg_kind == cv::GRunArgP::index_of<cv::MediaFrame*>()) {
+            // FIXME: hard-coded NV12
+            const cv::Size frame_sz(static_cast<int>(oak_frame->getWidth()),
+                                    static_cast<int>(oak_frame->getHeight()));
+            *cv::util::get<cv::MediaFrame*>(out_arg) =
+                    cv::MediaFrame::Create<cv::gapi::oak::OAKMediaAdapter>(
+                            frame_sz,
+                            cv::MediaFormat::NV12,
+                            std::move(oak_frame->getData()));
+        } else if (arg_kind == cv::GRunArgP::index_of<cv::detail::VectorRef>()) {
+            cv::util::get<cv::detail::VectorRef>(out_arg).wref<uint8_t>() = std::move(oak_frame->getData());
+        } else {
+            // FIXME: Add support for remaining types
+            GAPI_Assert(false && "Unsupported type in OAK backend");
+        }
+
+        // Attach the frame timestamp and sequence number as output metadata
+        namespace tag = cv::gapi::streaming::meta_tag;
+        cv::GRunArg::Meta meta;
+        meta[tag::timestamp] = oak_frame->getTimestamp();
+        meta[tag::seq_id]    = oak_frame->getSequenceNum();
+
+        out.meta(out_arg, meta);
+        out.post(std::move(out_arg));
+    }
+}
+
+// Built-in kernels for OAK /////////////////////////////////////////////////////
+
+// OAK backend internals: unpackKernel() tags an op node with its OAK kernel, compile() builds the executable
+class GOAKBackendImpl final : public cv::gapi::GBackend::Priv {
+    void unpackKernel(ade::Graph            &graph,
+                      const ade::NodeHandle &op_node,
+                      const cv::GKernelImpl &impl) override {
+        OAKGraph oak_graph(graph);
+        // The kernel is carried type-erased in impl.opaque; store it as OAKComponent metadata
+        const auto &oak_kernel = cv::util::any_cast<cv::gimpl::GOAKKernel>(impl.opaque);
+        oak_graph.metadata(op_node).set(cv::gimpl::OAKComponent{oak_kernel});
+    }
+
+    EPtr compile(const ade::Graph &graph,
+                 const cv::GCompileArgs &args,
+                 const std::vector<ade::NodeHandle> &nodes,
+                 const std::vector<cv::gimpl::Data>& ins_data,
+                 const std::vector<cv::gimpl::Data>& outs_data) const override {
+        // FIXME: pass streaming/non-streaming option to support non-camera case
+        // NB: how could we have non-OAK source in streaming mode, then OAK backend in
+        //     streaming mode but without camera input?
+        const bool is_streaming = cv::gimpl::GModel::ConstGraph(graph).metadata().contains<cv::gimpl::Streaming>();
+        if (!is_streaming) {
+            GAPI_Assert(false && "OAK backend only supports Streaming mode for now");
+        }
+        return EPtr{new cv::gimpl::GOAKExecutable(graph, args, nodes, ins_data, outs_data)};
+    }
+};
+
+cv::gapi::GBackend cv::gapi::oak::backend() { // returns the OAK backend object by value
+    static cv::gapi::GBackend this_backend(std::make_shared<GOAKBackendImpl>()); // function-local static, built once
+    return this_backend;
+}
+
+namespace cv {
+namespace gimpl {
+namespace oak {
+
+namespace {
+static dai::VideoEncoderProperties::Profile convertEncProfile(cv::gapi::oak::EncoderConfig::Profile pf) {
+    switch (pf) { // one-to-one mapping onto the DepthAI profile of the same name
+        case cv::gapi::oak::EncoderConfig::Profile::H264_BASELINE:
+            return dai::VideoEncoderProperties::Profile::H264_BASELINE;
+        case cv::gapi::oak::EncoderConfig::Profile::H264_HIGH:
+            return dai::VideoEncoderProperties::Profile::H264_HIGH;
+        case cv::gapi::oak::EncoderConfig::Profile::H264_MAIN:
+            return dai::VideoEncoderProperties::Profile::H264_MAIN;
+        case cv::gapi::oak::EncoderConfig::Profile::H265_MAIN:
+            return dai::VideoEncoderProperties::Profile::H265_MAIN;
+        case cv::gapi::oak::EncoderConfig::Profile::MJPEG:
+            return dai::VideoEncoderProperties::Profile::MJPEG;
+        default:
+            // Unknown profile. NB: a bare string literal is always true, hence the "false &&"
+            GAPI_Assert(false && "Unsupported encoder profile");
+            return {};
+    }
+}
+} // anonymous namespace
+
+// Kernels ///////////////////////////////////////////////////////////////
+
+template<class Impl, class K> // Impl: kernel implementation struct, K: the G-API kernel API it implements
+class GOAKKernelImpl: public detail::OAKCallHelper<Impl, typename K::InArgs, typename K::OutArgs>
+                    , public cv::detail::KernelTag {
+    using P = detail::OAKCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
+public:
+    using API = K;
+    static cv::gapi::GBackend   backend() { return cv::gapi::oak::backend();  } // always the OAK backend
+    static GOAKKernel kernel()  { return GOAKKernel(&P::construct); } // wraps the call helper's construct()
+};
+
+#define GAPI_OAK_KERNEL(Name, API) /* opens struct Name deriving from GOAKKernelImpl<Name, API> */ \
+    struct Name: public cv::gimpl::oak::GOAKKernelImpl<Name, API>
+
+namespace {
+GAPI_OAK_KERNEL(GOAKEncFrame, cv::gapi::oak::GEncFrame) {
+    // Creates a DAI VideoEncoder preset from cfg and reports its input / bitstream ports
+    static std::shared_ptr<dai::Node> put(const cv::gimpl::detail::OAKKernelParams& params,
+                                          GOAKContext::InputPtr& in,
+                                          const cv::gapi::oak::EncoderConfig& cfg,
+                                          GOAKContext::OutputPtr& out) {
+        auto encoder = params.pipeline->create<dai::node::VideoEncoder>();
+
+        // FIXME: convert all the parameters to dai
+        const auto dai_profile = convertEncProfile(cfg.profile);
+        encoder->setDefaultProfilePreset(cfg.width, cfg.height, cfg.frameRate, dai_profile);
+
+        out = &(encoder->bitstream);
+        in  = &(encoder->input);
+
+        return encoder;
+    }
+};
+
+GAPI_OAK_KERNEL(GOAKSobelXY, cv::gapi::oak::GSobelXY) {
+    // Creates a DAI EdgeDetector fed with the Sobel kernels hk / vk; the kernel config message
+    // is queued in params.m_in_queues under the "sobel_cfg" XLinkIn stream name
+    static std::shared_ptr<dai::Node> put(const cv::gimpl::detail::OAKKernelParams& params,
+                                          GOAKContext::InputPtr& in,
+                                          const cv::Mat& hk,
+                                          const cv::Mat& vk,
+                                          GOAKContext::OutputPtr& out) {
+        auto detector = params.pipeline->create<dai::node::EdgeDetector>();
+        detector->setMaxOutputFrameSize(params.camera_size.width * params.camera_size.height);
+
+        auto cfg_input = params.pipeline->create<dai::node::XLinkIn>();
+        cfg_input->setStreamName("sobel_cfg");
+
+        // Copies a matrix row by row into a vector of int rows
+        const auto to_rows = [](cv::Mat mat) {
+            std::vector<std::vector<int>> rows(mat.rows);
+            for (int r = 0; r < mat.rows; ++r) {
+                mat.row(r).reshape(1,1).copyTo(rows[r]);
+            }
+            return rows;
+        };
+
+        dai::EdgeDetectorConfig sobel_cfg;
+        sobel_cfg.setSobelFilterKernels(to_rows(hk), to_rows(vk));
+
+        cfg_input->out.link(detector->inputConfig);
+        params.m_in_queues.push_back({"sobel_cfg", sobel_cfg});
+
+        in = &(detector->inputImage);
+        out = &(detector->outputImage);
+
+        return detector;
+    }
+};
+} // anonymous namespace
+} // namespace oak
+} // namespace gimpl
+} // namespace cv
+
+namespace cv {
+namespace gapi {
+namespace oak {
+
+cv::gapi::GKernelPackage kernels() { // package holding the two OAK kernels listed below
+    return cv::gapi::kernels< cv::gimpl::oak::GOAKEncFrame
+                            , cv::gimpl::oak::GOAKSobelXY
+                            >();
+}
+
+} // namespace oak
+} // namespace gapi
+} // namespace cv
+
+#else
+
+namespace cv {
+namespace gapi {
+namespace oak {
+
+cv::gapi::GKernelPackage kernels(); // declaration of the stub defined right below
+
+cv::gapi::GKernelPackage kernels() { // stub for the #else branch of the HAVE_OAK guard
+    GAPI_Assert(false && "Built without OAK support");
+    return {}; // return statement required by the non-void signature
+}
+
+} // namespace oak
+} // namespace gapi
+} // namespace cv
+
+#endif // HAVE_OAK
diff --git a/modules/gapi/src/backends/oak/oak_media_adapter.hpp b/modules/gapi/src/backends/oak/oak_media_adapter.hpp
new file mode 100644
index 0000000000..9c81f5a953
--- /dev/null
+++ b/modules/gapi/src/backends/oak/oak_media_adapter.hpp
@@ -0,0 +1,35 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2021 Intel Corporation
+
+#ifndef OPENCV_GAPI_OAK_MEDIA_ADAPTER_HPP
+#define OPENCV_GAPI_OAK_MEDIA_ADAPTER_HPP
+
+#include <memory>
+
+#include <opencv2/gapi/media.hpp>
+
+namespace cv {
+namespace gapi {
+namespace oak {
+
+class GAPI_EXPORTS OAKMediaAdapter final : public cv::MediaFrame::IAdapter { // MediaFrame adapter over a byte buffer
+public:
+    OAKMediaAdapter() = default;
+    OAKMediaAdapter(cv::Size sz, cv::MediaFormat fmt, std::vector<uint8_t>&& buffer); // buffer is taken by rvalue reference
+    cv::GFrameDesc meta() const override; // defined out of line; presumably built from m_fmt / m_sz - confirm
+    cv::MediaFrame::View access(cv::MediaFrame::Access) override; // defined out of line; presumably views m_buffer - confirm
+    ~OAKMediaAdapter() = default;
+private:
+    cv::Size m_sz; // frame size
+    cv::MediaFormat m_fmt; // frame media format
+    std::vector<uint8_t> m_buffer; // presumably the frame bytes handed to the constructor - confirm
+};
+
+} // namespace oak
+} // namespace gapi
+} // namespace cv
+
+#endif // OPENCV_GAPI_OAK_MEDIA_ADAPTER_HPP
diff --git a/modules/gapi/src/backends/streaming/gstreamingbackend.cpp b/modules/gapi/src/backends/streaming/gstreamingbackend.cpp
index 457f423f54..4bd2a10ea5 100644
--- a/modules/gapi/src/backends/streaming/gstreamingbackend.cpp
+++ b/modules/gapi/src/backends/streaming/gstreamingbackend.cpp
@@ -282,6 +282,23 @@ void GOCVBGR::Actor::extractRMat(const cv::MediaFrame& frame, cv::RMat& rmat)
             rmat = cv::make_rmat<cv::gimpl::RMatOnMat>(bgr);
             break;
         }
+        case cv::MediaFormat::GRAY:
+        {
+            std::call_once(m_warnFlag,
+                []() {
+                    GAPI_LOG_WARNING(NULL, "\nOn-the-fly conversion from GRAY to BGR will happen.\n"
+                        "Conversion may cost a lot for images with high resolution.\n"
+                        "To retrieve cv::Mat from GRAY cv::MediaFrame for free, you may use "
+                        "cv::gapi::streaming::Y.\n");
+                });
+            cv::Mat bgr;
+            auto view = frame.access(cv::MediaFrame::Access::R);
+            cv::Mat gray(desc.size, CV_8UC1, view.ptr[0], view.stride[0]);
+            cv::cvtColor(gray, bgr, cv::COLOR_GRAY2BGR);
+            rmat = cv::make_rmat<cv::gimpl::RMatOnMat>(bgr);
+            break;
+        }
+
         default:
             cv::util::throw_error(
                     std::logic_error("Unsupported MediaFormat for cv::gapi::streaming::BGR"));
@@ -339,6 +356,15 @@ void GOCVY::Actor::extractRMat(const cv::MediaFrame& frame, cv::RMat& rmat)
             });
             break;
         }
+        case cv::MediaFormat::GRAY:
+        {
+            rmat = cv::make_rmat<cv::gimpl::RMatMediaFrameAdapter>(frame,
+            [](const cv::GFrameDesc& d) { return cv::GMatDesc(CV_8U, 1, d.size); },
+            [](const cv::GFrameDesc& d, const cv::MediaFrame::View& v) {
+                return cv::Mat(d.size, CV_8UC1, v.ptr[0], v.stride[0]);
+            });
+            break;
+        }
         default:
             cv::util::throw_error(
                     std::logic_error("Unsupported MediaFormat for cv::gapi::streaming::Y"));
@@ -408,6 +434,12 @@ void GOCVUV::Actor::extractRMat(const cv::MediaFrame& frame, cv::RMat& rmat)
             });
             break;
         }
+        case cv::MediaFormat::GRAY:
+        {
+            cv::Mat uv(desc.size / 2, CV_8UC2, cv::Scalar::all(127));
+            rmat = cv::make_rmat<cv::gimpl::RMatOnMat>(uv);
+            break;
+        }
         default:
             cv::util::throw_error(
                     std::logic_error("Unsupported MediaFormat for cv::gapi::streaming::UV"));
diff --git a/modules/gapi/src/executor/gstreamingexecutor.cpp b/modules/gapi/src/executor/gstreamingexecutor.cpp
index d15e17ea28..a3a2746acc 100644
--- a/modules/gapi/src/executor/gstreamingexecutor.cpp
+++ b/modules/gapi/src/executor/gstreamingexecutor.cpp
@@ -323,16 +323,40 @@ public:
 void rewindToStop(std::vector<Q*> &in_queues,
                   const std::size_t  this_id)
 {
-    for (auto &&qit : ade::util::indexed(in_queues))
-    {
-        auto id2 = ade::util::index(qit);
-        auto &q2 = ade::util::value(qit);
-        if (this_id == id2) continue;
+    size_t expected_stop_count = std::count_if(in_queues.begin(), in_queues.end(), [] (const Q* ptr) {
+        return ptr != nullptr;
+    });
 
-        Cmd cmd;
-        while (q2 && !cv::util::holds_alternative<Stop>(cmd))
-            q2->pop(cmd);
+    if (expected_stop_count > 0) {
+        // NB: our own queue must be subtracted from the number of stop messages to wait for,
+        // because it had already got its stop message before rewind was called
+        expected_stop_count--;
     }
+    GAPI_LOG_DEBUG(nullptr, "id: " << this_id << ", queues count: " << in_queues.size() <<
+                            ", expected stop msg count: " << expected_stop_count);
+    size_t got_stop_count = 0;
+    while(got_stop_count < expected_stop_count) {
+        for (auto &&qit : ade::util::indexed(in_queues)) {
+            auto id2 = ade::util::index(qit);
+            auto &q2 = ade::util::value(qit);
+            if (this_id == id2) continue;
+
+            GAPI_LOG_DEBUG(nullptr, "drain next id: " << id2 <<
+                                    ", stop count (" << got_stop_count << "/" <<
+                                    expected_stop_count << ")");
+            bool got_cmd = true;
+            while (q2 && got_cmd) {
+                Cmd cmd;
+                got_cmd = q2->try_pop(cmd);
+                if (got_cmd && cv::util::holds_alternative<Stop>(cmd)) {
+                    got_stop_count ++;
+                    GAPI_LOG_DEBUG(nullptr, "got stop from id: " << id2);
+                    break;
+                }
+            }
+        }
+    }
+    GAPI_LOG_DEBUG(nullptr, "completed");
 }
 
 // This method handles a stop sign got from some input
diff --git a/modules/gapi/src/logger.hpp b/modules/gapi/src/logger.hpp
index cb169bf4be..7ac3c983fc 100644
--- a/modules/gapi/src/logger.hpp
+++ b/modules/gapi/src/logger.hpp
@@ -14,10 +14,12 @@
 #  define GAPI_LOG_INFO(tag, ...)    CV_LOG_INFO(tag, __VA_ARGS__)
 #  define GAPI_LOG_WARNING(tag, ...) CV_LOG_WARNING(tag, __VA_ARGS__)
 #  define GAPI_LOG_DEBUG(tag, ...)    CV_LOG_DEBUG(tag, __VA_ARGS__)
+#  define GAPI_LOG_FATAL(tag, ...)   CV_LOG_FATAL(tag, __VA_ARGS__)
 #else
 #  define GAPI_LOG_INFO(tag, ...)
 #  define GAPI_LOG_WARNING(tag, ...)
 #  define GAPI_LOG_DEBUG(tag, ...)
+#  define GAPI_LOG_FATAL(tag, ...)
 #endif //  !defined(GAPI_STANDALONE)
 
 
diff --git a/modules/gapi/src/streaming/gstreamer/gstreamer_media_adapter.cpp b/modules/gapi/src/streaming/gstreamer/gstreamer_media_adapter.cpp
index 9019289ae4..188f162ffd 100644
--- a/modules/gapi/src/streaming/gstreamer/gstreamer_media_adapter.cpp
+++ b/modules/gapi/src/streaming/gstreamer/gstreamer_media_adapter.cpp
@@ -28,13 +28,41 @@ GStreamerMediaAdapter::GStreamerMediaAdapter(const cv::GFrameDesc& frameDesc,
 
     GstVideoMeta* videoMeta = gst_buffer_get_video_meta(m_buffer);
     if (videoMeta != nullptr) {
-        m_strides = { videoMeta->stride[0], videoMeta->stride[1] };
-        m_offsets = { videoMeta->offset[0], videoMeta->offset[1] };
+        switch (m_frameDesc.fmt) {
+            case cv::MediaFormat::NV12: {
+                m_strides = { videoMeta->stride[0], videoMeta->stride[1] };
+                m_offsets = { videoMeta->offset[0], videoMeta->offset[1] };
+                break;
+            }
+            case cv::MediaFormat::GRAY: {
+                m_strides = { videoMeta->stride[0]};
+                m_offsets = { videoMeta->offset[0]};
+                break;
+            }
+            default: {
+                GAPI_Assert(false && "Non NV12 or GRAY Media format is not expected here");
+                break;
+            }
+        }
     } else {
-        m_strides = { GST_VIDEO_INFO_PLANE_STRIDE(m_videoInfo.get(), 0),
-                      GST_VIDEO_INFO_PLANE_STRIDE(m_videoInfo.get(), 1) };
-        m_offsets = { GST_VIDEO_INFO_PLANE_OFFSET(m_videoInfo.get(), 0),
-                      GST_VIDEO_INFO_PLANE_OFFSET(m_videoInfo.get(), 1) };
+        switch (m_frameDesc.fmt) {
+            case cv::MediaFormat::NV12: {
+                m_strides = { GST_VIDEO_INFO_PLANE_STRIDE(m_videoInfo.get(), 0),
+                              GST_VIDEO_INFO_PLANE_STRIDE(m_videoInfo.get(), 1) };
+                m_offsets = { GST_VIDEO_INFO_PLANE_OFFSET(m_videoInfo.get(), 0),
+                              GST_VIDEO_INFO_PLANE_OFFSET(m_videoInfo.get(), 1) };
+                break;
+            }
+            case cv::MediaFormat::GRAY: {
+                m_strides = { GST_VIDEO_INFO_PLANE_STRIDE(m_videoInfo.get(), 0)};
+                m_offsets = { GST_VIDEO_INFO_PLANE_OFFSET(m_videoInfo.get(), 0)};
+                break;
+            }
+            default: {
+                GAPI_Assert(false && "Non NV12 or GRAY Media format is not expected here");
+                break;
+            }
+        }
     }
 }
 
@@ -71,8 +99,10 @@ cv::MediaFrame::View GStreamerMediaAdapter::access(cv::MediaFrame::Access access
 
         if(!m_isMapped.load(std::memory_order_relaxed)) {
 
-            GAPI_Assert(GST_VIDEO_INFO_N_PLANES(m_videoInfo.get()) == 2);
-            GAPI_Assert(GST_VIDEO_INFO_FORMAT(m_videoInfo.get()) == GST_VIDEO_FORMAT_NV12);
+            GAPI_Assert(GST_VIDEO_INFO_N_PLANES(m_videoInfo.get()) == 2 ||
+                        GST_VIDEO_INFO_N_PLANES(m_videoInfo.get()) == 1);
+            GAPI_Assert(GST_VIDEO_INFO_FORMAT(m_videoInfo.get()) == GST_VIDEO_FORMAT_NV12 ||
+                        GST_VIDEO_INFO_FORMAT(m_videoInfo.get()) == GST_VIDEO_FORMAT_GRAY8);
 
             // TODO: Use RAII for map/unmap
             if (access == cv::MediaFrame::Access::W) {
@@ -85,27 +115,56 @@ cv::MediaFrame::View GStreamerMediaAdapter::access(cv::MediaFrame::Access access
             }
 
             GAPI_Assert(GST_VIDEO_FRAME_PLANE_STRIDE(&m_videoFrame, 0) == m_strides[0]);
-            GAPI_Assert(GST_VIDEO_FRAME_PLANE_STRIDE(&m_videoFrame, 1) == m_strides[1]);
             GAPI_Assert(GST_VIDEO_FRAME_PLANE_OFFSET(&m_videoFrame, 0) == m_offsets[0]);
-            GAPI_Assert(GST_VIDEO_FRAME_PLANE_OFFSET(&m_videoFrame, 1) == m_offsets[1]);
+            if (m_frameDesc.fmt == cv::MediaFormat::NV12) {
+                GAPI_Assert(GST_VIDEO_FRAME_PLANE_STRIDE(&m_videoFrame, 1) == m_strides[1]);
+                GAPI_Assert(GST_VIDEO_FRAME_PLANE_OFFSET(&m_videoFrame, 1) == m_offsets[1]);
+            }
 
             m_isMapped.store(true, std::memory_order_release);
         }
     }
 
-    cv::MediaFrame::View::Ptrs ps {
-        static_cast<uint8_t*>(GST_VIDEO_FRAME_PLANE_DATA(&m_videoFrame, 0)) + m_offsets[0], // Y-plane
-        static_cast<uint8_t*>(GST_VIDEO_FRAME_PLANE_DATA(&m_videoFrame, 0)) + m_offsets[1], // UV-plane
-        nullptr,
-        nullptr
-    };
+    cv::MediaFrame::View::Ptrs ps;
+    cv::MediaFrame::View::Strides ss;
+
+    switch (m_frameDesc.fmt) {
+        case cv::MediaFormat::NV12: {
+            ps = {
+                static_cast<uint8_t*>(GST_VIDEO_FRAME_PLANE_DATA(&m_videoFrame, 0)) + m_offsets[0], // Y-plane
+                static_cast<uint8_t*>(GST_VIDEO_FRAME_PLANE_DATA(&m_videoFrame, 0)) + m_offsets[1], // UV-plane
+                nullptr,
+                nullptr
+            };
+            ss = {
+                static_cast<std::size_t>(m_strides[0]), // Y-plane stride
+                static_cast<std::size_t>(m_strides[1]), // UV-plane stride
+                0u,
+                0u
+            };
+            break;
+        }
+        case cv::MediaFormat::GRAY: {
+            ps = {
+                static_cast<uint8_t*>(GST_VIDEO_FRAME_PLANE_DATA(&m_videoFrame, 0)) + m_offsets[0], // Y-plane
+                nullptr,
+                nullptr,
+                nullptr
+            };
+            ss = {
+                static_cast<std::size_t>(m_strides[0]), // Y-plane stride
+                0u,
+                0u,
+                0u
+            };
+            break;
+        }
+        default: {
+            GAPI_Assert(false && "Non NV12 or GRAY Media format is not expected here");
+            break;
+        }
+    }
 
-    cv::MediaFrame::View::Strides ss = {
-        static_cast<std::size_t>(m_strides[0]), // Y-plane stride
-        static_cast<std::size_t>(m_strides[1]), // UV-plane stride
-        0u,
-        0u
-    };
 
     --thread_counters;
     return cv::MediaFrame::View(std::move(ps), std::move(ss));
diff --git a/modules/gapi/src/streaming/gstreamer/gstreamersource.cpp b/modules/gapi/src/streaming/gstreamer/gstreamersource.cpp
index 661125657c..f1bd438ce2 100644
--- a/modules/gapi/src/streaming/gstreamer/gstreamersource.cpp
+++ b/modules/gapi/src/streaming/gstreamer/gstreamersource.cpp
@@ -30,8 +30,9 @@ namespace gst {
 
 #ifdef HAVE_GSTREAMER
 
-constexpr char NV12_CAPS_STRING[] =
-    "video/x-raw,format=NV12;video/x-raw(memory:DMABuf),format=NV12";
+constexpr char ALLOWED_CAPS_STRING[] =
+    "video/x-raw,format=(string){NV12, GRAY8};video/x-raw(memory:DMABuf),format=(string){NV12, GRAY8}";
+
 
 namespace {
 GstPadProbeReturn appsinkQueryCallback(GstPad*, GstPadProbeInfo* info, gpointer)
@@ -137,17 +138,17 @@ void GStreamerSource::Priv::configureAppsink() {
     // Do not emit signals: all calls will be synchronous and blocking.
     gst_app_sink_set_emit_signals(GST_APP_SINK(m_appsink.get()), FALSE);
 
-    GStreamerPtr<GstCaps> nv12Caps(gst_caps_from_string(NV12_CAPS_STRING));
+    GStreamerPtr<GstCaps> gstCaps(gst_caps_from_string(ALLOWED_CAPS_STRING));
 
     GStreamerPtr<GstPad> appsinkPad(gst_element_get_static_pad(m_appsink, "sink"));
     GStreamerPtr<GstCaps> peerCaps(gst_pad_peer_query_caps(appsinkPad, NULL));
-    if (!gst_caps_can_intersect(peerCaps, nv12Caps)) {
+    if (!gst_caps_can_intersect(peerCaps, gstCaps)) {
         cv::util::throw_error(
-            std::logic_error("appsink element can only consume video-frame in NV12 format in "
+            std::logic_error("appsink element can only consume video-frame in NV12 or GRAY8 format in "
                              "GStreamerSource"));
     }
 
-    gst_app_sink_set_caps(GST_APP_SINK(m_appsink.get()), nv12Caps);
+    gst_app_sink_set_caps(GST_APP_SINK(m_appsink.get()), gstCaps);
 
     gst_pad_add_probe(appsinkPad, GST_PAD_PROBE_TYPE_QUERY_DOWNSTREAM, appsinkQueryCallback,
                       NULL, NULL);
@@ -184,10 +185,29 @@ void GStreamerSource::Priv::prepareVideoMeta()
             cv::util::throw_error(std::logic_error("Cannot query video width/height."));
         }
 
+        // Fill GstVideoInfo structure to work further with GstVideoFrame class.
+        if (!gst_video_info_from_caps(&m_videoInfo, prerollCaps)) {
+            cv::util::throw_error(std::logic_error("preroll sample has invalid caps."));
+        }
+        m_type = GST_VIDEO_INFO_FORMAT(&m_videoInfo);
         switch(m_outputType) {
             case GStreamerSource::OutputType::FRAME: {
                 // Construct metadata for media frame.
-                m_mediaFrameMeta = GFrameDesc { cv::MediaFormat::NV12, cv::Size(width, height) };
+                switch (m_type) {
+                    case GST_VIDEO_FORMAT_NV12: {
+                        m_mediaFrameMeta = GFrameDesc{ cv::MediaFormat::NV12, cv::Size(width, height) };
+                        GAPI_Assert(GST_VIDEO_INFO_N_PLANES(&m_videoInfo) == 2);
+                        break;
+                    }
+                    case GST_VIDEO_FORMAT_GRAY8: {
+                        m_mediaFrameMeta = GFrameDesc{ cv::MediaFormat::GRAY, cv::Size(width, height) };
+                        GAPI_Assert(GST_VIDEO_INFO_N_PLANES(&m_videoInfo) == 1);
+                        break;
+                    }
+                    default: {
+                        GAPI_Assert(false && "Unsupported GStreamerSource FRAME type.");
+                    }
+                }
                 break;
             }
             case GStreamerSource::OutputType::MAT: {
@@ -197,13 +217,6 @@ void GStreamerSource::Priv::prepareVideoMeta()
             }
         }
 
-        // Fill GstVideoInfo structure to work further with GstVideoFrame class.
-        if (!gst_video_info_from_caps(&m_videoInfo, prerollCaps)) {
-            cv::util::throw_error(std::logic_error("preroll sample has invalid caps."));
-        }
-        GAPI_Assert(GST_VIDEO_INFO_N_PLANES(&m_videoInfo) == 2);
-        GAPI_Assert(GST_VIDEO_INFO_FORMAT(&m_videoInfo) == GST_VIDEO_FORMAT_NV12);
-
         m_isMetaPrepared = true;
     }
 }
@@ -272,28 +285,46 @@ bool GStreamerSource::Priv::retrieveFrame(cv::Mat& data)
 
     try
     {
-        // m_matMeta holds width and height for 8U BGR frame, but actual
-        // frame m_buffer we request from GStreamer pipeline has 8U NV12 format.
-        // Constructing y and uv cv::Mat-s from such a m_buffer:
-        GAPI_Assert((uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 1) ==
+        switch (m_type) {
+            case GST_VIDEO_FORMAT_NV12: {
+                // m_matMeta holds width and height for 8U BGR frame, but actual
+                // frame m_buffer we request from GStreamer pipeline has 8U NV12 format.
+                // Constructing y and uv cv::Mat-s from such a m_buffer:
+                GAPI_Assert((uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 1) ==
                     (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 0) +
                     GST_VIDEO_FRAME_PLANE_OFFSET(&videoFrame, 1));
+                GAPI_Assert(GST_VIDEO_INFO_N_PLANES(&m_videoInfo) == 2);
 
-        cv::Mat y(m_matMeta.size, CV_8UC1,
-                  (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 0) +
-                  GST_VIDEO_FRAME_PLANE_OFFSET(&videoFrame, 0),
-                  GST_VIDEO_FRAME_PLANE_STRIDE(&videoFrame, 0));
-        cv::Mat uv(m_matMeta.size / 2, CV_8UC2,
-                   (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 0) +
-                   GST_VIDEO_FRAME_PLANE_OFFSET(&videoFrame, 1),
-                   GST_VIDEO_FRAME_PLANE_STRIDE(&videoFrame, 1));
+                cv::Mat y(m_matMeta.size, CV_8UC1,
+                    (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 0) +
+                    GST_VIDEO_FRAME_PLANE_OFFSET(&videoFrame, 0),
+                    GST_VIDEO_FRAME_PLANE_STRIDE(&videoFrame, 0));
+                cv::Mat uv(m_matMeta.size / 2, CV_8UC2,
+                    (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 0) +
+                    GST_VIDEO_FRAME_PLANE_OFFSET(&videoFrame, 1),
+                    GST_VIDEO_FRAME_PLANE_STRIDE(&videoFrame, 1));
 
-        cv::cvtColorTwoPlane(y, uv, data, cv::COLOR_YUV2BGR_NV12);
+                cv::cvtColorTwoPlane(y, uv, data, cv::COLOR_YUV2BGR_NV12);
+                break;
+            }
+            case GST_VIDEO_FORMAT_GRAY8: {
+                GAPI_Assert(GST_VIDEO_INFO_N_PLANES(&m_videoInfo) == 1);
+                cv::Mat y(m_matMeta.size, CV_8UC1,
+                    (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 0) +
+                    GST_VIDEO_FRAME_PLANE_OFFSET(&videoFrame, 0),
+                    GST_VIDEO_FRAME_PLANE_STRIDE(&videoFrame, 0));
+                cv::cvtColor(y, data, cv::COLOR_GRAY2BGR);
+                break;
+            }
+            default: {
+                GAPI_Assert(false && "retrieveFrame - unsupported GStreamerSource FRAME type.");
+            }
+        }
     }
     catch (...)
     {
         gst_video_frame_unmap(&videoFrame);
-        cv::util::throw_error(std::runtime_error("NV12 buffer conversion to BGR is failed!"));
+        cv::util::throw_error(std::runtime_error("NV12 or GRAY8 buffer conversion to BGR is failed!"));
     }
     gst_video_frame_unmap(&videoFrame);
 
diff --git a/modules/gapi/src/streaming/gstreamer/gstreamersource_priv.hpp b/modules/gapi/src/streaming/gstreamer/gstreamersource_priv.hpp
index b0940c48a3..0671213197 100644
--- a/modules/gapi/src/streaming/gstreamer/gstreamersource_priv.hpp
+++ b/modules/gapi/src/streaming/gstreamer/gstreamersource_priv.hpp
@@ -59,6 +59,7 @@ protected:
     bool m_isPipelinePlaying = false;
 
     int64_t m_frameId = 0L;
+    size_t m_type = 0; // GStreamer video format id, assigned from GST_VIDEO_INFO_FORMAT(&m_videoInfo) once the preroll caps are parsed
 
 protected:
     void configureAppsink();
diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp
index 2cdf1c2b44..ad0e5bf667 100644
--- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp
+++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp
@@ -210,30 +210,29 @@ VPLCPUAccelerationPolicy::create_surface_pool(size_t pool_size, size_t surface_s
 }
 
 VPLCPUAccelerationPolicy::pool_key_t
-VPLCPUAccelerationPolicy::create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxVideoParam& param) {
+VPLCPUAccelerationPolicy::create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxFrameInfo& info) {
 
     // External (application) allocation of decode surfaces
     GAPI_LOG_DEBUG(nullptr, "Query mfxFrameAllocRequest.NumFrameSuggested: " << alloc_request.NumFrameSuggested <<
                             ", mfxFrameAllocRequest.Type: " << alloc_request.Type);
 
-    mfxU32 singleSurfaceSize = utils::GetSurfaceSize_(param.mfx.FrameInfo.FourCC,
-                                                      param.mfx.FrameInfo.Width,
-                                                      param.mfx.FrameInfo.Height);
+    mfxU32 singleSurfaceSize = utils::GetSurfaceSize_(info.FourCC,
+                                                      info.Width,
+                                                      info.Height);
     if (!singleSurfaceSize) {
         throw std::runtime_error("Cannot determine surface size for: fourCC: " +
-                                 std::to_string(param.mfx.FrameInfo.FourCC) +
-                                 ", width: " + std::to_string(param.mfx.FrameInfo.Width) +
-                                 ", height: " + std::to_string(param.mfx.FrameInfo.Height));
+                                 std::to_string(info.FourCC) +
+                                 ", width: " + std::to_string(info.Width) +
+                                 ", height: " + std::to_string(info.Height));
     }
 
-    const auto &frameInfo = param.mfx.FrameInfo;
     auto surface_creator =
-            [&frameInfo] (std::shared_ptr<void> out_buf_ptr, size_t out_buf_ptr_offset,
+            [&info] (std::shared_ptr<void> out_buf_ptr, size_t out_buf_ptr_offset,
                           size_t out_buf_size) -> surface_ptr_t {
-                return (frameInfo.FourCC == MFX_FOURCC_RGB4) ?
-                        utils::create_surface_RGB4_(frameInfo, out_buf_ptr, out_buf_ptr_offset,
+                return (info.FourCC == MFX_FOURCC_RGB4) ?
+                        utils::create_surface_RGB4_(info, out_buf_ptr, out_buf_ptr_offset,
                                                     out_buf_size) :
-                        utils::create_surface_other_(frameInfo, out_buf_ptr, out_buf_ptr_offset,
+                        utils::create_surface_other_(info, out_buf_ptr, out_buf_ptr_offset,
                                                      out_buf_size);};
 
     return create_surface_pool(alloc_request.NumFrameSuggested,
diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp
index fdc0afd4bf..8a2061dce0 100644
--- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp
+++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp
@@ -32,7 +32,7 @@ struct GAPI_EXPORTS VPLCPUAccelerationPolicy final : public VPLAccelerationPolic
     void init(session_t session) override;
     void deinit(session_t session) override;
     pool_key_t create_surface_pool(size_t pool_size, size_t surface_size_bytes, surface_ptr_ctr_t creator);
-    pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxVideoParam& param) override;
+    pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxFrameInfo& info) override;
     surface_weak_ptr_t get_free_surface(pool_key_t key) override;
     size_t get_free_surface_count(pool_key_t key) const override;
     size_t get_surface_count(pool_key_t key) const override;
diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp
index f528190ad5..02720f3774 100644
--- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp
+++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp
@@ -98,9 +98,7 @@ void VPLDX11AccelerationPolicy::deinit(session_t session) {
 
 VPLDX11AccelerationPolicy::pool_key_t
 VPLDX11AccelerationPolicy::create_surface_pool(const mfxFrameAllocRequest& alloc_req,
-                                               mfxVideoParam& param) {
-    param.IOPattern = MFX_IOPATTERN_OUT_VIDEO_MEMORY;
-
+                                               mfxFrameInfo& info) {
     // allocate textures by explicit request
     mfxFrameAllocResponse mfxResponse;
     mfxStatus sts = on_alloc(&alloc_req, &mfxResponse);
@@ -120,7 +118,7 @@ VPLDX11AccelerationPolicy::create_surface_pool(const mfxFrameAllocRequest& alloc
     pool_t pool(numSurfaces);
     for (int i = 0; i < numSurfaces; i++) {
         std::unique_ptr<mfxFrameSurface1> handle(new mfxFrameSurface1 {});
-        handle->Info = param.mfx.FrameInfo;
+        handle->Info = info;
         handle->Data.MemId = mfxResponse.mids[i];
 
         pool.push_back(Surface::create_surface(std::move(handle), table_it->second));
@@ -261,24 +259,54 @@ mfxStatus VPLDX11AccelerationPolicy::on_alloc(const mfxFrameAllocRequest *reques
     desc.Format = colorFormat;
     desc.SampleDesc.Count = 1;
     desc.Usage = D3D11_USAGE_DEFAULT;
-    desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED;
+    desc.MiscFlags = 0;
     desc.BindFlags = D3D11_BIND_DECODER;
 
+    if (((MFX_MEMTYPE_FROM_VPPIN & request->Type) && (DXGI_FORMAT_YUY2 == desc.Format)) || // YUY2 qualifies only as VPP input
+        (DXGI_FORMAT_B8G8R8A8_UNORM == desc.Format) ||
+        (DXGI_FORMAT_R10G10B10A2_UNORM == desc.Format) ||
+        (DXGI_FORMAT_R16G16B16A16_UNORM == desc.Format)) {
+        desc.BindFlags = D3D11_BIND_RENDER_TARGET; // replaces the D3D11_BIND_DECODER default assigned above
+    }
+
+    if ((MFX_MEMTYPE_FROM_VPPOUT & request->Type) ||
+        (MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET & request->Type)) {
+        desc.BindFlags = D3D11_BIND_RENDER_TARGET;
+    }
+
     if (request->Type & MFX_MEMTYPE_SHARED_RESOURCE) {
         desc.BindFlags |= D3D11_BIND_SHADER_RESOURCE;
         desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED;
     }
 
-    ComPtrGuard<ID3D11Texture2D> main_texture = createCOMPtrGuard<ID3D11Texture2D>();
+    if (DXGI_FORMAT_P8 == desc.Format) {
+        desc.BindFlags = 0;
+    }
+
+    size_t main_textures_count = 1; // by default a single texture object is created
+    if (D3D11_BIND_RENDER_TARGET & desc.BindFlags) {
+        GAPI_LOG_DEBUG(nullptr, "Use array of textures instead of texture array");
+        desc.ArraySize = 1; // every texture created from this desc is a one-element array
+        main_textures_count = request->NumFrameSuggested; // one separate texture per suggested frame
+    }
+
+    // create GPU textures
     HRESULT err = S_OK;
-    {
-        ID3D11Texture2D *pTexture2D = nullptr;
-        err = hw_handle->CreateTexture2D(&desc, nullptr, &pTexture2D);
-        if (FAILED(err)) {
-            GAPI_LOG_WARNING(nullptr, "Cannot create texture, error: " + std::to_string(HRESULT_CODE(err)));
-            return MFX_ERR_MEMORY_ALLOC;
+    std::vector<ComPtrGuard<ID3D11Texture2D>> main_textures;
+    main_textures.reserve(main_textures_count);
+    for (size_t i = 0; i < main_textures_count; i++) {
+        ComPtrGuard<ID3D11Texture2D> main_texture = createCOMPtrGuard<ID3D11Texture2D>();
+        {
+            ID3D11Texture2D *pTexture2D = nullptr;
+            err = hw_handle->CreateTexture2D(&desc, nullptr, &pTexture2D);
+            if (FAILED(err)) {
+                GAPI_LOG_WARNING(nullptr, "Cannot create texture by index: " << i <<
+                                          ", error: " << std::to_string(HRESULT_CODE(err)));
+                return MFX_ERR_MEMORY_ALLOC;
+            }
+            main_texture.reset(pTexture2D);
         }
-        main_texture.reset(pTexture2D);
+        main_textures.push_back(std::move(main_texture));
     }
 
     // create staging texture to read it from
@@ -308,7 +336,7 @@ mfxStatus VPLDX11AccelerationPolicy::on_alloc(const mfxFrameAllocRequest *reques
                                          DX11AllocationRecord::create(request->NumFrameSuggested,
                                                                       device_context,
                                                                       allocator,
-                                                                      std::move(main_texture),
+                                                                      std::move(main_textures),
                                                                       std::move(staging_textures)));
         if (!inserted_it.second) {
             GAPI_LOG_WARNING(nullptr, "Cannot assign allocation by id: " + std::to_string(request->AllocId) +
@@ -363,7 +391,7 @@ mfxStatus VPLDX11AccelerationPolicy::on_get_hdl(mfxMemId mid, mfxHDL *handle) {
     pPair->second = static_cast<mfxHDL>(reinterpret_cast<DX11AllocationItem::subresource_id_t *>(
                                         static_cast<uint64_t>(data->get_subresource())));
 
-    GAPI_LOG_DEBUG(nullptr, "texture : " << pPair->first << ", sub id: " << pPair->second);
+    GAPI_LOG_DEBUG(nullptr, "ID3D11Texture2D : " << pPair->first << ", sub id: " << pPair->second);
     return MFX_ERR_NONE;
 }
 
diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp
index e053089587..893698eb36 100644
--- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp
+++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp
@@ -43,7 +43,7 @@ struct GAPI_EXPORTS VPLDX11AccelerationPolicy final: public VPLAccelerationPolic
     void init(session_t session) override;
     void deinit(session_t session) override;
     pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request,
-                                   mfxVideoParam& param) override;
+                                   mfxFrameInfo& info) override;
     surface_weak_ptr_t get_free_surface(pool_key_t key) override;
     size_t get_free_surface_count(pool_key_t key) const override;
     size_t get_surface_count(pool_key_t key) const override;
diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp
index a9059c29ef..b1d7c25bb1 100644
--- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp
+++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp
@@ -54,7 +54,7 @@ struct VPLAccelerationPolicy
     // for existing workspace in existing pool (see realloc)
     // thus it is not implemented,
     // PLEASE provide initial memory area large enough
-    virtual pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxVideoParam& param) = 0;
+    virtual pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxFrameInfo& info) = 0;
 
     virtual surface_weak_ptr_t get_free_surface(pool_key_t key) = 0;
     virtual size_t get_free_surface_count(pool_key_t key) const = 0;
diff --git a/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp
index 3bbfb25b0a..574860e03d 100644
--- a/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp
+++ b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp
@@ -96,6 +96,7 @@ void LockAdapter::unlock_write(mfxMemId mid, mfxFrameData &data) {
 
 SharedLock* LockAdapter::set_adaptee(SharedLock* new_impl) {
     SharedLock* old_impl = impl;
+    GAPI_DbgAssert((old_impl == nullptr || new_impl == nullptr) && "Must not be previous impl"); // only set-over-empty or reset-to-null is allowed
     impl = new_impl;
     return old_impl;
 }
@@ -355,13 +356,14 @@ DX11AllocationRecord::~DX11AllocationRecord() {
     GAPI_LOG_DEBUG(nullptr, "release final referenced texture: " << texture_ptr.get());
 }
 
-void DX11AllocationRecord::init(unsigned int items,
-                                ID3D11DeviceContext* origin_ctx,
+void DX11AllocationRecord::init(unsigned int items, ID3D11DeviceContext* origin_ctx,
                                 mfxFrameAllocator origin_allocator,
-                                ComPtrGuard<ID3D11Texture2D>&& texture,
+                                std::vector<ComPtrGuard<ID3D11Texture2D>> &&textures,
                                 std::vector<ComPtrGuard<ID3D11Texture2D>> &&staging_textures) {
+
     GAPI_DbgAssert(items != 0 && "Cannot create DX11AllocationRecord with empty items");
     GAPI_DbgAssert(items == staging_textures.size() && "Allocation items count and staging size are not equal");
+    GAPI_DbgAssert((textures.size() == 1 || items == textures.size()) && "Allocation items count and textures size are not equal"); // a single texture is shared by all items
     GAPI_DbgAssert(origin_ctx &&
                    "Cannot create DX11AllocationItem for empty origin_ctx");
     auto shared_allocator_copy = origin_allocator;
@@ -374,13 +376,22 @@ void DX11AllocationRecord::init(unsigned int items,
     shared_allocator_copy.pthis = nullptr;
 
 
-    GAPI_LOG_DEBUG(nullptr, "subresources count: " << items << ", text: " << texture.get());
+    GAPI_LOG_DEBUG(nullptr, "subresources count: " << items);
     resources.reserve(items);
-    // no AddRef here, because DX11AllocationRecord receive ownership it here
-    texture_ptr = createCOMSharedPtrGuard(std::move(texture));
+
+    if (textures.size() == 1) {
+        texture_ptr = createCOMSharedPtrGuard(std::move(textures[0]));
+    }
     for(unsigned int i = 0; i < items; i++) {
-        resources.emplace_back(new DX11AllocationItem(get_ptr(), origin_ctx, shared_allocator_copy,
-                                                      texture_ptr, i, std::move(staging_textures[i])));
+        if (textures.size() == 1) {
+            GAPI_LOG_DEBUG(nullptr, "subresources: [" << i <<", " << items << "], ID3D11Texture2D: " << texture_ptr.get());
+            resources.emplace_back(new DX11AllocationItem(get_ptr(), origin_ctx, shared_allocator_copy,
+                                                          texture_ptr, i, std::move(staging_textures[i])));
+        } else {
+            GAPI_LOG_DEBUG(nullptr, "subresources: [" << i <<", " << items << "], ID3D11Texture2D: " << textures[i].get());
+            resources.emplace_back(new DX11AllocationItem(get_ptr(), origin_ctx, shared_allocator_copy,
+                                                          std::move(textures[i]), 0, std::move(staging_textures[i])));
+        }
     }
 }
 
diff --git a/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.hpp b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.hpp
index 46ddff86a4..c68a08a3f8 100644
--- a/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.hpp
+++ b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.hpp
@@ -133,10 +133,10 @@ struct DX11AllocationRecord : public std::enable_shared_from_this<DX11Allocation
     size_t size() const;
 private:
     DX11AllocationRecord();
+
     void init(unsigned int items, ID3D11DeviceContext* origin_ctx,
               mfxFrameAllocator origin_allocator,
-              ComPtrGuard<ID3D11Texture2D>&& texture, std::vector<ComPtrGuard<ID3D11Texture2D>> &&staging_textures);
-
+              std::vector<ComPtrGuard<ID3D11Texture2D>>&& textures, std::vector<ComPtrGuard<ID3D11Texture2D>> &&staging_textures);
     std::vector<AllocationId> resources;
     ComSharedPtrGuard<ID3D11Texture2D> texture_ptr;
 };
diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp
index 04cf10c8d7..6afa2cf0b6 100644
--- a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp
+++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp
@@ -48,7 +48,8 @@ VPLMediaFrameDX11Adapter::VPLMediaFrameDX11Adapter(std::shared_ptr<Surface> surf
     Surface::data_t& data = parent_surface_ptr->get_data();
     GAPI_LOG_DEBUG(nullptr, "surface: " << parent_surface_ptr->get_handle() <<
                             ", w: " << info.Width << ", h: " << info.Height <<
-                            ", p: " << data.Pitch);
+                            ", p: " << data.Pitch <<
+                            ", frame id: " << reinterpret_cast<void*>(this));
     switch(info.FourCC)
     {
         case MFX_FOURCC_I420:
@@ -72,6 +73,9 @@ VPLMediaFrameDX11Adapter::~VPLMediaFrameDX11Adapter() {
     // Each VPLMediaFrameDX11Adapter releases mfx surface counter
     // The last VPLMediaFrameDX11Adapter releases shared Surface pointer
     // The last surface pointer releases workspace memory
+
+    GAPI_LOG_DEBUG(nullptr, "destroy frame id: " << reinterpret_cast<void*>(this));
+
     Surface::data_t& data = parent_surface_ptr->get_data();
     LockAdapter* alloc_data = reinterpret_cast<LockAdapter*>(data.MemId);
     alloc_data->set_adaptee(nullptr);
@@ -155,30 +159,44 @@ MediaFrame::View VPLMediaFrameDX11Adapter::access(MediaFrame::Access mode) {
 }
 
 cv::util::any VPLMediaFrameDX11Adapter::blobParams() const {
+    /*GAPI_Assert(false && "VPLMediaFrameDX11Adapter::blobParams() is not fully integrated"
+                         "in OpenVINO InferenceEngine and would be temporary disable.");*/
 #ifdef HAVE_INF_ENGINE
-    GAPI_Assert(false && "VPLMediaFrameDX11Adapter::blobParams() is not fully operable "
-                "in G-API streaming. Please waiting for future PRs");
-
     Surface::data_t& data = parent_surface_ptr->get_data();
+    const Surface::info_t& info = parent_surface_ptr->get_info();
     NativeHandleAdapter* native_handle_getter = reinterpret_cast<NativeHandleAdapter*>(data.MemId);
 
     mfxHDLPair handle{};
     native_handle_getter->get_handle(data.MemId, reinterpret_cast<mfxHDL&>(handle));
 
-    InferenceEngine::ParamMap params{{"SHARED_MEM_TYPE", "VA_SURFACE"},
-                                     {"DEV_OBJECT_HANDLE", handle.first},
-                                     {"COLOR_FORMAT", InferenceEngine::ColorFormat::NV12},
-                                     {"VA_PLANE",
+    GAPI_Assert(frame_desc.fmt == MediaFormat::NV12 &&
+                "blobParams() for VPLMediaFrameDX11Adapter supports NV12 only");
+
+    InferenceEngine::ParamMap y_params{{"SHARED_MEM_TYPE", "VA_SURFACE"},
+                                       {"DEV_OBJECT_HANDLE", handle.first},
+                                       {"COLOR_FORMAT", InferenceEngine::ColorFormat::NV12},
+                                       {"VA_PLANE",
                                          static_cast<DX11AllocationItem::subresource_id_t>(
                                             reinterpret_cast<uint64_t>(
                                                 reinterpret_cast<DX11AllocationItem::subresource_id_t *>(
                                                     handle.second)))}};//,
-    const Surface::info_t& info = parent_surface_ptr->get_info();
-    InferenceEngine::TensorDesc tdesc({InferenceEngine::Precision::U8,
-                                       {1, 3, static_cast<size_t>(info.Height),
-                                        static_cast<size_t>(info.Width)},
-                                       InferenceEngine::Layout::NCHW});
-    return std::make_pair(tdesc, params);
+    InferenceEngine::TensorDesc y_tdesc({InferenceEngine::Precision::U8,
+                                        {1, 1, static_cast<size_t>(info.Height),
+                                         static_cast<size_t>(info.Width)},
+                                        InferenceEngine::Layout::NHWC});
+
+    InferenceEngine::ParamMap uv_params = y_params;
+    uv_params["MEM_HANDLE"] = handle.first;
+    uv_params["VA_PLANE"] = static_cast<DX11AllocationItem::subresource_id_t>(
+                                            reinterpret_cast<uint64_t>(
+                                                reinterpret_cast<DX11AllocationItem::subresource_id_t *>(
+                                                    handle.second))) + 1;
+    InferenceEngine::TensorDesc uv_tdesc({InferenceEngine::Precision::U8,
+                                         {1, 2, static_cast<size_t>(info.Height) / 2,
+                                          static_cast<size_t>(info.Width) / 2},
+                                         InferenceEngine::Layout::NHWC});
+    return std::make_pair(std::make_pair(y_tdesc, y_params),
+                          std::make_pair(uv_tdesc, uv_params));
 #else
     GAPI_Assert(false && "VPLMediaFrameDX11Adapter::blobParams() is not implemented");
 #endif // HAVE_INF_ENGINE
diff --git a/modules/gapi/src/streaming/onevpl/cfg_params.cpp b/modules/gapi/src/streaming/onevpl/cfg_params.cpp
index 599f751358..b13f9cadb1 100644
--- a/modules/gapi/src/streaming/onevpl/cfg_params.cpp
+++ b/modules/gapi/src/streaming/onevpl/cfg_params.cpp
@@ -118,6 +118,82 @@ CfgParam CfgParam::create_implementation(const char* value) {
     return CfgParam::create(CfgParam::implementation_name(), std::string(value));
 }
 
+CfgParam CfgParam::create_vpp_frames_pool_size(size_t value) {
+    // NB: the signature takes size_t because it reads more naturally in
+    // high-level C++ code, while the CfgParam inner variant mirrors the VPL
+    // value types and holds uint64_t rather than size_t - hence the cast below.
+    return CfgParam::create(CfgParam::vpp_frames_pool_size_name(),
+                            static_cast<uint64_t>(value), false);
+}
+
+CfgParam CfgParam::create_vpp_in_width(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_in_width_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_in_height(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_in_height_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_in_crop_x(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_in_crop_x_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_in_crop_y(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_in_crop_y_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_in_crop_w(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_in_crop_w_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_in_crop_h(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_in_crop_h_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_out_fourcc(uint32_t value) {
+    return CfgParam::create(CfgParam::vpp_out_fourcc_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_out_chroma_format(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_out_chroma_format_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_out_width(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_out_width_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_out_height(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_out_height_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_out_crop_x(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_out_crop_x_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_out_crop_y(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_out_crop_y_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_out_crop_w(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_out_crop_w_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_out_crop_h(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_out_crop_h_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_out_pic_struct(uint16_t value) {
+    return CfgParam::create(CfgParam::vpp_out_pic_struct_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_out_framerate_n(uint32_t value) {
+    return CfgParam::create(CfgParam::vpp_out_framerate_n_name(), value, false);
+}
+
+CfgParam CfgParam::create_vpp_out_framerate_d(uint32_t value) {
+    return CfgParam::create(CfgParam::vpp_out_framerate_d_name(), value, false);
+}
+
 CfgParam& CfgParam::operator=(const CfgParam& src) {
     if (this != &src) {
         m_priv = src.m_priv;
diff --git a/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp b/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp
index 07c639faa2..d748825b1c 100644
--- a/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp
+++ b/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp
@@ -35,6 +35,12 @@ struct ParamCreator<mfxVariant> {
         return create_impl(name, value);
     }
 private:
+    mfxVariant create_impl(const std::string&, mfxU16 value) {
+        mfxVariant ret;
+        ret.Type = MFX_VARIANT_TYPE_U16;
+        ret.Data.U16 = value;
+        return ret;
+    }
     mfxVariant create_impl(const std::string&, mfxU32 value) {
         mfxVariant ret;
         ret.Type = MFX_VARIANT_TYPE_U32;
@@ -53,6 +59,10 @@ private:
         ret.Data.U64 = value;
         return ret;
     }
+    mfxVariant create_impl(const std::string&, const std::string&) {
+        GAPI_Assert(false && "Something wrong: you should not create mfxVariant "
+                             "from string directly - native type is lost in this case");
+    }
 };
 
 template<typename ValueType>
@@ -86,6 +96,76 @@ std::vector<ValueType> get_params_from_string(const std::string& str) {
             ret.push_back(creator.create<mfxU32>(name, cstr_to_mfx_version(value.c_str())));
         } else if (name == CfgParam::frames_pool_size_name()) {
             ret.push_back(creator.create(name, strtoull_or_throw(value.c_str()), false));
+        } else if (name == CfgParam::vpp_frames_pool_size_name()) {
+            ret.push_back(creator.create(name, strtoull_or_throw(value.c_str()), false));
+        } else if (name == CfgParam::vpp_in_width_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_in_height_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_in_crop_w_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_in_crop_h_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_in_crop_x_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_in_crop_y_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_out_fourcc_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint32_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_out_chroma_format_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_out_width_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_out_height_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_out_crop_w_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_out_crop_h_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_out_crop_x_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_out_crop_y_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_out_pic_struct_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint16_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_out_framerate_n_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint32_t>(strtoul_or_throw(value.c_str())),
+                                         false));
+        } else if (name == CfgParam::vpp_out_framerate_d_name()) {
+            ret.push_back(creator.create(name,
+                                         static_cast<uint32_t>(strtoul_or_throw(value.c_str())),
+                                         false));
         } else {
             GAPI_LOG_DEBUG(nullptr, "Cannot parse configuration param, name: " << name <<
                                     ", value: " << value);
@@ -128,6 +208,45 @@ mfxVariant cfg_param_to_mfx_variant(const CfgParam& cfg_val) {
     return ret;
 }
 
+void extract_optional_param_by_name(const std::string &name,
+                                    const std::vector<CfgParam> &in_params,
+                                    cv::util::optional<size_t> &out_param) {
+    const auto found = std::find_if(in_params.begin(), in_params.end(),
+                                    [&name] (const CfgParam& param) { return param.get_name() == name; });
+    // No parameter carries this name: `out_param` is deliberately left as the caller passed it
+    if (found == in_params.end()) { return; }
+    // Numeric alternatives are cast to size_t, a string goes through strtoull_or_throw
+    cv::util::visit(cv::util::overload_lambdas(
+        [&out_param](uint8_t v)   { out_param = cv::util::make_optional(static_cast<size_t>(v)); },
+        [&out_param](int8_t v)    { out_param = cv::util::make_optional(static_cast<size_t>(v)); },
+        [&out_param](uint16_t v)  { out_param = cv::util::make_optional(static_cast<size_t>(v)); },
+        [&out_param](int16_t v)   { out_param = cv::util::make_optional(static_cast<size_t>(v)); },
+        [&out_param](uint32_t v)  { out_param = cv::util::make_optional(static_cast<size_t>(v)); },
+        [&out_param](int32_t v)   { out_param = cv::util::make_optional(static_cast<size_t>(v)); },
+        [&out_param](uint64_t v)  { out_param = cv::util::make_optional(static_cast<size_t>(v)); },
+        [&out_param](int64_t v)   { out_param = cv::util::make_optional(static_cast<size_t>(v)); },
+        [&out_param](float_t v)   { out_param = cv::util::make_optional(static_cast<size_t>(v)); },
+        [&out_param](double_t v)  { out_param = cv::util::make_optional(static_cast<size_t>(v)); },
+        [&out_param](void*)       { GAPI_Assert(false && "`void*` is unsupported type"); },
+        [&out_param](const std::string& text) {
+            out_param = cv::util::make_optional(strtoull_or_throw(text.c_str()));
+        }),
+        found->get_value());
+}
+
+unsigned long strtoul_or_throw(const char* str) {
+    char *end_ptr = nullptr;
+    errno = 0;  // strtoul signals overflow only through errno, so clear any stale value
+    unsigned long ret = strtoul(str, &end_ptr, 10);
+    // Fail when no digits were consumed or the value does not fit into `unsigned long`:
+    // on overflow strtoul saturates to ULONG_MAX and sets ERANGE (it never yields LONG_MIN)
+    if ((end_ptr == str) || (ret == ULONG_MAX && errno == ERANGE)) {
+        GAPI_LOG_WARNING(nullptr, "strtoul failed for: " << str);
+        GAPI_Assert(false && "strtoul_or_throw");
+    }
+    return ret;
+}
+
 size_t strtoull_or_throw(const char* str) {
     char *end_ptr = nullptr;
     errno = 0;
diff --git a/modules/gapi/src/streaming/onevpl/cfg_params_parser.hpp b/modules/gapi/src/streaming/onevpl/cfg_params_parser.hpp
index c5e7685756..e01d5c412a 100644
--- a/modules/gapi/src/streaming/onevpl/cfg_params_parser.hpp
+++ b/modules/gapi/src/streaming/onevpl/cfg_params_parser.hpp
@@ -31,6 +31,11 @@ struct ParamCreator {
 
 mfxVariant cfg_param_to_mfx_variant(const CfgParam& value);
 
+void extract_optional_param_by_name(const std::string &name,
+                                    const std::vector<CfgParam>& in_params,
+                                    cv::util::optional<size_t> &out_param);
+
+unsigned long strtoul_or_throw(const char* str);
 size_t strtoull_or_throw(const char* str);
 int64_t strtoll_or_throw(const char* str);
 
diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp
index 6707a401b1..d8af94f939 100644
--- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp
+++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp
@@ -26,6 +26,31 @@ namespace gapi {
 namespace wip {
 namespace onevpl {
 
+void VPLLegacyDecodeEngine::try_modify_pool_size_request_param(const char* param_name,
+                                                               size_t new_frames_count,
+                                                               mfxFrameAllocRequest& request) {
+    // A pool smaller than the minimum stated in the request is rejected
+    if (new_frames_count < request.NumFrameMin) {
+        GAPI_LOG_WARNING(nullptr, "Cannot proceed with CfgParam \"" << param_name << "\": " <<
+                                  new_frames_count << ". It must be equal or greater than "
+                                  "mfxFrameAllocRequest.NumFrameMin: " << request.NumFrameMin);
+        throw std::runtime_error(std::string("Invalid value of param: ") + param_name + ", underflow");
+    }
+    // The count is stored as mfxU16 below, so anything above its maximum is rejected too
+    if (static_cast<size_t>(std::numeric_limits<mfxU16>::max()) < new_frames_count) {
+        GAPI_LOG_WARNING(nullptr, "Cannot proceed with CfgParam \"" << param_name << "\": " <<
+                                  new_frames_count << ". It must not be greater than " <<
+                                  std::numeric_limits<mfxU16>::max());
+        throw std::runtime_error(std::string("Invalid value of param: ") + param_name + ", overflow");
+    }
+    // Both bounds hold: override the suggested frames count with the requested one
+    request.NumFrameSuggested = static_cast<mfxU16>(new_frames_count);
+    GAPI_LOG_DEBUG(nullptr, "mfxFrameAllocRequest overriden by user input: " <<
+                            ", mfxFrameAllocRequest.NumFrameMin: " << request.NumFrameMin <<
+                            ", mfxFrameAllocRequest.NumFrameSuggested: " << request.NumFrameSuggested <<
+                            ", mfxFrameAllocRequest.Type: " << request.Type);
+}
+
 VPLLegacyDecodeEngine::VPLLegacyDecodeEngine(std::unique_ptr<VPLAccelerationPolicy>&& accel)
  : ProcessingEngineBase(std::move(accel)) {
 
@@ -138,11 +163,12 @@ VPLLegacyDecodeEngine::VPLLegacyDecodeEngine(std::unique_ptr<VPLAccelerationPoli
     );
 }
 
-ProcessingEngineBase::session_ptr
-VPLLegacyDecodeEngine::initialize_session(mfxSession mfx_session,
-                                          const std::vector<CfgParam>& cfg_params,
-                                          std::shared_ptr<IDataProvider> provider) {
-    GAPI_DbgAssert(provider && "Cannot create decoder, data provider is nullptr");
+VPLLegacyDecodeEngine::SessionParam VPLLegacyDecodeEngine::prepare_session_param(
+                                                mfxSession mfx_session,
+                                                const std::vector<CfgParam>& cfg_params,
+                                                std::shared_ptr<IDataProvider> provider) {
+
+     GAPI_DbgAssert(provider && "Cannot create decoder, data provider is nullptr");
 
     // init session
     acceleration_policy->init(mfx_session);
@@ -206,78 +232,55 @@ VPLLegacyDecodeEngine::initialize_session(mfxSession mfx_session,
                             ", mfxFrameAllocRequest.Type: " << decRequest.Type);
 
     // NB: override NumFrameSuggested preallocation size (how many frames we can hold)
-    size_t preallocated_frames_count = decRequest.NumFrameSuggested;
-    // NB: if you see bunch of WARNING about "cannot get free surface from pool"
-    // and have abundant RAM size then increase `preallocated_frames_count`
+    // if you see bunch of WARNING about "cannot get free surface from pool"
+    // and have abundant RAM size then increase `CfgParam::frames_pool_size_name()`
     // to keep more free surfaces in a round. Otherwise VPL decode pipeline will be waiting
     // till application is freeing unusable surface on its side.
     //
-    auto queue_capacity_it = std::find_if(cfg_params.begin(), cfg_params.end(), [] (const CfgParam& value) {
-        return value.get_name() == CfgParam::frames_pool_size_name();
-    });
-    if (queue_capacity_it != cfg_params.end()) {
-        cv::util::visit(cv::util::overload_lambdas(
-            [&preallocated_frames_count](uint8_t value)   { preallocated_frames_count = static_cast<size_t>(value);   },
-            [&preallocated_frames_count](int8_t value)    { preallocated_frames_count = static_cast<size_t>(value);   },
-            [&preallocated_frames_count](uint16_t value)  { preallocated_frames_count = static_cast<size_t>(value);   },
-            [&preallocated_frames_count](int16_t value)   { preallocated_frames_count = static_cast<size_t>(value);   },
-            [&preallocated_frames_count](uint32_t value)  { preallocated_frames_count = static_cast<size_t>(value);   },
-            [&preallocated_frames_count](int32_t value)   { preallocated_frames_count = static_cast<size_t>(value);   },
-            [&preallocated_frames_count](uint64_t value)  { preallocated_frames_count = static_cast<size_t>(value);   },
-            [&preallocated_frames_count](int64_t value)   { preallocated_frames_count = static_cast<size_t>(value);   },
-            [&preallocated_frames_count](float_t value)   { preallocated_frames_count = static_cast<size_t>(value);   },
-            [&preallocated_frames_count](double_t value)  { preallocated_frames_count = static_cast<size_t>(value);   },
-            [&preallocated_frames_count](void*)     { GAPI_Assert(false && "`void*` is unsupported type");  },
-            [&preallocated_frames_count](const std::string& value) {
-                preallocated_frames_count = strtoull_or_throw(value.c_str());
-            }),
-            queue_capacity_it->get_value());
-
+    cv::optional<size_t> preallocated_frames_count_cfg;
+    extract_optional_param_by_name(CfgParam::frames_pool_size_name(),
+                                   cfg_params,
+                                   preallocated_frames_count_cfg);
+    if (preallocated_frames_count_cfg.has_value()) {
         GAPI_LOG_INFO(nullptr, "Try to use CfgParam \"" << CfgParam::frames_pool_size_name() << "\": " <<
-                      preallocated_frames_count << ", for session: " << mfx_session);
+                      preallocated_frames_count_cfg.value() << ", for session: " << mfx_session);
+        try_modify_pool_size_request_param(CfgParam::frames_pool_size_name(),
+                                           preallocated_frames_count_cfg.value(),
+                                           decRequest);
 
     }
-    if (preallocated_frames_count < decRequest.NumFrameMin) {
-        GAPI_LOG_WARNING(nullptr, "Cannot proceed with CfgParam \"" << CfgParam::frames_pool_size_name() << "\": " <<
-                                  preallocated_frames_count << ". It must be equal or greater than "
-                                  "mfxFrameAllocRequest.NumFrameMin: " << decRequest.NumFrameMin);
-        throw std::runtime_error(std::string("Invalid value of param: ") +
-                                 CfgParam::frames_pool_size_name() + ", underflow");
-    } else {
-        if (static_cast<size_t>(std::numeric_limits<mfxU16>::max()) < preallocated_frames_count) {
-            GAPI_LOG_WARNING(nullptr, "Cannot proceed with CfgParam \"" << CfgParam::frames_pool_size_name() << "\": " <<
-                                  preallocated_frames_count << ". It must not be equal than " <<
-                                  std::numeric_limits<mfxU16>::max());
-            throw std::runtime_error(std::string("Invalid value of param: ") +
-                                 CfgParam::frames_pool_size_name() + ", overflow");
-        }
-        decRequest.NumFrameSuggested = static_cast<mfxU16>(preallocated_frames_count);
-        GAPI_LOG_DEBUG(nullptr, "mfxFrameAllocRequest overriden by user input for session: " << mfx_session <<
-                            ", mfxFrameAllocRequest.NumFrameMin: " << decRequest.NumFrameMin <<
-                            ", mfxFrameAllocRequest.NumFrameSuggested: " << decRequest.NumFrameSuggested <<
-                            ", mfxFrameAllocRequest.Type: " << decRequest.Type);
-    }
 
+    decRequest.Type |= MFX_MEMTYPE_EXTERNAL_FRAME | MFX_MEMTYPE_FROM_DECODE | MFX_MEMTYPE_FROM_VPPIN;
     VPLAccelerationPolicy::pool_key_t decode_pool_key =
-                acceleration_policy->create_surface_pool(decRequest, mfxDecParams);
+                acceleration_policy->create_surface_pool(decRequest, mfxDecParams.mfx.FrameInfo);
 
     // Input parameters finished, now initialize decode
     // create decoder for session accoring to header recovered from source file
+
     sts = MFXVideoDECODE_Init(mfx_session, &mfxDecParams);
     if (MFX_ERR_NONE != sts) {
         throw std::runtime_error("Error initializing Decode, error: " +
                                  mfxstatus_to_string(sts));
     }
 
-    DecoderParams decoder_param {bitstream, mfxDecParams};
+    return {decode_pool_key, {bitstream, mfxDecParams, preallocated_frames_count_cfg}};
+}
+
+
+ProcessingEngineBase::session_ptr
+VPLLegacyDecodeEngine::initialize_session(mfxSession mfx_session,
+                                          const std::vector<CfgParam>& cfg_params,
+                                          std::shared_ptr<IDataProvider> provider) {
+
+    SessionParam param = prepare_session_param(mfx_session, cfg_params, provider);
 
     // create session
     std::shared_ptr<LegacyDecodeSession> sess_ptr =
                 register_session<LegacyDecodeSession>(mfx_session,
-                                                      std::move(decoder_param),
+                                                      std::move(param.decoder_params),
                                                       provider);
 
-    sess_ptr->init_surface_pool(decode_pool_key);
+    sess_ptr->init_surface_pool(param.decode_pool_key);
     // prepare working decode surface
     sess_ptr->swap_surface(*this);
     return sess_ptr;
diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp
index f6a02db3db..1b7bee6a82 100644
--- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp
+++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp
@@ -24,20 +24,31 @@ struct DecoderParams;
 struct IDataProvider;
 struct VPLAccelerationPolicy;
 
-class VPLLegacyDecodeEngine : public ProcessingEngineBase {
+class GAPI_EXPORTS VPLLegacyDecodeEngine : public ProcessingEngineBase {
 public:
 
     VPLLegacyDecodeEngine(std::unique_ptr<VPLAccelerationPolicy>&& accel);
-    session_ptr initialize_session(mfxSession mfx_session,
-                                   const std::vector<CfgParam>& cfg_params,
-                                   std::shared_ptr<IDataProvider> provider) override;
+    virtual session_ptr initialize_session(mfxSession mfx_session,
+                                           const std::vector<CfgParam>& cfg_params,
+                                           std::shared_ptr<IDataProvider> provider) override;
+protected:
+    struct SessionParam {
+        void* decode_pool_key;
+        DecoderParams decoder_params;
+    };
+
+    SessionParam prepare_session_param(mfxSession mfx_session,
+                                       const std::vector<CfgParam>& cfg_params,
+                                       std::shared_ptr<IDataProvider> provider);
 
-private:
     ExecutionStatus execute_op(operation_t& op, EngineSession& sess) override;
     ExecutionStatus process_error(mfxStatus status, LegacyDecodeSession& sess);
 
     void on_frame_ready(LegacyDecodeSession& sess,
                         mfxFrameSurface1* ready_surface);
+    static void try_modify_pool_size_request_param(const char* param_name,
+                                                   size_t new_frames_count,
+                                                   mfxFrameAllocRequest& request);
 };
 } // namespace onevpl
 } // namespace wip
diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp
index bbb1378767..56e51ffd9f 100644
--- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp
+++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp
@@ -11,7 +11,6 @@
 
 #include "streaming/onevpl/engine/decode/decode_session.hpp"
 #include "streaming/onevpl/engine/decode/decode_engine_legacy.hpp"
-#include "streaming/onevpl/accelerators/accel_policy_interface.hpp"
 #include "streaming/onevpl/accelerators/surface/surface.hpp"
 #include "streaming/onevpl/utils.hpp"
 
@@ -75,8 +74,8 @@ Data::Meta LegacyDecodeSession::generate_frame_meta() {
     return meta;
 }
 
-const mfxVideoParam& LegacyDecodeSession::get_video_param() const {
-    return mfx_decoder_param;
+const mfxFrameInfo& LegacyDecodeSession::get_video_param() const {
+    return mfx_decoder_param.mfx.FrameInfo;
 }
 } // namespace onevpl
 } // namespace wip
diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp
index 476a575172..356f9851cd 100644
--- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp
+++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp
@@ -26,9 +26,10 @@ struct IDataProvider;
 class Surface;
 struct VPLAccelerationPolicy;
 
-class LegacyDecodeSession : public EngineSession {
+class GAPI_EXPORTS LegacyDecodeSession : public EngineSession {
 public:
     friend class VPLLegacyDecodeEngine;
+    friend class VPLLegacyTranscodeEngine; //TODO: remove friend add method
 
     LegacyDecodeSession(mfxSession sess, DecoderParams&& decoder_param, std::shared_ptr<IDataProvider> provider);
     ~LegacyDecodeSession();
@@ -38,15 +39,15 @@ public:
     void init_surface_pool(VPLAccelerationPolicy::pool_key_t key);
 
     Data::Meta generate_frame_meta();
-    const mfxVideoParam& get_video_param() const override;
+    virtual const mfxFrameInfo& get_video_param() const override;
 private:
     mfxVideoParam mfx_decoder_param;
     std::shared_ptr<IDataProvider> data_provider;
     VPLAccelerationPolicy::pool_key_t decoder_pool_id;
     mfxFrameAllocRequest request;
 
+protected:
     std::weak_ptr<Surface> procesing_surface_ptr;
-
     using op_handle_t = std::pair<mfxSyncPoint, mfxFrameSurface1*>;
     std::queue<op_handle_t> sync_queue;
 
diff --git a/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp b/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp
index 67018d0fd7..8a1f4383eb 100644
--- a/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp
+++ b/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp
@@ -29,6 +29,11 @@ namespace onevpl {
 struct GAPI_EXPORTS DecoderParams {
     std::shared_ptr<IDataProvider::mfx_bitstream> stream;
     mfxVideoParam param;
+    cv::optional<size_t> preallocated_frames_count;
+};
+
+struct GAPI_EXPORTS TranscoderParams {
+    mfxVideoParam param;
 };
 
 struct GAPI_EXPORTS EngineSession {
@@ -41,7 +46,7 @@ struct GAPI_EXPORTS EngineSession {
     std::string error_code_to_str() const;
     virtual ~EngineSession();
 
-    virtual const mfxVideoParam& get_video_param() const = 0;
+    virtual const mfxFrameInfo& get_video_param() const = 0;
 };
 } // namespace onevpl
 } // namespace wip
diff --git a/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.cpp b/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.cpp
index 72f2f62fc4..35cd664219 100644
--- a/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.cpp
+++ b/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.cpp
@@ -36,7 +36,7 @@ ProcessingEngineBase::ExecutionStatus ProcessingEngineBase::process(mfxSession s
     session_ptr processing_session = sess_it->second;
     ExecutionData& exec_data = execution_table[session];
 
-    GAPI_LOG_DEBUG(nullptr, "[" << session <<"] start op id: " << exec_data.op_id);
+    GAPI_LOG_DEBUG(nullptr, "[" << session << "] start op id: " << exec_data.op_id);
     ExecutionStatus status = execute_op(pipeline.at(exec_data.op_id), *processing_session);
     size_t old_op_id = exec_data.op_id++;
     if (exec_data.op_id == pipeline.size())
@@ -44,10 +44,10 @@ ProcessingEngineBase::ExecutionStatus ProcessingEngineBase::process(mfxSession s
         exec_data.op_id = 0;
     }
     cv::util::suppress_unused_warning(old_op_id);
-    GAPI_LOG_DEBUG(nullptr, "[" << session <<"] finish op id: " << old_op_id <<
-                                    ", " << processing_session->error_code_to_str() <<
-                                    ", " << ProcessingEngineBase::status_to_string(status) <<
-                                    ", next op id: " << exec_data.op_id);
+    GAPI_LOG_DEBUG(nullptr, "[" << session << "] finish op id: " << old_op_id <<
+                            ", " << processing_session->error_code_to_str() <<
+                            ", " << ProcessingEngineBase::status_to_string(status) <<
+                            ", next op id: " << exec_data.op_id);
 
     if (status == ExecutionStatus::Failed) {
 
@@ -81,7 +81,7 @@ const char* ProcessingEngineBase::status_to_string(ExecutionStatus status)
 
 ProcessingEngineBase::ExecutionStatus ProcessingEngineBase::execute_op(operation_t& op, EngineSession& sess)
 {
-     return op(sess);
+    return op(sess);
 }
 
 size_t ProcessingEngineBase::get_ready_frames_count() const
diff --git a/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.hpp b/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.hpp
index 059ef963de..cacc8bd748 100644
--- a/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.hpp
+++ b/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.hpp
@@ -67,16 +67,25 @@ protected:
 
     std::vector<operation_t> pipeline;
     std::unique_ptr<VPLAccelerationPolicy> acceleration_policy;
-
+public:
     virtual ExecutionStatus execute_op(operation_t& op, EngineSession& sess);
 
     template<class ...Ops>
     void create_pipeline(Ops&&...ops)
     {
-        GAPI_DbgAssert(pipeline.empty() && "Pipeline must be empty");
         std::vector<operation_t>({std::forward<Ops>(ops)...}).swap(pipeline);
     }
 
+    template<class ...Ops>
+    void inject_pipeline_operations(size_t in_position, Ops&&...ops)
+    {
+        // in_position may equal pipeline.size(): the operations are then appended at the end
+        GAPI_Assert(pipeline.size() >= in_position &&
+                    "Invalid position to inject pipeline operation");
+        const auto offset = static_cast<std::vector<operation_t>::difference_type>(in_position);
+        pipeline.insert(std::next(pipeline.begin(), offset), {std::forward<Ops>(ops)...});
+    }
+
     template<class SpecificSession, class ...SessionArgs>
     std::shared_ptr<SpecificSession> register_session(mfxSession key,
                                                       SessionArgs&& ...args)
diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp
new file mode 100644
index 0000000000..36d36d5ec0
--- /dev/null
+++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp
@@ -0,0 +1,477 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2021 Intel Corporation
+
+#ifdef HAVE_ONEVPL
+
+#include <algorithm>
+#include <exception>
+
+#include <opencv2/gapi/streaming/onevpl/data_provider_interface.hpp>
+#include "streaming/onevpl/data_provider_defines.hpp"
+
+#include "streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp"
+#include "streaming/onevpl/engine/transcode/transcode_session.hpp"
+#include "streaming/onevpl/accelerators/accel_policy_interface.hpp"
+#include "streaming/onevpl/accelerators/surface/surface.hpp"
+#include "streaming/onevpl/cfg_params_parser.hpp"
+#include "streaming/onevpl/utils.hpp"
+#include "logger.hpp"
+
+#define ALIGN16(value)           ((((value) + 15) >> 4) << 4)  /* round up to a multiple of 16; argument parenthesized */
+
+namespace cv {
+namespace gapi {
+namespace wip {
+namespace onevpl {
+
+// Copies the VPP parameter `name` from `params_storage` into `out_vpp_param` and logs it.
+// Returns false, leaving `out_vpp_param` untouched, when no such parameter is stored.
+template<typename Type>
+bool set_vpp_param(const char* name, Type& out_vpp_param,
+                   const std::map<std::string, mfxVariant> &params_storage,
+                   mfxSession session);
+
+template<>
+bool set_vpp_param<uint32_t>(const char* name, uint32_t& out_vpp_param,
+                             const std::map<std::string, mfxVariant> &params_storage,
+                             mfxSession session) {
+    const auto found = params_storage.find(name);
+    if (found == params_storage.end()) {
+        return false;
+    }
+    out_vpp_param = found->second.Data.U32;  // 32-bit payload of the variant
+    GAPI_LOG_INFO(nullptr, "[" << session << "] set \"" << name <<
+                           "\": " << out_vpp_param);
+    return true;
+}
+
+template<>
+bool set_vpp_param<uint16_t>(const char* name, uint16_t& out_vpp_param,
+                             const std::map<std::string, mfxVariant> &params_storage,
+                             mfxSession session) {
+    const auto found = params_storage.find(name);
+    if (found == params_storage.end()) {
+        return false;
+    }
+    out_vpp_param = found->second.Data.U16;  // 16-bit payload of the variant
+    GAPI_LOG_INFO(nullptr, "[" << session << "] set \"" << name <<
+                           "\": " << out_vpp_param);
+    return true;
+}
+
+std::map<std::string, mfxVariant>
+    VPLLegacyTranscodeEngine::get_vpp_params(const std::vector<CfgParam> &cfg_params) {
+    std::map<std::string, mfxVariant> ret;
+    static const std::string vpp_param_prefix {"vpp."};
+    for (const auto &param : cfg_params) {
+        // keep only names starting with "vpp."; anchored compare instead of a full strstr scan
+        if (param.get_name().compare(0, vpp_param_prefix.size(), vpp_param_prefix) == 0) {
+            ret.emplace(param.get_name(), cfg_param_to_mfx_variant(param));
+        }
+    }
+    GAPI_LOG_INFO(nullptr, "Detected VPP params count: [" << ret.size() <<
+                            "/" << cfg_params.size() << "]");
+    return ret;
+}
+
+VPLLegacyTranscodeEngine::VPLLegacyTranscodeEngine(std::unique_ptr<VPLAccelerationPolicy>&& accel)
+ : VPLLegacyDecodeEngine(std::move(accel)) {
+
+    GAPI_LOG_INFO(nullptr, "Create Legacy Transcode Engine");
+    //inject_pipeline_operations(2,
+    create_pipeline(
+        // 1) Read File
+        [this] (EngineSession& sess) -> ExecutionStatus
+        {
+            LegacyTranscodeSession &my_sess = static_cast<LegacyTranscodeSession&>(sess);
+            if (!my_sess.data_provider) {
+                my_sess.last_status = MFX_ERR_MORE_DATA;
+                return ExecutionStatus::Continue;
+            }
+
+            my_sess.last_status = MFX_ERR_NONE;
+            if (!my_sess.data_provider->fetch_bitstream_data(my_sess.stream)) {
+                my_sess.last_status = MFX_ERR_MORE_DATA;
+                my_sess.data_provider.reset(); //close source
+            }
+            return ExecutionStatus::Continue;
+        },
+        // 2) enqueue ASYNC decode operation
+        [this] (EngineSession& sess) -> ExecutionStatus
+        {
+            LegacyTranscodeSession &my_sess = static_cast<LegacyTranscodeSession&>(sess);
+
+            // prepare sync object for new surface
+            LegacyTranscodeSession::op_handle_t sync_pair{};
+
+            // enqueue decode operation with current session surface
+            my_sess.last_status =
+                    MFXVideoDECODE_DecodeFrameAsync(my_sess.session,
+                                                    (my_sess.data_provider || (my_sess.stream && my_sess.stream->DataLength))
+                                                        ? my_sess.stream.get()
+
+                                                        : nullptr, /* No more data to read, start decode draining mode*/
+                                                    my_sess.procesing_surface_ptr.lock()->get_handle(),
+                                                    &sync_pair.second,
+                                                    &sync_pair.first);
+
+            GAPI_LOG_DEBUG(nullptr, "START decode: " <<
+                                    ", sync id:  " <<
+                                    sync_pair.first <<
+                                    ", dec in surface:  " <<
+                                    my_sess.procesing_surface_ptr.lock()->get_handle() <<
+                                    ", dec out surface: " << sync_pair.second <<
+                                    ", status: " <<
+                                    mfxstatus_to_string(my_sess.last_status));
+
+            // process wait-like statuses in-place:
+            // It had better to use up all VPL decoding resources in pipeline
+            // as soon as possible. So waiting more free-surface or device free
+            while (my_sess.last_status == MFX_ERR_MORE_SURFACE ||
+                   my_sess.last_status == MFX_WRN_DEVICE_BUSY) {
+                try {
+                    if (my_sess.last_status == MFX_ERR_MORE_SURFACE) {
+                        my_sess.swap_surface(*this);
+                    }
+                    my_sess.last_status =
+                    MFXVideoDECODE_DecodeFrameAsync(my_sess.session,
+                                                    my_sess.stream.get(),
+                                                    my_sess.procesing_surface_ptr.lock()->get_handle(),
+                                                    &sync_pair.second,
+                                                    &sync_pair.first);
+
+                } catch (const std::runtime_error& ex) {
+                    // NB: not an error, yield CPU ticks to check
+                    // surface availability at a next phase.
+                    // But print WARNING to notify user about pipeline stuck
+                    GAPI_LOG_WARNING(nullptr, "[" << my_sess.session <<
+                                               "] has no surface, reason: " <<
+                                               ex.what());
+                    break;
+                }
+            }
+
+            if (my_sess.last_status == MFX_ERR_NONE) {
+                my_sess.sync_queue.emplace(sync_pair);
+            } else if (my_sess.last_status != MFX_ERR_MORE_DATA) /* suppress MFX_ERR_MORE_DATA warning */ {
+                GAPI_LOG_WARNING(nullptr, "decode pending ops count: " <<
+                                          my_sess.sync_queue.size() <<
+                                          ", sync id: " << sync_pair.first <<
+                                          ", status: " <<
+                                          mfxstatus_to_string(my_sess.last_status));
+            }
+            return ExecutionStatus::Continue;
+        },
+        // 3) transcode
+        [this] (EngineSession& sess) -> ExecutionStatus
+        {
+            LegacyTranscodeSession &my_sess = static_cast<LegacyTranscodeSession&>(sess);
+
+            LegacyDecodeSession::op_handle_t last_op {};
+            while (!my_sess.sync_queue.empty()) {
+                do {
+                    if (!my_sess.vpp_surface_ptr.expired()) {
+                        LegacyDecodeSession::op_handle_t pending_op = my_sess.sync_queue.front();
+                        GAPI_LOG_DEBUG(nullptr, "pending DEC ops count: " <<
+                                                my_sess.sync_queue.size() <<
+                                                ", sync id:  " <<
+                                                pending_op.first <<
+                                                ", surface:  " <<
+                                                pending_op.second <<
+                                                ", status: " <<
+                                                mfxstatus_to_string(my_sess.last_status));
+
+                        my_sess.sync_queue.pop();
+                        auto *dec_surface = pending_op.second;
+                        auto *vpp_suface = my_sess.vpp_surface_ptr.lock()->get_handle();
+                        my_sess.last_status = MFXVideoVPP_RunFrameVPPAsync(my_sess.session,
+                                                                           dec_surface,
+                                                                           vpp_suface,
+                                                                           nullptr, &pending_op.first);
+                        pending_op.second = vpp_suface;
+
+                        GAPI_LOG_DEBUG(nullptr, "START transcode ops count: " <<
+                                                my_sess.vpp_queue.size() <<
+                                                ", sync id:  " <<
+                                                pending_op.first <<
+                                                ", dec surface:  " <<
+                                                dec_surface <<
+                                                ", trans surface: " << pending_op.second <<
+                                                ", status: " <<
+                                                mfxstatus_to_string(my_sess.last_status));
+
+                        if (my_sess.last_status == MFX_ERR_MORE_SURFACE ||
+                            my_sess.last_status == MFX_ERR_NONE) {
+                            pending_op.second->Data.Locked++; // TODO -S- workaround
+                            my_sess.vpp_queue.emplace(pending_op);
+                        }
+                    }
+
+                    try {
+                        my_sess.swap_transcode_surface(*this);
+                    } catch (const std::runtime_error& ex) {
+                        // NB: not an error, yield CPU ticks to check
+                        // surface availability at a next phase.
+                        // But print WARNING to notify user about pipeline stuck
+                        GAPI_LOG_WARNING(nullptr, "[" << my_sess.session <<
+                                                    "] has no VPP surface, reason: " <<
+                                                  ex.what());
+                        my_sess.vpp_surface_ptr.reset();
+                        break;
+                    }
+                } while(my_sess.last_status == MFX_ERR_MORE_SURFACE);
+
+                if (my_sess.vpp_surface_ptr.expired()) {
+                    // TODO break main loop
+                    break;
+                }
+            }
+            return ExecutionStatus::Continue;
+        },
+        // 4) Wait for ASYNC transcode (VPP) result
+        [this] (EngineSession& sess) -> ExecutionStatus
+        {
+            LegacyTranscodeSession& my_sess = static_cast<LegacyTranscodeSession&>(sess);
+            do {
+                if (!my_sess.vpp_queue.empty()) { // FIFO: check the oldest async operation complete
+                    LegacyDecodeSession::op_handle_t& pending_op = my_sess.vpp_queue.front();
+                    sess.last_status = MFXVideoCORE_SyncOperation(sess.session, pending_op.first, 0);
+
+                    GAPI_LOG_DEBUG(nullptr, "pending VPP ops count: " <<
+                                            my_sess.vpp_queue.size() <<
+                                            ", sync id:  " <<
+                                            pending_op.first <<
+                                            ", surface:  " <<
+                                            pending_op.second <<
+                                            ", status: " <<
+                                            mfxstatus_to_string(my_sess.last_status));
+
+                    // put frames in ready queue on success
+                    if (MFX_ERR_NONE == sess.last_status) {
+                        on_frame_ready(my_sess, pending_op.second);
+                    }
+                }
+            } while (MFX_ERR_NONE == sess.last_status && !my_sess.vpp_queue.empty());
+            return ExecutionStatus::Continue;
+        },
+        // 5) Falls back on generic status processing
+        [this] (EngineSession& sess) -> ExecutionStatus
+        {
+            return this->process_error(sess.last_status, static_cast<LegacyDecodeSession&>(sess));
+        }
+    );
+}
+
+ProcessingEngineBase::session_ptr
+VPLLegacyTranscodeEngine::initialize_session(mfxSession mfx_session,
+                                             const std::vector<CfgParam>& cfg_params,
+                                             std::shared_ptr<IDataProvider> provider) {
+    // NB: obtain decoder params first: the VPP setup below is derived from them
+    VPLLegacyDecodeEngine::SessionParam decode_params =
+                        prepare_session_param(mfx_session, cfg_params, provider);
+
+
+    // NB: decoder video params are the basis for the transcode (VPP) params
+    const auto& mfxDecParams = decode_params.decoder_params.param;
+
+    // NB: create transcode params: VPP In = decoded frame info, VPP Out = In by default
+    mfxVideoParam mfxVPPParams{0};
+    mfxVPPParams.vpp.In = mfxDecParams.mfx.FrameInfo;
+    mfxVPPParams.vpp.Out = mfxVPPParams.vpp.In;
+
+    std::map<std::string, mfxVariant> cfg_vpp_params =
+                        VPLLegacyTranscodeEngine::get_vpp_params(cfg_params);
+
+    // override some in-params from the user config; an overridden Width/Height is re-aligned with ALIGN16
+    if (set_vpp_param(CfgParam::vpp_in_width_name(), mfxVPPParams.vpp.In.Width,
+                  cfg_vpp_params, mfx_session)) {
+        mfxVPPParams.vpp.In.Width = ALIGN16(mfxVPPParams.vpp.In.Width);
+    }
+    if (set_vpp_param(CfgParam::vpp_in_height_name(), mfxVPPParams.vpp.In.Height,
+                  cfg_vpp_params, mfx_session)) {
+        mfxVPPParams.vpp.In.Height = ALIGN16(mfxVPPParams.vpp.In.Height);
+    }
+    set_vpp_param(CfgParam::vpp_in_crop_x_name(), mfxVPPParams.vpp.In.CropX,
+                  cfg_vpp_params, mfx_session);
+    set_vpp_param(CfgParam::vpp_in_crop_y_name(), mfxVPPParams.vpp.In.CropY,
+                  cfg_vpp_params, mfx_session);
+    set_vpp_param(CfgParam::vpp_in_crop_w_name(), mfxVPPParams.vpp.In.CropW,
+                  cfg_vpp_params, mfx_session);
+    set_vpp_param(CfgParam::vpp_in_crop_h_name(), mfxVPPParams.vpp.In.CropH,
+                  cfg_vpp_params, mfx_session);
+
+    // override out params from the user config (Width/Height are re-aligned with ALIGN16 as well)
+    set_vpp_param(CfgParam::vpp_out_fourcc_name(), mfxVPPParams.vpp.Out.FourCC,
+                  cfg_vpp_params, mfx_session);
+    set_vpp_param(CfgParam::vpp_out_chroma_format_name(), mfxVPPParams.vpp.Out.ChromaFormat,
+                  cfg_vpp_params, mfx_session);
+    if (set_vpp_param(CfgParam::vpp_out_width_name(), mfxVPPParams.vpp.Out.Width,
+                  cfg_vpp_params, mfx_session)) {
+        mfxVPPParams.vpp.Out.Width = ALIGN16(mfxVPPParams.vpp.Out.Width);
+    }
+    if (set_vpp_param(CfgParam::vpp_out_height_name(), mfxVPPParams.vpp.Out.Height,
+                  cfg_vpp_params, mfx_session)) {
+        mfxVPPParams.vpp.Out.Height = ALIGN16(mfxVPPParams.vpp.Out.Height);
+    }
+    set_vpp_param(CfgParam::vpp_out_crop_x_name(), mfxVPPParams.vpp.Out.CropX,
+                  cfg_vpp_params, mfx_session);
+    set_vpp_param(CfgParam::vpp_out_crop_y_name(), mfxVPPParams.vpp.Out.CropY,
+                  cfg_vpp_params, mfx_session);
+    set_vpp_param(CfgParam::vpp_out_crop_w_name(), mfxVPPParams.vpp.Out.CropW,
+                  cfg_vpp_params, mfx_session);
+    set_vpp_param(CfgParam::vpp_out_crop_h_name(), mfxVPPParams.vpp.Out.CropH,
+                  cfg_vpp_params, mfx_session);
+    set_vpp_param(CfgParam::vpp_out_pic_struct_name(), mfxVPPParams.vpp.Out.PicStruct,
+                  cfg_vpp_params, mfx_session);
+    set_vpp_param(CfgParam::vpp_out_framerate_n_name(), mfxVPPParams.vpp.Out.FrameRateExtN,
+                  cfg_vpp_params, mfx_session);
+    set_vpp_param(CfgParam::vpp_out_framerate_d_name(), mfxVPPParams.vpp.Out.FrameRateExtD,
+                  cfg_vpp_params, mfx_session);
+
+    VPLLegacyTranscodeEngine::validate_vpp_param(mfxVPPParams);
+    // VPP memory type follows the decoder output: video memory in/out, otherwise system memory in/out
+    if (mfxDecParams.IOPattern == MFX_IOPATTERN_OUT_VIDEO_MEMORY) {
+        mfxVPPParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY;
+    } else {
+        mfxVPPParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_SYSTEM_MEMORY;
+    }
+    GAPI_LOG_INFO(nullptr, "Starting VPP initialization");
+
+    mfxFrameAllocRequest vppRequests[2]; // filled by MFXVideoVPP_QueryIOSurf; only vppRequests[1] is consumed below
+    memset(&vppRequests, 0, sizeof(mfxFrameAllocRequest) * 2);
+    mfxStatus sts = MFXVideoVPP_QueryIOSurf(mfx_session, &mfxVPPParams, vppRequests);
+    if (MFX_ERR_NONE != sts) {
+        GAPI_LOG_WARNING(nullptr, "cannot execute MFXVideoVPP_QueryIOSurf");
+        throw std::runtime_error("Cannot execute MFXVideoVPP_QueryIOSurf, error: " +
+                                  mfxstatus_to_string(sts));
+    }
+
+    // NB: override NumFrameSuggested preallocation size (how many frames we can hold)
+    // if you see a bunch of WARNINGs about "cannot get free surface from pool"
+    // and have abundant RAM then increase `CfgParam::vpp_frames_pool_size_name()`
+    // to keep more free surfaces around. Otherwise the VPL pipeline will be waiting
+    // until the application frees unused surfaces on its side.
+     cv::optional<size_t> preallocated_frames_count_cfg;
+    extract_optional_param_by_name(CfgParam::vpp_frames_pool_size_name(),
+                                   cfg_params,
+                                   preallocated_frames_count_cfg);
+    if (preallocated_frames_count_cfg.has_value()) {
+        GAPI_LOG_INFO(nullptr, "Try to use CfgParam \"" << CfgParam::vpp_frames_pool_size_name() << "\": " <<
+                      preallocated_frames_count_cfg.value() << ", for session: " << mfx_session);
+        try_modify_pool_size_request_param(CfgParam::vpp_frames_pool_size_name(),
+                                           preallocated_frames_count_cfg.value(),
+                                           vppRequests[1]);
+
+    }
+
+    // NB: Assign the ID as the upper-limit value to distinguish the VPP-specific allocation
+    // from decode allocations, which start from 0 by local module convention
+    vppRequests[1].AllocId = std::numeric_limits<uint16_t>::max();
+
+    vppRequests[1].Type |= MFX_MEMTYPE_FROM_VPPIN; // NOTE(review): [1] backs the VPP *output* pool created below, yet is tagged FROM_VPPIN - confirm intended
+    VPLAccelerationPolicy::pool_key_t vpp_out_pool_key =
+                acceleration_policy->create_surface_pool(vppRequests[1], mfxVPPParams.vpp.Out);
+
+    sts = MFXVideoVPP_Init(mfx_session, &mfxVPPParams);
+    if (MFX_ERR_NONE != sts) { // any status other than MFX_ERR_NONE is treated as a failure here
+        GAPI_LOG_WARNING(nullptr, "cannot Init VPP");
+        throw std::runtime_error("Cannot init VPP, error: " +
+                                  mfxstatus_to_string(sts));
+    }
+
+    // create engine session
+    TranscoderParams transcoder_param {mfxVPPParams};
+    std::shared_ptr<LegacyTranscodeSession> sess_ptr =
+                register_session<LegacyTranscodeSession>(mfx_session,
+                                                         std::move(decode_params.decoder_params),
+                                                         std::move(transcoder_param),
+                                                         provider);
+
+    sess_ptr->init_surface_pool(decode_params.decode_pool_key);
+    sess_ptr->init_transcode_surface_pool(vpp_out_pool_key);
+
+    // prepare working surfaces: one for the decoder, one for the VPP output
+    sess_ptr->swap_surface(*this);
+    sess_ptr->swap_transcode_surface(*this);
+    return sess_ptr;
+}
+
+void VPLLegacyTranscodeEngine::validate_vpp_param(const mfxVideoParam& mfxVPPParams) { // asserts that each crop rectangle fits its frame (VPP In and Out)
+    GAPI_LOG_INFO(nullptr, "Starting VPP param validation");
+    if (mfxVPPParams.vpp.In.Width < mfxVPPParams.vpp.In.CropW + mfxVPPParams.vpp.In.CropX) {
+        GAPI_LOG_WARNING(nullptr, "Invalid configuration params: sum \"" <<
+                                  CfgParam::vpp_in_crop_w_name() <<
+                                  "\": " << mfxVPPParams.vpp.In.CropW << " and \"" <<
+                                  CfgParam::vpp_in_crop_x_name() <<
+                                  "\": " << mfxVPPParams.vpp.In.CropX <<
+                                  " must be less or equal to \"" <<
+                                  CfgParam::vpp_in_width_name() << "\": " <<
+                                  mfxVPPParams.vpp.In.Width);
+        GAPI_Assert(false && "Invalid VPP params combination: Width & Crop");
+    }
+    // input frame, vertical extent: CropY + CropH must not exceed Height
+    if (mfxVPPParams.vpp.In.Height < mfxVPPParams.vpp.In.CropH + mfxVPPParams.vpp.In.CropY) {
+        GAPI_LOG_WARNING(nullptr, "Invalid configuration params: sum \"" <<
+                                  CfgParam::vpp_in_crop_h_name() <<
+                                  "\": " << mfxVPPParams.vpp.In.CropH << " and \"" <<
+                                  CfgParam::vpp_in_crop_y_name() <<
+                                  "\": " << mfxVPPParams.vpp.In.CropY <<
+                                  " must be less or equal to \"" <<
+                                  CfgParam::vpp_in_height_name() << "\": " <<
+                                  mfxVPPParams.vpp.In.Height);
+        GAPI_Assert(false && "Invalid VPP params combination: Height & Crop");
+    }
+    // output frame, horizontal extent: CropX + CropW must not exceed Width
+    if (mfxVPPParams.vpp.Out.Width < mfxVPPParams.vpp.Out.CropW + mfxVPPParams.vpp.Out.CropX) {
+        GAPI_LOG_WARNING(nullptr, "Invalid configuration params: sum \"" <<
+                                  CfgParam::vpp_out_crop_w_name() <<
+                                  "\": " << mfxVPPParams.vpp.Out.CropW << " and \"" <<
+                                  CfgParam::vpp_out_crop_x_name() <<
+                                  "\": " << mfxVPPParams.vpp.Out.CropX <<
+                                  " must be less or equal to \"" <<
+                                  CfgParam::vpp_out_width_name() << "\": " <<
+                                  mfxVPPParams.vpp.Out.Width);
+        GAPI_Assert(false && "Invalid VPP params combination: Width & Crop");
+    }
+    // output frame, vertical extent: CropY + CropH must not exceed Height
+    if (mfxVPPParams.vpp.Out.Height < mfxVPPParams.vpp.Out.CropH + mfxVPPParams.vpp.Out.CropY) {
+        GAPI_LOG_WARNING(nullptr, "Invalid configuration params: sum \"" <<
+                                  CfgParam::vpp_out_crop_h_name() <<
+                                  "\": " << mfxVPPParams.vpp.Out.CropH << " and \"" <<
+                                  CfgParam::vpp_out_crop_y_name() <<
+                                  "\": " << mfxVPPParams.vpp.Out.CropY <<
+                                  " must be less or equal to \"" <<
+                                  CfgParam::vpp_out_height_name() << "\": " <<
+                                  mfxVPPParams.vpp.Out.Height);
+        GAPI_Assert(false && "Invalid VPP params combination: Height & Crop");
+    }
+
+    GAPI_LOG_INFO(nullptr, "Finished VPP param validation");
+}
+
+ProcessingEngineBase::ExecutionStatus VPLLegacyTranscodeEngine::execute_op(operation_t& op, EngineSession& sess) {
+    return op(sess); // plain dispatch: invoke the given operation on the session and forward its status
+}
+
+void VPLLegacyTranscodeEngine::on_frame_ready(LegacyTranscodeSession& sess,
+                                              mfxFrameSurface1* ready_surface)
+{
+    GAPI_LOG_DEBUG(nullptr, "[" << sess.session << "], frame ready");
+    // Wraps the synced VPP output surface into a MediaFrame and publishes it to ready_frames.
+    // Memory ownership is delegated to the acceleration policy through the frame adapter
+    ready_surface->Data.Locked--;  // TODO -S- workaround: reverts the Locked++ applied when the op was put into vpp_queue
+    auto frame_adapter = acceleration_policy->create_frame_adapter(sess.vpp_out_pool_id,
+                                                                   ready_surface);
+    ready_frames.emplace(cv::MediaFrame(std::move(frame_adapter)), sess.generate_frame_meta());
+
+    // drop the completed operation from the head of the VPP queue
+    sess.vpp_queue.pop();
+}
+} // namespace onevpl
+} // namespace wip
+} // namespace gapi
+} // namespace cv
+#endif // HAVE_ONEVPL
diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp
new file mode 100644
index 0000000000..cf0621dd93
--- /dev/null
+++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp
@@ -0,0 +1,47 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2021 Intel Corporation
+
+#ifndef GAPI_STREAMING_ONVPL_TRANSCODE_ENGINE_LEGACY_HPP
+#define GAPI_STREAMING_ONVPL_TRANSCODE_ENGINE_LEGACY_HPP
+#include <stdio.h>
+#include <memory>
+
+#include "streaming/onevpl/engine/decode/decode_engine_legacy.hpp"
+
+#ifdef HAVE_ONEVPL
+#include "streaming/onevpl/onevpl_export.hpp"
+
+namespace cv {
+namespace gapi {
+namespace wip {
+namespace onevpl {
+
+class LegacyTranscodeSession;
+struct IDataProvider;
+struct VPLAccelerationPolicy;
+
+class GAPI_EXPORTS VPLLegacyTranscodeEngine : public VPLLegacyDecodeEngine {
+public:
+    // Decode engine extended with a VPP (transcode) stage; sessions it creates are LegacyTranscodeSession.
+    VPLLegacyTranscodeEngine(std::unique_ptr<VPLAccelerationPolicy>&& accel);
+    session_ptr initialize_session(mfxSession mfx_session,
+                                   const std::vector<CfgParam>& cfg_params,
+                                   std::shared_ptr<IDataProvider> provider) override;
+
+    static std::map<std::string, mfxVariant> get_vpp_params(const std::vector<CfgParam> &cfg_params);
+private:
+    ExecutionStatus execute_op(operation_t& op, EngineSession& sess) override;
+
+    void on_frame_ready(LegacyTranscodeSession& sess,
+                        mfxFrameSurface1* ready_surface); // publishes a synced VPP output surface as a ready frame
+    void validate_vpp_param(const mfxVideoParam& mfxVPPParams); // asserts that crop rectangles fit the In/Out frames
+};
+} // namespace onevpl
+} // namespace wip
+} // namespace gapi
+} // namespace cv
+#endif // HAVE_ONEVPL
+#endif // GAPI_STREAMING_ONVPL_TRANSCODE_ENGINE_LEGACY_HPP
diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp
new file mode 100644
index 0000000000..9fcabc7e10
--- /dev/null
+++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp
@@ -0,0 +1,70 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2021 Intel Corporation
+
+#ifdef HAVE_ONEVPL
+
+#include <chrono>
+#include <exception>
+
+#include "streaming/onevpl/engine/transcode/transcode_session.hpp"
+#include "streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp"
+#include "streaming/onevpl/accelerators/surface/surface.hpp"
+#include "streaming/onevpl/utils.hpp"
+
+#include "logger.hpp"
+namespace cv {
+namespace gapi {
+namespace wip {
+namespace onevpl {
+LegacyTranscodeSession::LegacyTranscodeSession(mfxSession sess,
+                                               DecoderParams&& decoder_param,
+                                               TranscoderParams&& transcoder_param,
+                                               std::shared_ptr<IDataProvider> provider) :
+    LegacyDecodeSession(sess, std::move(decoder_param), std::move(provider)), // decode part is set up by the base session
+    mfx_transcoder_param(std::move(transcoder_param.param)) // the session stores the VPP video params by value
+{
+}
+
+LegacyTranscodeSession::~LegacyTranscodeSession()
+{
+    GAPI_LOG_INFO(nullptr, "Close Transcode for session: " << session);
+    MFXVideoVPP_Close(session); // closes the VPP component of the session; the returned mfxStatus is not checked
+}
+
+void LegacyTranscodeSession::init_transcode_surface_pool(VPLAccelerationPolicy::pool_key_t key) {
+    GAPI_Assert(key && "Init transcode pool with empty key");
+    vpp_out_pool_id = key; // remembered: the VPP output pool is addressed by this key afterwards
+}
+
+void LegacyTranscodeSession::swap_transcode_surface(VPLLegacyTranscodeEngine& engine) {
+    VPLAccelerationPolicy* acceleration_policy = engine.get_accel();
+    GAPI_Assert(acceleration_policy && "Empty acceleration_policy");
+    try { // replace the current VPP working surface with a free one from the VPP output pool
+        auto cand = acceleration_policy->get_free_surface(vpp_out_pool_id).lock();
+        // NOTE(review): cand is dereferenced in the log below without a null check - confirm lock() cannot fail here
+        GAPI_LOG_DEBUG(nullptr, "[" << session << "] swap surface"
+                                ", old: " << (!vpp_surface_ptr.expired()
+                                              ? vpp_surface_ptr.lock()->get_handle()
+                                              : nullptr) <<
+                                ", new: "<< cand->get_handle());
+        // the session keeps only a weak reference to the surface
+        vpp_surface_ptr = cand;
+    } catch (const std::runtime_error& ex) {
+        GAPI_LOG_WARNING(nullptr, "[" << session << "] error: " << ex.what());
+
+        // Delegate exception processing to the caller
+        throw;
+    }
+}
+
+const mfxFrameInfo& LegacyTranscodeSession::get_video_param() const {
+    return mfx_transcoder_param.vpp.Out; // frame info of the VPP output stored in this session
+}
+} // namespace onevpl
+} // namespace wip
+} // namespace gapi
+} // namespace cv
+#endif // HAVE_ONEVPL
diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp
new file mode 100644
index 0000000000..aa6f70c587
--- /dev/null
+++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp
@@ -0,0 +1,46 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2021 Intel Corporation
+
+#ifndef GAPI_STREAMING_ONVPL_ENGINE_TRANSCODE_SESSION_HPP
+#define GAPI_STREAMING_ONVPL_ENGINE_TRANSCODE_SESSION_HPP
+
+#ifdef HAVE_ONEVPL
+#include "streaming/onevpl/engine/decode/decode_session.hpp"
+
+namespace cv {
+namespace gapi {
+namespace wip {
+namespace onevpl {
+
+struct IDataProvider;
+class Surface;
+struct VPLAccelerationPolicy;
+
+class GAPI_EXPORTS LegacyTranscodeSession : public LegacyDecodeSession {
+public:
+    friend class VPLLegacyTranscodeEngine;
+    // Decode session extended with VPP (transcode) state; the friend engine accesses the private members directly.
+    LegacyTranscodeSession(mfxSession sess, DecoderParams&& decoder_param,
+                           TranscoderParams&& transcoder_param,
+                           std::shared_ptr<IDataProvider> provider);
+    ~LegacyTranscodeSession();
+
+    void init_transcode_surface_pool(VPLAccelerationPolicy::pool_key_t key); // stores the VPP output pool key
+    void swap_transcode_surface(VPLLegacyTranscodeEngine& engine); // picks a free VPP output surface; rethrows std::runtime_error
+    const mfxFrameInfo& get_video_param() const override; // returns the VPP output frame info
+private:
+    mfxVideoParam mfx_transcoder_param; // VPP video params passed at construction
+
+    VPLAccelerationPolicy::pool_key_t vpp_out_pool_id; // set by init_transcode_surface_pool()
+    std::weak_ptr<Surface> vpp_surface_ptr; // current VPP working surface (non-owning)
+    std::queue<op_handle_t> vpp_queue; // FIFO of submitted VPP operations awaiting sync
+};
+} // namespace onevpl
+} // namespace wip
+} // namespace gapi
+} // namespace cv
+#endif // HAVE_ONEVPL
+#endif // GAPI_STREAMING_ONVPL_ENGINE_TRANSCODE_SESSION_HPP
diff --git a/modules/gapi/src/streaming/onevpl/file_data_provider.hpp b/modules/gapi/src/streaming/onevpl/file_data_provider.hpp
index cfa1245916..10171999a0 100644
--- a/modules/gapi/src/streaming/onevpl/file_data_provider.hpp
+++ b/modules/gapi/src/streaming/onevpl/file_data_provider.hpp
@@ -18,7 +18,7 @@ namespace cv {
 namespace gapi {
 namespace wip {
 namespace onevpl {
-struct FileDataProvider : public IDataProvider {
+struct GAPI_EXPORTS FileDataProvider : public IDataProvider {
 
     using file_ptr = std::unique_ptr<FILE, decltype(&fclose)>;
     FileDataProvider(const std::string& file_path,
diff --git a/modules/gapi/src/streaming/onevpl/source_priv.cpp b/modules/gapi/src/streaming/onevpl/source_priv.cpp
index fd2a401957..d1ff15b06d 100644
--- a/modules/gapi/src/streaming/onevpl/source_priv.cpp
+++ b/modules/gapi/src/streaming/onevpl/source_priv.cpp
@@ -8,6 +8,7 @@
 #include <sstream>
 
 #include "streaming/onevpl/engine/decode/decode_engine_legacy.hpp"
+#include "streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp"
 #include "streaming/onevpl/accelerators/accel_policy_dx11.hpp"
 #include "streaming/onevpl/accelerators/accel_policy_cpu.hpp"
 #include "streaming/onevpl/utils.hpp"
@@ -106,6 +107,20 @@ GSource::Priv::Priv(std::shared_ptr<IDataProvider> provider,
             GAPI_Assert(false && "MFXSetConfigFilterProperty failed");
         }
 
+        mfx_param.Type     = MFX_VARIANT_TYPE_U32;
+        mfx_param.Data.U32 = MFX_EXTBUFF_VPP_SCALING;
+        sts = MFXSetConfigFilterProperty(cfg_inst,
+        (mfxU8 *)"mfxImplDescription.mfxVPPDescription.filter.FilterFourCC",
+        mfx_param);
+
+        if (sts != MFX_ERR_NONE )
+        {
+            GAPI_LOG_WARNING(nullptr, "MFXSetConfigFilterProperty failed, error: " <<
+                                      mfxstatus_to_string(sts) <<
+                                      " - for \"mfxImplDescription.mfxVPPDescription.filter.FilterFourCC\"");
+            GAPI_Assert(false && "MFXSetConfigFilterProperty failed");
+        }
+
         ++cfg_param_it;
     }
 
@@ -204,7 +219,12 @@ GSource::Priv::Priv(std::shared_ptr<IDataProvider> provider,
                         "GSource mfx_impl_description->ApiVersion.Major >= VPL_NEW_API_MAJOR_VERSION"
                         " - is not implemented");
         } else {
-            engine.reset(new VPLLegacyDecodeEngine(std::move(acceleration)));
+            const auto& transcode_params = VPLLegacyTranscodeEngine::get_vpp_params(preferred_params);
+            if (!transcode_params.empty()) {
+                engine.reset(new VPLLegacyTranscodeEngine(std::move(acceleration)));
+            } else {
+                engine.reset(new VPLLegacyDecodeEngine(std::move(acceleration)));
+            }
         }
     }
 
@@ -212,13 +232,13 @@ GSource::Priv::Priv(std::shared_ptr<IDataProvider> provider,
     auto engine_session_ptr = engine->initialize_session(mfx_session, cfg_params,
                                                          provider);
 
-    const mfxVideoParam& video_param = engine_session_ptr->get_video_param();
+    const mfxFrameInfo& video_param = engine_session_ptr->get_video_param();
 
     // set valid description
     description.size = cv::Size {
-                            video_param.mfx.FrameInfo.Width,
-                            video_param.mfx.FrameInfo.Height};
-    switch(video_param.mfx.FrameInfo.FourCC) {
+                            video_param.Width,
+                            video_param.Height};
+    switch(video_param.FourCC) {
         case MFX_FOURCC_I420:
             throw std::runtime_error("Cannot parse GMetaArg description: MediaFrame doesn't support I420 type");
         case MFX_FOURCC_NV12:
@@ -226,7 +246,7 @@ GSource::Priv::Priv(std::shared_ptr<IDataProvider> provider,
             break;
         default:
             throw std::runtime_error("Cannot parse GMetaArg description: MediaFrame unknown 'fmt' type: " +
-                                     std::to_string(video_param.mfx.FrameInfo.FourCC));
+                                     std::to_string(video_param.FourCC));
     }
     description_is_valid = true;
 
diff --git a/modules/gapi/src/streaming/onevpl/utils.hpp b/modules/gapi/src/streaming/onevpl/utils.hpp
index 36711bf9a0..76a66a63f4 100644
--- a/modules/gapi/src/streaming/onevpl/utils.hpp
+++ b/modules/gapi/src/streaming/onevpl/utils.hpp
@@ -73,8 +73,8 @@ const char* mfx_codec_type_to_cstr(const mfxU32 fourcc, const mfxU32 type);
 
 mfxU32 cstr_to_mfx_version(const char* cstr);
 
-std::string mfxstatus_to_string(int64_t err);
-std::string mfxstatus_to_string(mfxStatus err);
+std::string GAPI_EXPORTS mfxstatus_to_string(int64_t err);
+std::string GAPI_EXPORTS mfxstatus_to_string(mfxStatus err);
 
 std::ostream& operator<< (std::ostream& out, const mfxImplDescription& idesc);
 
diff --git a/modules/gapi/test/common/gapi_core_tests_inl.hpp b/modules/gapi/test/common/gapi_core_tests_inl.hpp
index 89261a6490..11b6e066a6 100644
--- a/modules/gapi/test/common/gapi_core_tests_inl.hpp
+++ b/modules/gapi/test/common/gapi_core_tests_inl.hpp
@@ -1701,6 +1701,25 @@ namespace {
     };
 };
 
+namespace {
+    class TestMediaGray final : public cv::MediaFrame::IAdapter {
+        cv::Mat m_mat;
+        // Test adapter exposing a cv::Mat as a single-plane GRAY MediaFrame
+    public:
+        explicit TestMediaGray(cv::Mat m)
+            : m_mat(m) {
+        }
+        cv::GFrameDesc meta() const override {
+            return cv::GFrameDesc{ cv::MediaFormat::GRAY, cv::Size(m_mat.cols, m_mat.rows) };
+        }
+        cv::MediaFrame::View access(cv::MediaFrame::Access) override { // the requested access mode is ignored
+            cv::MediaFrame::View::Ptrs pp = { m_mat.ptr(), nullptr, nullptr, nullptr }; // only plane 0 is populated
+            cv::MediaFrame::View::Strides ss = { m_mat.step, 0u, 0u, 0u };
+            return cv::MediaFrame::View(std::move(pp), std::move(ss));
+        }
+    };
+};
+
 TEST_P(SizeMFTest, ParseTest)
 {
     cv::Size out_sz;
@@ -1715,6 +1734,20 @@ TEST_P(SizeMFTest, ParseTest)
     EXPECT_EQ(sz, out_sz);
 }
 
+TEST_P(SizeMFTest, ParseGrayTest)
+{
+    // Input: a GRAY media frame of the parametrized size
+    cv::Mat gray_mat = cv::Mat::eye(sz.height, sz.width, CV_8UC1);
+    cv::MediaFrame gray_frame = cv::MediaFrame::Create<TestMediaGray>(gray_mat);
+    cv::GFrame in;
+    auto out = cv::gapi::streaming::size(in);
+    cv::GComputation size_graph(cv::GIn(in), cv::GOut(out));
+
+    cv::Size parsed_sz;
+    size_graph.apply(cv::gin(gray_frame), cv::gout(parsed_sz), getCompileArgs());
+    EXPECT_EQ(sz, parsed_sz);
+}
+
 } // opencv_test
 
 #endif //OPENCV_GAPI_CORE_TESTS_INL_HPP
diff --git a/modules/gapi/test/gapi_frame_tests.cpp b/modules/gapi/test/gapi_frame_tests.cpp
index 5911ef9d9a..76038b5168 100644
--- a/modules/gapi/test/gapi_frame_tests.cpp
+++ b/modules/gapi/test/gapi_frame_tests.cpp
@@ -29,6 +29,23 @@ GAPI_OCV_KERNEL(OCVBlurFrame, GBlurFrame) {
     }
 };
 
+G_API_OP(GBlurFrameGray, <GMat(GFrame)>, "test.blur_frame_gray") {
+    static GMatDesc outMeta(GFrameDesc in) {
+        return cv::GMatDesc(CV_8U, 1, in.size); // output: 8-bit single-channel Mat of the input frame size
+    }
+};
+
+GAPI_OCV_KERNEL(OCVBlurFrameGray, GBlurFrameGray) {
+    static void run(const cv::MediaFrame & in, cv::Mat & out) {
+        GAPI_Assert(in.desc().fmt == cv::MediaFormat::GRAY); // this kernel handles GRAY frames only
+        cv::MediaFrame::View view = in.access(cv::MediaFrame::Access::R);
+        cv::blur(cv::Mat(in.desc().size, CV_8UC1, view.ptr[0], view.stride[0]), // Mat header over plane 0 of the view
+        out,
+        cv::Size{ 3,3 });
+    }
+};
+
+
 ////////////////////////////////////////////////////////////////////////////////
 // cv::MediaFrame tests
 namespace {
@@ -70,6 +87,26 @@ public:
         return cv::MediaFrame::View(std::move(pp), std::move(ss));
     }
 };
+
+class TestMediaGray final : public cv::MediaFrame::IAdapter {
+    cv::Mat m_mat;
+    using Cb = cv::MediaFrame::View::Callback;
+    Cb m_cb;
+    // Test adapter exposing a cv::Mat as a single-plane GRAY MediaFrame; a copy of m_cb is handed to every View
+public:
+    explicit TestMediaGray(cv::Mat m, Cb cb = []() {})
+        : m_mat(m), m_cb(cb) {
+    }
+    cv::GFrameDesc meta() const override {
+        return cv::GFrameDesc{ cv::MediaFormat::GRAY, cv::Size(m_mat.cols, m_mat.rows) };
+    }
+    cv::MediaFrame::View access(cv::MediaFrame::Access) override { // the requested access mode is ignored
+        cv::MediaFrame::View::Ptrs pp = { m_mat.ptr(), nullptr, nullptr, nullptr }; // only plane 0 is populated
+        cv::MediaFrame::View::Strides ss = { m_mat.step, 0u, 0u, 0u };
+        return cv::MediaFrame::View(std::move(pp), std::move(ss), Cb{ m_cb });
+    }
+};
+
 } // anonymous namespace
 
 struct MediaFrame_Test: public ::testing::Test {
@@ -120,6 +157,49 @@ TEST_F(MediaFrame_BGR, Input) {
     EXPECT_EQ(0, cvtest::norm(out_mat_ocv, out_mat_gapi, NORM_INF));
 }
 
+struct MediaFrame_Gray : public MediaFrame_Test {
+    M gray; // source image wrapped by the fixture's frame
+    MediaFrame_Gray()
+        : gray(M::eye(240, 320, CV_8UC1)) { // 240 rows x 320 cols, i.e. cv::Size(320, 240)
+        cv::randn(gray, cv::Scalar::all(127.0f), cv::Scalar::all(40.f)); // overwrites the initial eye() content with noise
+        frame = MF::Create<TestMediaGray>(gray);
+    }
+};
+
+TEST_F(MediaFrame_Gray, Meta) {
+    const auto desc = frame.desc();  // descriptor reported through the GRAY adapter
+    EXPECT_EQ(cv::MediaFormat::GRAY, desc.fmt);
+    EXPECT_EQ(cv::Size(320, 240), desc.size);
+}
+
+TEST_F(MediaFrame_Gray, Access) {
+    // Two read views held at the same time must both expose plane 0 of the source Mat
+    cv::MediaFrame::View first_view = frame.access(cv::MediaFrame::Access::R);
+    EXPECT_EQ(gray.ptr(), first_view.ptr[0]);
+    EXPECT_EQ(gray.step, first_view.stride[0]);
+    cv::MediaFrame::View second_view = frame.access(cv::MediaFrame::Access::R);
+    EXPECT_EQ(gray.ptr(), second_view.ptr[0]);
+    EXPECT_EQ(gray.step, second_view.stride[0]);
+}
+
+TEST_F(MediaFrame_Gray, Input) {
+    // Reference result: plain OpenCV 3x3 box blur
+    cv::Mat reference;
+    cv::blur(gray, reference, cv::Size{ 3,3 });
+
+    // Same blur expressed as a G-API graph over the GRAY MediaFrame
+    cv::GFrame in;
+    cv::GMat out = GBlurFrameGray::on(in);
+    cv::GComputation graph(cv::GIn(in), cv::GOut(out));
+    cv::Mat actual;
+    graph.apply(cv::gin(frame), cv::gout(actual),
+                cv::compile_args(cv::gapi::kernels<OCVBlurFrameGray>()));
+
+    // Both paths must agree exactly
+    EXPECT_EQ(0, cvtest::norm(reference, actual, NORM_INF));
+}
+
+
 struct MediaFrame_NV12: public MediaFrame_Test {
     cv::Size sz;
     cv::Mat buf, y, uv;
diff --git a/modules/gapi/test/infer/gapi_infer_ie_test.cpp b/modules/gapi/test/infer/gapi_infer_ie_test.cpp
index 69ed80054c..8dc23a3880 100644
--- a/modules/gapi/test/infer/gapi_infer_ie_test.cpp
+++ b/modules/gapi/test/infer/gapi_infer_ie_test.cpp
@@ -29,6 +29,10 @@
 #elif defined(_MSC_VER)
 #pragma warning(push)
 #pragma warning(disable : 4100)
+# if _MSC_VER < 1910
+#  pragma warning(disable:4268) // Disable warnings of ngraph. OpenVINO recommends to use MSVS 2019.
+#  pragma warning(disable:4800)
+# endif
 #elif defined(__GNUC__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
diff --git a/modules/gapi/test/oak/gapi_tests_oak.cpp b/modules/gapi/test/oak/gapi_tests_oak.cpp
new file mode 100644
index 0000000000..c153333374
--- /dev/null
+++ b/modules/gapi/test/oak/gapi_tests_oak.cpp
@@ -0,0 +1,26 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2021 Intel Corporation
+
+#include "../test_precomp.hpp"
+
+#ifdef HAVE_OAK
+
+#include <opencv2/gapi/oak/oak.hpp>
+
+namespace opencv_test
+{
+
+// FIXME: consider a better solution
+TEST(OAK, Available)
+{
+    cv::GFrame in;
+    auto out = cv::gapi::oak::encode(in, {});
+    auto args = cv::compile_args(cv::gapi::oak::ColorCameraParams{}, cv::gapi::oak::kernels());
+    auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)).compileStreaming(std::move(args));
+}
+} // opencv_test
+
+#endif // HAVE_OAK
diff --git a/modules/gapi/test/streaming/gapi_gstreamersource_tests.cpp b/modules/gapi/test/streaming/gapi_gstreamersource_tests.cpp
index 0478d2dc1d..7921eb71c2 100644
--- a/modules/gapi/test/streaming/gapi_gstreamersource_tests.cpp
+++ b/modules/gapi/test/streaming/gapi_gstreamersource_tests.cpp
@@ -29,6 +29,7 @@ namespace opencv_test
 struct GStreamerSourceTest : public TestWithParam<std::tuple<std::string, cv::Size, std::size_t>>
 { };
 
+
 TEST_P(GStreamerSourceTest, AccuracyTest)
 {
     std::string pipeline;
@@ -143,6 +144,16 @@ G_TYPED_KERNEL(GGstFrameCopyToNV12, <std::tuple<cv::GMat,cv::GMat>(GFrame)>,
     }
 };
 
+G_TYPED_KERNEL(GGstFrameCopyToGRAY8, <cv::GMat(GFrame)>, // test-only op: one GFrame in, one GMat out
+    "org.opencv.test.gstframe_copy_to_gray8")
+{
+    static GMatDesc outMeta(GFrameDesc desc) {
+        GMatDesc y{ CV_8U, 1, desc.size, false }; // 8-bit, single channel, same size as the input frame
+        return y;
+    }
+};
+
+
 GAPI_OCV_KERNEL(GOCVGstFrameCopyToNV12, GGstFrameCopyToNV12)
 {
     static void run(const cv::MediaFrame& in, cv::Mat& y, cv::Mat& uv)
@@ -156,21 +167,50 @@ GAPI_OCV_KERNEL(GOCVGstFrameCopyToNV12, GGstFrameCopyToNV12)
     }
 };
 
+GAPI_OCV_KERNEL(GOCVGstFrameCopyToGRAY8, GGstFrameCopyToGRAY8) // OpenCV-backend implementation of GGstFrameCopyToGRAY8
+{
+    static void run(const cv::MediaFrame & in, cv::Mat & y)
+    {
+        auto view = in.access(cv::MediaFrame::Access::R); // read-only view of the frame planes
+        cv::Mat ly(y.size(), y.type(), view.ptr[0], view.stride[0]); // header over plane 0, honoring its row stride
+        ly.copyTo(y); // deep-copy the plane into the output Mat
+    }
+};
+
+
 TEST_P(GStreamerSourceTest, GFrameTest)
 {
     std::string pipeline;
     cv::Size expectedFrameSize;
     std::size_t streamLength { };
+    bool isNV12 = false;
     std::tie(pipeline, expectedFrameSize, streamLength) = GetParam();
 
+    // Check if the pipeline string contains the NV12 sub-string
+    if (pipeline.find("NV12") != std::string::npos) {
+        isNV12 = true;
+    }
+
     // Graph declaration:
     cv::GFrame in;
     cv::GMat copiedY, copiedUV;
-    std::tie(copiedY, copiedUV) = GGstFrameCopyToNV12::on(in);
-    cv::GComputation c(cv::GIn(in), cv::GOut(copiedY, copiedUV));
+    if (isNV12) {
+        std::tie(copiedY, copiedUV) = GGstFrameCopyToNV12::on(in);
+    }
+    else {
+        copiedY = GGstFrameCopyToGRAY8::on(in);
+    }
+
+    cv::GComputation c(cv::GIn(in), isNV12 ? cv::GOut(copiedY, copiedUV) : cv::GOut(copiedY));
 
     // Graph compilation for streaming mode:
-    auto ccomp = c.compileStreaming(cv::compile_args(cv::gapi::kernels<GOCVGstFrameCopyToNV12>()));
+    cv::GStreamingCompiled ccomp;
+    if (isNV12) {
+        ccomp = c.compileStreaming(cv::compile_args(cv::gapi::kernels<GOCVGstFrameCopyToNV12>()));
+    } else {
+        ccomp = c.compileStreaming(cv::compile_args(cv::gapi::kernels<GOCVGstFrameCopyToGRAY8>()));
+    }
+
 
     EXPECT_TRUE(ccomp);
     EXPECT_FALSE(ccomp.running());
@@ -186,29 +226,41 @@ TEST_P(GStreamerSourceTest, GFrameTest)
     // Streaming - pulling of frames until the end:
     cv::Mat y_mat, uv_mat;
 
-    EXPECT_TRUE(ccomp.pull(cv::gout(y_mat, uv_mat)));
+    EXPECT_TRUE(isNV12 ? ccomp.pull(cv::gout(y_mat, uv_mat)) : ccomp.pull(cv::gout(y_mat)));
     EXPECT_TRUE(!y_mat.empty());
-    EXPECT_TRUE(!uv_mat.empty());
+    if (isNV12) {
+        EXPECT_TRUE(!uv_mat.empty());
+    }
 
     cv::Size expectedYSize = expectedFrameSize;
     cv::Size expectedUVSize = expectedFrameSize / 2;
 
     EXPECT_EQ(expectedYSize, y_mat.size());
-    EXPECT_EQ(expectedUVSize, uv_mat.size());
+    if (isNV12) {
+        EXPECT_EQ(expectedUVSize, uv_mat.size());
+    }
 
     EXPECT_EQ(CV_8UC1, y_mat.type());
-    EXPECT_EQ(CV_8UC2, uv_mat.type());
+    if (isNV12) {
+        EXPECT_EQ(CV_8UC2, uv_mat.type());
+    }
 
     std::size_t framesCount = 1UL;
-    while (ccomp.pull(cv::gout(y_mat, uv_mat))) {
+    while (isNV12 ? ccomp.pull(cv::gout(y_mat, uv_mat)) : ccomp.pull(cv::gout(y_mat))) {
         EXPECT_TRUE(!y_mat.empty());
-        EXPECT_TRUE(!uv_mat.empty());
+        if (isNV12) {
+            EXPECT_TRUE(!uv_mat.empty());
+        }
 
         EXPECT_EQ(expectedYSize, y_mat.size());
-        EXPECT_EQ(expectedUVSize, uv_mat.size());
+        if (isNV12) {
+            EXPECT_EQ(expectedUVSize, uv_mat.size());
+        }
 
         EXPECT_EQ(CV_8UC1, y_mat.type());
-        EXPECT_EQ(CV_8UC2, uv_mat.type());
+        if (isNV12) {
+            EXPECT_EQ(CV_8UC2, uv_mat.type());
+        }
 
         framesCount++;
     }
@@ -221,36 +273,56 @@ TEST_P(GStreamerSourceTest, GFrameTest)
     EXPECT_EQ(streamLength, framesCount);
 }
 
+
 // FIXME: Need to launch with sudo. May be infrastructure problems.
 // TODO: It is needed to add tests for streaming from native KMB camera: kmbcamsrc
 //       GStreamer element.
 INSTANTIATE_TEST_CASE_P(CameraEmulatingPipeline, GStreamerSourceTest,
                         Combine(Values("videotestsrc is-live=true pattern=colors num-buffers=10 ! "
                                        "videorate ! videoscale ! "
-                                       "video/x-raw,width=1920,height=1080,framerate=3/1 ! "
+                                       "video/x-raw,format=NV12,width=1920,height=1080,framerate=3/1 ! "
+                                       "appsink",
+                                       "videotestsrc is-live=true pattern=colors num-buffers=10 ! "
+                                       "videorate ! videoscale ! "
+                                       "video/x-raw,format=GRAY8,width=1920,height=1080,framerate=3/1 ! "
                                        "appsink"),
                                 Values(cv::Size(1920, 1080)),
                                 Values(10UL)));
 
+
 INSTANTIATE_TEST_CASE_P(FileEmulatingPipeline, GStreamerSourceTest,
                         Combine(Values("videotestsrc pattern=colors num-buffers=10 ! "
                                        "videorate ! videoscale ! "
-                                       "video/x-raw,width=640,height=420,framerate=3/1 ! "
+                                       "video/x-raw,format=NV12,width=640,height=420,framerate=3/1 ! "
+                                       "appsink",
+                                       "videotestsrc pattern=colors num-buffers=10 ! "
+                                       "videorate ! videoscale ! "
+                                       "video/x-raw,format=GRAY8,width=640,height=420,framerate=3/1 ! "
                                        "appsink"),
                                 Values(cv::Size(640, 420)),
                                 Values(10UL)));
 
+
 INSTANTIATE_TEST_CASE_P(MultipleLiveSources, GStreamerSourceTest,
                         Combine(Values("videotestsrc is-live=true pattern=colors num-buffers=10 ! "
-                                       "videoscale ! video/x-raw,width=1280,height=720 ! appsink "
+                                       "videoscale ! video/x-raw,format=NV12,width=1280,height=720 ! appsink "
+                                       "videotestsrc is-live=true pattern=colors num-buffers=10 ! "
+                                       "fakesink",
+                                       "videotestsrc is-live=true pattern=colors num-buffers=10 ! "
+                                       "videoscale ! video/x-raw,format=GRAY8,width=1280,height=720 ! appsink "
                                        "videotestsrc is-live=true pattern=colors num-buffers=10 ! "
                                        "fakesink"),
                                 Values(cv::Size(1280, 720)),
                                 Values(10UL)));
 
+
 INSTANTIATE_TEST_CASE_P(MultipleNotLiveSources, GStreamerSourceTest,
                         Combine(Values("videotestsrc pattern=colors num-buffers=10 ! "
-                                       "videoscale ! video/x-raw,width=1280,height=720 ! appsink "
+                                       "videoscale ! video/x-raw,format=NV12,width=1280,height=720 ! appsink "
+                                       "videotestsrc pattern=colors num-buffers=10 ! "
+                                       "fakesink",
+                                       "videotestsrc pattern=colors num-buffers=10 ! "
+                                       "videoscale ! video/x-raw,format=GRAY8,width=1280,height=720 ! appsink "
                                        "videotestsrc pattern=colors num-buffers=10 ! "
                                        "fakesink"),
                                 Values(cv::Size(1280, 720)),
@@ -308,11 +380,11 @@ TEST(GStreamerMultiSourceSmokeTest, Test)
     EXPECT_FALSE(ccomp.running());
 }
 
-struct GStreamerMultiSourceTest :
+struct GStreamerMultiSourceTestNV12 :
     public TestWithParam<std::tuple<cv::GComputation, cv::gapi::wip::GStreamerSource::OutputType>>
 { };
 
-TEST_P(GStreamerMultiSourceTest, ImageDataTest)
+TEST_P(GStreamerMultiSourceTestNV12, ImageDataTest)
 {
     std::string pathToLeftIm = findDataFile("cv/stereomatching/datasets/tsukuba/im6.png");
     std::string pathToRightIm = findDataFile("cv/stereomatching/datasets/tsukuba/im2.png");
@@ -377,7 +449,7 @@ TEST_P(GStreamerMultiSourceTest, ImageDataTest)
     EXPECT_FALSE(compiled.running());
 }
 
-INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGMatsTest, GStreamerMultiSourceTest,
+INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGMatsTest, GStreamerMultiSourceTestNV12,
                         Combine(Values(cv::GComputation([]()
                                        {
                                            cv::GMat in1, in2;
@@ -387,7 +459,7 @@ INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGMatsTest, GStreamerMultiSourceTe
                                        })),
                                Values(cv::gapi::wip::GStreamerSource::OutputType::MAT)));
 
-INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGFramesTest, GStreamerMultiSourceTest,
+INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGFramesTest, GStreamerMultiSourceTestNV12,
                         Combine(Values(cv::GComputation([]()
                                        {
                                            cv::GFrame in1, in2;
@@ -396,6 +468,96 @@ INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGFramesTest, GStreamerMultiSource
                                                          cv::gapi::streaming::BGR(in2)));
                                        })),
                                Values(cv::gapi::wip::GStreamerSource::OutputType::FRAME)));
+
+struct GStreamerMultiSourceTestGRAY8 :
+    public TestWithParam<std::tuple<cv::GComputation, cv::gapi::wip::GStreamerSource::OutputType>>
+{ };
+
+TEST_P(GStreamerMultiSourceTestGRAY8, ImageDataTest)
+{
+    std::string pathToLeftIm = findDataFile("cv/stereomatching/datasets/tsukuba/im6.png");
+    std::string pathToRightIm = findDataFile("cv/stereomatching/datasets/tsukuba/im2.png");
+
+    std::string pipelineToReadImage("filesrc location=LOC ! pngdec ! videoconvert ! "
+        "videoscale ! video/x-raw,format=GRAY8 ! appsink");
+
+    cv::gapi::wip::GStreamerSource leftImageProvider(
+        std::regex_replace(pipelineToReadImage, std::regex("LOC"), pathToLeftIm));
+    cv::gapi::wip::GStreamerSource rightImageProvider(
+        std::regex_replace(pipelineToReadImage, std::regex("LOC"), pathToRightIm));
+
+    cv::gapi::wip::Data leftImData, rightImData;
+    leftImageProvider.pull(leftImData);
+    rightImageProvider.pull(rightImData);
+
+    cv::Mat leftRefMat =  cv::util::get<cv::Mat>(leftImData);
+    cv::Mat rightRefMat = cv::util::get<cv::Mat>(rightImData);
+
+    // Retrieve test parameters:
+    std::tuple<cv::GComputation, cv::gapi::wip::GStreamerSource::OutputType> params = GetParam();
+    cv::GComputation extractImage = std::move(std::get<0>(params));
+    cv::gapi::wip::GStreamerSource::OutputType outputType = std::get<1>(params);
+
+    // Graph compilation for streaming mode:
+    auto compiled =
+        extractImage.compileStreaming();
+
+    EXPECT_TRUE(compiled);
+    EXPECT_FALSE(compiled.running());
+
+    cv::gapi::wip::GStreamerPipeline
+        pipeline(std::string("multifilesrc location=" + pathToLeftIm + " index=0 loop=true ! "
+                 "pngdec ! videoconvert ! videoscale ! video/x-raw,format=GRAY8 ! "
+                 "appsink name=sink1 ") +
+                 std::string("multifilesrc location=" + pathToRightIm + " index=0 loop=true ! "
+                 "pngdec ! videoconvert ! videoscale ! video/x-raw,format=GRAY8 ! "
+                 "appsink name=sink2"));
+
+    // GStreamer streaming sources configuration:
+    auto src1 = pipeline.getStreamingSource("sink1", outputType);
+    auto src2 = pipeline.getStreamingSource("sink2", outputType);
+
+    compiled.setSource(cv::gin(src1, src2));
+
+    // Start of streaming:
+    compiled.start();
+    EXPECT_TRUE(compiled.running());
+
+    // Streaming - pulling of frames:
+    cv::Mat in_mat1, in_mat2;
+
+    std::size_t counter { }, limit { 10 };
+    while(compiled.pull(cv::gout(in_mat1, in_mat2)) && (counter < limit)) {
+        EXPECT_EQ(0, cv::norm(in_mat1, leftRefMat, cv::NORM_INF));
+        EXPECT_EQ(0, cv::norm(in_mat2, rightRefMat, cv::NORM_INF));
+        ++counter;
+    }
+
+    compiled.stop();
+
+    EXPECT_FALSE(compiled.running());
+}
+
+INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGMatsTest, GStreamerMultiSourceTestGRAY8,
+                        Combine(Values(cv::GComputation([]()
+                                       {
+                                           cv::GMat in1, in2;
+                                           return cv::GComputation(cv::GIn(in1, in2),
+                                                                   cv::GOut(cv::gapi::copy(in1),
+                                                                            cv::gapi::copy(in2)));
+                                       })),
+                               Values(cv::gapi::wip::GStreamerSource::OutputType::MAT)));
+
+INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGFramesTest, GStreamerMultiSourceTestGRAY8,
+                        Combine(Values(cv::GComputation([]()
+                                       {
+                                           cv::GFrame in1, in2;
+                                           return cv::GComputation(cv::GIn(in1, in2),
+                                                cv::GOut(cv::gapi::streaming::BGR(in1),
+                                                         cv::gapi::streaming::BGR(in2)));
+                                       })),
+                               Values(cv::gapi::wip::GStreamerSource::OutputType::FRAME)));
+
 } // namespace opencv_test
 
 #endif // HAVE_GSTREAMER
diff --git a/modules/gapi/test/streaming/gapi_streaming_tests.cpp b/modules/gapi/test/streaming/gapi_streaming_tests.cpp
index 3f876fc61b..4d33d4b0c5 100644
--- a/modules/gapi/test/streaming/gapi_streaming_tests.cpp
+++ b/modules/gapi/test/streaming/gapi_streaming_tests.cpp
@@ -164,6 +164,26 @@ public:
     }
 };
 
+class TestMediaGRAY final : public cv::MediaFrame::IAdapter { // test adapter exposing one cv::Mat as a GRAY frame
+    cv::Mat m_mat;
+    using Cb = cv::MediaFrame::View::Callback;
+    Cb m_cb; // handed to every View created by access()
+    // The callback defaults to a no-op lambda.
+public:
+    explicit TestMediaGRAY(cv::Mat m, Cb cb = []() {})
+        : m_mat(m), m_cb(cb) {
+    }
+    cv::GFrameDesc meta() const override {
+        return cv::GFrameDesc{ cv::MediaFormat::GRAY, cv::Size(m_mat.cols, m_mat.rows) }; // format GRAY, size taken from the Mat
+    }
+    cv::MediaFrame::View access(cv::MediaFrame::Access) override { // the requested access mode is ignored
+        cv::MediaFrame::View::Ptrs pp = { m_mat.ptr(), nullptr, nullptr, nullptr }; // only plane 0 is populated
+        cv::MediaFrame::View::Strides ss = { m_mat.step, 0u, 0u, 0u }; // plane 0 stride is the Mat's row step
+        return cv::MediaFrame::View(std::move(pp), std::move(ss), Cb{ m_cb });
+    }
+};
+
+
 class BGRSource : public cv::gapi::wip::GCaptureSource {
 public:
     explicit BGRSource(const std::string& pipeline)
@@ -230,6 +250,31 @@ public:
     }
 };
 
+class GRAYSource : public cv::gapi::wip::GCaptureSource { // capture source that emits GRAY MediaFrames
+public:
+    explicit GRAYSource(const std::string& pipeline)
+        : cv::gapi::wip::GCaptureSource(pipeline) {
+    }
+    // Pulls a frame from the base source, converts it BGR -> GRAY and wraps it in TestMediaGRAY.
+    bool pull(cv::gapi::wip::Data& data) override { // marked override, consistent with descr_of() below
+        if (cv::gapi::wip::GCaptureSource::pull(data)) {
+            cv::Mat bgr = cv::util::get<cv::Mat>(data);
+            cv::Mat gray;
+            cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY);
+            data = cv::MediaFrame::Create<TestMediaGRAY>(gray);
+            return true;
+        }
+        return false; // the base source produced no frame
+    }
+    // Reports GRAY frame meta with the size taken from the base source's GMatDesc.
+    GMetaArg descr_of() const override {
+        return cv::GMetaArg{ cv::GFrameDesc{cv::MediaFormat::GRAY,
+                                            cv::util::get<cv::GMatDesc>(
+                                            cv::gapi::wip::GCaptureSource::descr_of()).size} };
+    }
+};
+
+
 void checkPullOverload(const cv::Mat& ref,
                        const bool has_output,
                        cv::util::variant<cv::GRunArgs, cv::GOptRunArgs>& args) {
@@ -1789,6 +1834,46 @@ TEST(GAPI_Streaming, CopyFrame)
     }
 }
 
+TEST(GAPI_Streaming, CopyFrameGray)
+{
+    std::string filepath = findDataFile("cv/video/768x576.avi");
+    // Graph under test: a single copy() of the input frame, fed by GRAYSource.
+    cv::GFrame in;
+    auto out = cv::gapi::copy(in);
+
+    cv::GComputation comp(cv::GIn(in), cv::GOut(out));
+
+    auto cc = comp.compileStreaming();
+    try {
+        cc.setSource<GRAYSource>(filepath);
+    }
+    catch (...) {
+        throw SkipTestException("Video file can not be opened");
+    }
+    // Reference path: read the same file directly and convert every frame to gray.
+    cv::VideoCapture cap;
+    cap.open(filepath);
+    if (!cap.isOpened())
+        throw SkipTestException("Video file can not be opened");
+
+    cv::MediaFrame frame;
+    cv::Mat ocv_mat;
+    std::size_t num_frames = 0u;
+    std::size_t max_frames = 10u;
+    // Compare at most max_frames frames from the graph against the reference.
+    cc.start();
+    while (cc.pull(cv::gout(frame)) && num_frames < max_frames)
+    {
+        auto view = frame.access(cv::MediaFrame::Access::R);
+        cv::Mat gapi_mat(frame.desc().size, CV_8UC1, view.ptr[0], view.stride[0]); // honor the plane's row stride
+        num_frames++;
+        cap >> ocv_mat;
+        cv::Mat gray;
+        cvtColor(ocv_mat, gray, cv::COLOR_BGR2GRAY);
+        EXPECT_EQ(0, cvtest::norm(gray, gapi_mat, NORM_INF));
+    }
+}
+
 TEST(GAPI_Streaming, CopyMat)
 {
     std::string filepath = findDataFile("cv/video/768x576.avi");
@@ -1892,23 +1977,97 @@ TEST(GAPI_Streaming, Reshape)
     }
 }
 
+TEST(GAPI_Streaming, ReshapeGray)
+{
+    std::string filepath = findDataFile("cv/video/768x576.avi");
+    // Graph under test: a single copy() of the input frame, fed by GRAYSource.
+    cv::GFrame in;
+    auto out = cv::gapi::copy(in);
+
+    cv::GComputation comp(cv::GIn(in), cv::GOut(out));
+
+    auto cc = comp.compileStreaming();
+    try {
+        cc.setSource<GRAYSource>(filepath);
+    }
+    catch (...) {
+        throw SkipTestException("Video file can not be opened");
+    }
+    // Reference path: read the same file directly and convert every frame to gray.
+    cv::VideoCapture cap;
+    cap.open(filepath);
+    if (!cap.isOpened())
+        throw SkipTestException("Video file can not be opened");
+
+    cv::MediaFrame frame;
+    cv::Mat ocv_mat;
+    std::size_t num_frames = 0u;
+    std::size_t max_frames = 10u;
+    // First run: compare at most max_frames frames of the 768x576 video.
+    cc.start();
+    while (cc.pull(cv::gout(frame)) && num_frames < max_frames)
+    {
+        auto view = frame.access(cv::MediaFrame::Access::R);
+        cv::Mat gapi_mat(frame.desc().size, CV_8UC1, view.ptr[0], view.stride[0]); // honor the plane's row stride
+        num_frames++;
+        cap >> ocv_mat;
+        cv::Mat gray;
+        cvtColor(ocv_mat, gray, cv::COLOR_BGR2GRAY);
+        EXPECT_EQ(0, cvtest::norm(gray, gapi_mat, NORM_INF));
+    }
+
+    // Reshape the graph meta: stop, then set a source with a different frame size
+    filepath = findDataFile("cv/video/1920x1080.avi");
+    cc.stop();
+    try {
+        cc.setSource<GRAYSource>(filepath);
+    }
+    catch (...) {
+        throw SkipTestException("Video file can not be opened");
+    }
+    // Re-open the reference capture on the new file as well.
+    cap.open(filepath);
+    if (!cap.isOpened())
+        throw SkipTestException("Video file can not be opened");
+
+    cv::MediaFrame frame2;
+    cv::Mat ocv_mat2;
+
+    num_frames = 0u;
+    // Second run: same comparison on the 1920x1080 video.
+    cc.start();
+    while (cc.pull(cv::gout(frame2)) && num_frames < max_frames)
+    {
+        auto view = frame2.access(cv::MediaFrame::Access::R);
+        cv::Mat gapi_mat(frame2.desc().size, CV_8UC1, view.ptr[0], view.stride[0]); // honor the plane's row stride
+        num_frames++;
+        cap >> ocv_mat2;
+        cv::Mat gray;
+        cvtColor(ocv_mat2, gray, cv::COLOR_BGR2GRAY);
+        EXPECT_EQ(0, cvtest::norm(gray, gapi_mat, NORM_INF));
+    }
+}
+
+
 namespace {
     enum class TestSourceType {
         BGR,
-        NV12
+        NV12,
+        GRAY
     };
     std::ostream& operator<<(std::ostream& os, TestSourceType a) {
         os << "Source:";
         switch (a) {
             case TestSourceType::BGR:  return os << "BGR";
             case TestSourceType::NV12: return os << "NV12";
+            case TestSourceType::GRAY: return os << "GRAY";
             default: CV_Assert(false && "unknown TestSourceType");
         }
     }
 
     cv::gapi::wip::IStreamSource::Ptr createTestSource(TestSourceType sourceType,
                                                        const std::string& pipeline) {
-        assert(sourceType == TestSourceType::BGR || sourceType == TestSourceType::NV12);
+        assert(sourceType == TestSourceType::BGR || sourceType == TestSourceType::NV12 || sourceType == TestSourceType::GRAY);
 
         cv::gapi::wip::IStreamSource::Ptr ptr { };
 
@@ -1933,6 +2092,16 @@ namespace {
                 }
                 break;
             }
+            case TestSourceType::GRAY: {
+                try {
+                    ptr = cv::gapi::wip::make_src<GRAYSource>(pipeline);
+                }
+                catch (...) {
+                    throw SkipTestException(std::string("GRAYSource for '") + pipeline +
+                        "' couldn't be created!");
+                }
+                break;
+            }
             default: {
                 throw SkipTestException("Incorrect type of source! "
                                         "Something went wrong in the test!");
@@ -2000,6 +2169,25 @@ namespace {
               cvtBGR2NV12(bgr, y, uv);
               return uv;
           } },
+        { std::make_pair(TestSourceType::GRAY, TestAccessType::BGR),
+          [](const cv::Mat& bgr) {
+              cv::Mat gray;
+              cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY);
+              cv::Mat out_bgr;
+              cv::cvtColor(gray, out_bgr, cv::COLOR_GRAY2BGR);
+              return out_bgr;
+          } },
+        { std::make_pair(TestSourceType::GRAY, TestAccessType::Y),
+          [](const cv::Mat& bgr) {
+              cv::Mat gray;
+              cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY);
+              return gray;
+          } },
+        { std::make_pair(TestSourceType::GRAY, TestAccessType::UV),
+          [](const cv::Mat& bgr) {
+              cv::Mat uv(bgr.size() / 2, CV_8UC2, cv::Scalar::all(127));
+              return uv;
+          } },
     };
 } // anonymous namespace
 
@@ -2007,6 +2195,7 @@ struct GAPI_Accessors_In_Streaming : public TestWithParam<
     std::tuple<std::string,TestSourceType,TestAccessType>>
 { };
 
+
 TEST_P(GAPI_Accessors_In_Streaming, AccuracyTest)
 {
     std::string filepath{};
@@ -2050,10 +2239,11 @@ TEST_P(GAPI_Accessors_In_Streaming, AccuracyTest)
 
 INSTANTIATE_TEST_CASE_P(TestAccessor, GAPI_Accessors_In_Streaming,
                         Combine(Values("cv/video/768x576.avi"),
-                                Values(TestSourceType::BGR, TestSourceType::NV12),
+                                Values(TestSourceType::BGR, TestSourceType::NV12, TestSourceType::GRAY),
                                 Values(TestAccessType::BGR, TestAccessType::Y, TestAccessType::UV)
                         ));
 
+
 struct GAPI_Accessors_Meta_In_Streaming : public TestWithParam<
     std::tuple<std::string,TestSourceType,TestAccessType>>
 { };
@@ -2120,7 +2310,7 @@ TEST_P(GAPI_Accessors_Meta_In_Streaming, AccuracyTest)
 
 INSTANTIATE_TEST_CASE_P(AccessorMeta, GAPI_Accessors_Meta_In_Streaming,
                         Combine(Values("cv/video/768x576.avi"),
-                                Values(TestSourceType::BGR, TestSourceType::NV12),
+                                Values(TestSourceType::BGR, TestSourceType::NV12, TestSourceType::GRAY),
                                 Values(TestAccessType::BGR, TestAccessType::Y, TestAccessType::UV)
                         ));
 
@@ -2232,7 +2422,7 @@ TEST(GAPI_Streaming, TestDesyncRMat) {
     cv::optional<cv::RMat> out_desync;
     cv::optional<cv::RMat> out_rmat;
     while (true) {
-        // Initially it throwed "bad variant access" since there was
+        // Initially it threw "bad variant access" since there was
         // no RMat handling in wrap_opt_arg
         EXPECT_NO_THROW(pipe.pull(cv::gout(out_desync, out_rmat)));
         if (out_rmat) break;
@@ -2273,11 +2463,54 @@ TEST(GAPI_Streaming, TestDesyncMediaFrame) {
     cv::optional<cv::MediaFrame> out_desync;
     cv::optional<cv::MediaFrame> out_frame;
     while (true) {
-        // Initially it throwed "bad variant access" since there was
+        // Initially it threw "bad variant access" since there was
         // no MediaFrame handling in wrap_opt_arg
         EXPECT_NO_THROW(pipe.pull(cv::gout(out_desync, out_frame)));
         if (out_frame) break;
     }
 }
 
+G_API_OP(GTestBlurGray, <GFrame(GFrame)>, "test.blur_gray") {
+    static GFrameDesc outMeta(GFrameDesc d) { return d; }
+};
+GAPI_OCV_KERNEL(GOcvTestBlurGray, GTestBlurGray) { // OCV backend: 3x3 box blur of a GRAY frame
+    static void run(const cv::MediaFrame & in, cv::MediaFrame & out) {
+        auto d = in.desc();
+        GAPI_Assert(d.fmt == cv::MediaFormat::GRAY); // only GRAY input is supported
+        auto view = in.access(cv::MediaFrame::Access::R);
+        cv::Mat mat(d.size, CV_8UC1, view.ptr[0], view.stride[0]); // wrap plane 0, honoring its row stride
+        cv::Mat blurred;
+        cv::blur(mat, blurred, cv::Size{ 3,3 });
+        out = cv::MediaFrame::Create<TestMediaGRAY>(blurred); // the result becomes a new GRAY frame
+    }
+};
+
+TEST(GAPI_Streaming, TestDesyncMediaFrameGray) {
+    cv::GFrame in;
+    auto blurred = GTestBlurGray::on(in);
+    auto desynced = cv::gapi::streaming::desync(blurred);
+    auto out = GTestBlurGray::on(blurred);
+    auto pipe = cv::GComputation(cv::GIn(in), cv::GOut(desynced, out))
+        .compileStreaming(cv::compile_args(cv::gapi::kernels<GOcvTestBlurGray>()));
+
+    std::string filepath = findDataFile("cv/video/768x576.avi");
+    try {
+        pipe.setSource<GRAYSource>(filepath);
+    }
+    catch (...) {
+        throw SkipTestException("Video file can not be opened");
+    }
+    pipe.start();
+
+    cv::optional<cv::MediaFrame> out_desync;
+    cv::optional<cv::MediaFrame> out_frame;
+    while (true) {
+        // Initially it threw "bad variant access" since there was
+        // no MediaFrame handling in wrap_opt_arg
+        EXPECT_NO_THROW(pipe.pull(cv::gout(out_desync, out_frame)));
+        if (out_frame) break;
+    }
+}
+
+
 } // namespace opencv_test
diff --git a/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp b/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp
index c62f58eecf..51fb9f276a 100644
--- a/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp
+++ b/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp
@@ -29,6 +29,7 @@
 
 #ifdef HAVE_ONEVPL
 #include <opencv2/gapi/streaming/onevpl/data_provider_interface.hpp>
+#include "streaming/onevpl/file_data_provider.hpp"
 #include "streaming/onevpl/cfg_param_device_selector.hpp"
 
 #include "streaming/onevpl/accelerators/surface/surface.hpp"
@@ -37,8 +38,15 @@
 #include "streaming/onevpl/accelerators/accel_policy_dx11.hpp"
 #include "streaming/onevpl/accelerators/dx11_alloc_resource.hpp"
 #include "streaming/onevpl/accelerators/utils/shared_lock.hpp"
-#include "streaming/onevpl/engine/processing_engine_base.hpp"
-#include "streaming/onevpl/engine/engine_session.hpp"
+#define private public
+#define protected public
+#include "streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp"
+#include "streaming/onevpl/engine/transcode/transcode_session.hpp"
+#undef protected
+#undef private
+#include "logger.hpp"
+
+#define ALIGN16(value)           (((value + 15) >> 4) << 4)
 
 namespace opencv_test
 {
@@ -63,9 +71,9 @@ struct TestProcessingSession : public cv::gapi::wip::onevpl::EngineSession {
         EngineSession(mfx_session, {}) {
     }
 
-    const mfxVideoParam& get_video_param() const override {
+    const mfxFrameInfo& get_video_param() const override {
         static mfxVideoParam empty;
-        return empty;
+        return empty.mfx.FrameInfo;
     }
 };
 
@@ -581,7 +589,7 @@ TEST(OneVPL_Source_DX11_Accel, Init)
 
     // Allocate surfaces for decoder
     VPLAccelerationPolicy::pool_key_t key = accel.create_surface_pool(request,
-                                                                      mfxDecParams);
+                                                                      mfxDecParams.mfx.FrameInfo);
     auto cand_surface = accel.get_free_surface(key).lock();
 
     sts = MFXVideoDECODE_Init(mfx_session, &mfxDecParams);
@@ -594,6 +602,212 @@ TEST(OneVPL_Source_DX11_Accel, Init)
     MFXClose(mfx_session);
     MFXUnload(mfx_handle);
 }
+
+TEST(OneVPL_Source_DX11_Accel_VPL, Init)
+{
+    using namespace cv::gapi::wip::onevpl;
+
+    std::vector<CfgParam> cfg_params_w_dx11;
+    cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_D3D11));
+    std::unique_ptr<VPLAccelerationPolicy> acceleration_policy (new VPLDX11AccelerationPolicy(std::make_shared<CfgParamDeviceSelector>(cfg_params_w_dx11)));
+
+    mfxLoader mfx_handle = MFXLoad();
+
+    mfxConfig cfg_inst_0 = MFXCreateConfig(mfx_handle);
+    EXPECT_TRUE(cfg_inst_0);
+    mfxVariant mfx_param_0;
+    mfx_param_0.Type = MFX_VARIANT_TYPE_U32;
+    mfx_param_0.Data.U32 = MFX_IMPL_TYPE_HARDWARE;
+    EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_0,(mfxU8 *)CfgParam::implementation_name(),
+                                                    mfx_param_0), MFX_ERR_NONE);
+
+    mfxConfig cfg_inst_1 = MFXCreateConfig(mfx_handle);
+    EXPECT_TRUE(cfg_inst_1);
+    mfxVariant mfx_param_1;
+    mfx_param_1.Type = MFX_VARIANT_TYPE_U32;
+    mfx_param_1.Data.U32 = MFX_ACCEL_MODE_VIA_D3D11;
+    EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_1,(mfxU8 *)CfgParam::acceleration_mode_name(),
+                                                    mfx_param_1), MFX_ERR_NONE);
+
+    mfxConfig cfg_inst_2 = MFXCreateConfig(mfx_handle);
+    EXPECT_TRUE(cfg_inst_2);
+    mfxVariant mfx_param_2;
+    mfx_param_2.Type = MFX_VARIANT_TYPE_U32;
+    mfx_param_2.Data.U32 = MFX_CODEC_HEVC;
+    EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_2,(mfxU8 *)CfgParam::decoder_id_name(),
+                                                    mfx_param_2), MFX_ERR_NONE);
+
+    mfxConfig cfg_inst_3 = MFXCreateConfig(mfx_handle);
+    EXPECT_TRUE(cfg_inst_3);
+    mfxVariant mfx_param_3;
+    mfx_param_3.Type = MFX_VARIANT_TYPE_U32;
+    mfx_param_3.Data.U32 = MFX_EXTBUFF_VPP_SCALING;
+    EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_3,
+                                         (mfxU8 *)"mfxImplDescription.mfxVPPDescription.filter.FilterFourCC",
+                                         mfx_param_3), MFX_ERR_NONE);
+    // create session
+    mfxSession mfx_session{};
+    mfxStatus sts = MFXCreateSession(mfx_handle, 0, &mfx_session);
+    EXPECT_EQ(MFX_ERR_NONE, sts);
+
+    // assign acceleration
+    EXPECT_NO_THROW(acceleration_policy->init(mfx_session));
+
+    // create proper bitstream
+    std::string file_path = findDataFile("highgui/video/big_buck_bunny.h265");
+    std::shared_ptr<IDataProvider> data_provider(new FileDataProvider(file_path,
+                                                                      {CfgParam::create_decoder_id(MFX_CODEC_HEVC)}));
+    IDataProvider::mfx_codec_id_type decoder_id_name = data_provider->get_mfx_codec_id();
+
+    // Prepare video param
+    mfxVideoParam mfxDecParams {};
+    mfxDecParams.mfx.CodecId = decoder_id_name;
+    mfxDecParams.IOPattern = MFX_IOPATTERN_OUT_VIDEO_MEMORY;
+
+    // try fetch & decode input data
+    sts = MFX_ERR_NONE;
+    std::shared_ptr<IDataProvider::mfx_bitstream> bitstream{};
+    do {
+        EXPECT_TRUE(data_provider->fetch_bitstream_data(bitstream));
+        sts = MFXVideoDECODE_DecodeHeader(mfx_session, bitstream.get(), &mfxDecParams);
+        EXPECT_TRUE(MFX_ERR_NONE == sts || MFX_ERR_MORE_DATA == sts);
+    } while (sts == MFX_ERR_MORE_DATA && !data_provider->empty());
+
+    EXPECT_EQ(MFX_ERR_NONE, sts);
+
+    mfxFrameAllocRequest request{};
+    memset(&request, 0, sizeof(request));
+    sts = MFXVideoDECODE_QueryIOSurf(mfx_session, &mfxDecParams, &request);
+    EXPECT_EQ(MFX_ERR_NONE, sts);
+
+    // Allocate surfaces for decoder
+    request.Type |= MFX_MEMTYPE_EXTERNAL_FRAME | MFX_MEMTYPE_FROM_DECODE | MFX_MEMTYPE_FROM_VPPIN;
+    VPLAccelerationPolicy::pool_key_t decode_pool_key = acceleration_policy->create_surface_pool(request,
+                                                                      mfxDecParams.mfx.FrameInfo);
+    sts = MFXVideoDECODE_Init(mfx_session, &mfxDecParams);
+    EXPECT_EQ(MFX_ERR_NONE, sts);
+
+    // initialize VPP
+    mfxU16 vppOutImgWidth  = 672;
+    mfxU16 vppOutImgHeight = 382;
+
+    mfxVideoParam mfxVPPParams{0};
+    mfxVPPParams.vpp.In = mfxDecParams.mfx.FrameInfo;
+
+    mfxVPPParams.vpp.Out.FourCC        = MFX_FOURCC_NV12;
+    mfxVPPParams.vpp.Out.ChromaFormat  = MFX_CHROMAFORMAT_YUV420;
+    mfxVPPParams.vpp.Out.Width         = ALIGN16(vppOutImgWidth);
+    mfxVPPParams.vpp.Out.Height        = ALIGN16(vppOutImgHeight);
+    mfxVPPParams.vpp.Out.CropX = 0;
+    mfxVPPParams.vpp.Out.CropY = 0;
+    mfxVPPParams.vpp.Out.CropW         = vppOutImgWidth;
+    mfxVPPParams.vpp.Out.CropH         = vppOutImgHeight;
+    mfxVPPParams.vpp.Out.PicStruct     = MFX_PICSTRUCT_PROGRESSIVE;
+    mfxVPPParams.vpp.Out.FrameRateExtN = 30;
+    mfxVPPParams.vpp.Out.FrameRateExtD = 1;
+
+    mfxVPPParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY;
+
+    mfxFrameAllocRequest vppRequests[2];
+    memset(&vppRequests, 0, sizeof(mfxFrameAllocRequest) * 2);
+    EXPECT_EQ(MFXVideoVPP_QueryIOSurf(mfx_session, &mfxVPPParams, vppRequests), MFX_ERR_NONE);
+
+    vppRequests[1].AllocId = 666;
+    VPLAccelerationPolicy::pool_key_t vpp_out_pool_key =
+                acceleration_policy->create_surface_pool(vppRequests[1], mfxVPPParams.vpp.Out);
+    EXPECT_EQ(MFXVideoVPP_Init(mfx_session, &mfxVPPParams), MFX_ERR_NONE);
+
+    // finalize session creation
+    DecoderParams d_param{bitstream, mfxDecParams};
+    TranscoderParams t_param{mfxVPPParams};
+    VPLLegacyTranscodeEngine engine(std::move(acceleration_policy));
+    std::shared_ptr<LegacyTranscodeSession> sess_ptr =
+                                engine.register_session<LegacyTranscodeSession>(
+                                                        mfx_session,
+                                                        std::move(d_param),
+                                                        std::move(t_param),
+                                                        data_provider);
+
+    sess_ptr->init_surface_pool(decode_pool_key);
+    sess_ptr->init_transcode_surface_pool(vpp_out_pool_key);
+
+    // prepare working surfaces
+    sess_ptr->swap_surface(engine);
+    sess_ptr->swap_transcode_surface(engine);
+
+    // launch pipeline
+    LegacyTranscodeSession & my_sess = *sess_ptr;
+    {
+        if (!my_sess.data_provider) {
+                my_sess.last_status = MFX_ERR_MORE_DATA;
+        } else {
+            my_sess.last_status = MFX_ERR_NONE;
+            if (!my_sess.data_provider->fetch_bitstream_data(my_sess.stream)) {
+                my_sess.last_status = MFX_ERR_MORE_DATA;
+                my_sess.data_provider.reset(); //close source
+            }
+        }
+
+        // 2) enqueue ASYNC decode operation
+        // prepare sync object for new surface
+        LegacyTranscodeSession::op_handle_t sync_pair{};
+
+        // enqueue decode operation with current session surface
+        {
+            my_sess.last_status =
+                    MFXVideoDECODE_DecodeFrameAsync(my_sess.session,
+                                                    (my_sess.data_provider || (my_sess.stream && my_sess.stream->DataLength))
+                                                        ? my_sess.stream.get()
+
+                                                        : nullptr, /* No more data to read, start decode draining mode*/
+                                                    my_sess.procesing_surface_ptr.lock()->get_handle(),
+                                                    &sync_pair.second,
+                                                    &sync_pair.first);
+
+            // process wait-like statuses in-place:
+            // It is better to use up all VPL decoding resources in the pipeline
+            // as soon as possible, so retry while waiting for a free surface or a free device
+            while (my_sess.last_status == MFX_ERR_MORE_SURFACE ||
+                   my_sess.last_status == MFX_WRN_DEVICE_BUSY) {
+                try {
+                    if (my_sess.last_status == MFX_ERR_MORE_SURFACE) {
+                        my_sess.swap_surface(engine);
+                    }
+                    my_sess.last_status =
+                    MFXVideoDECODE_DecodeFrameAsync(my_sess.session,
+                                                    my_sess.stream.get(),
+                                                    my_sess.procesing_surface_ptr.lock()->get_handle(),
+                                                    &sync_pair.second,
+                                                    &sync_pair.first);
+
+                } catch (const std::runtime_error&) {
+                    // NB: not an error, yield CPU ticks to check
+                    // surface availability at the next phase.
+                    break;
+                }
+            }
+        }
+        // 4) transcode
+        {
+            auto *dec_surface = sync_pair.second;
+            if(my_sess.vpp_surface_ptr.lock())
+            {
+                mfxFrameSurface1* out_surf = my_sess.vpp_surface_ptr.lock()->get_handle();
+                my_sess.last_status = MFXVideoVPP_RunFrameVPPAsync(my_sess.session, dec_surface,
+                                                                out_surf,
+                                                                nullptr, &sync_pair.first);
+                sync_pair.second = out_surf;
+
+                my_sess.last_status = MFXVideoCORE_SyncOperation(my_sess.session, sync_pair.first, 11000);
+            }
+            try {
+                my_sess.swap_transcode_surface(engine);
+            } catch (... ) {
+                my_sess.vpp_surface_ptr.reset();
+            }
+        }
+    }
+}
 #endif // HAVE_DIRECTX
 #endif // HAVE_D3D11
 
diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt
index a7fdfc8b67..65d24e0ab0 100644
--- a/modules/highgui/CMakeLists.txt
+++ b/modules/highgui/CMakeLists.txt
@@ -84,6 +84,9 @@ if(HAVE_QT)
         list(APPEND qt_deps OpenGLWidgets)
       endif()
       list(APPEND qt_deps OpenGL)
+      if(OPENGL_LIBRARIES)
+        list(APPEND HIGHGUI_LIBRARIES "${OPENGL_LIBRARIES}")
+      endif()
     endif()
 
     foreach(dt_dep ${qt_deps})
@@ -93,8 +96,11 @@ if(HAVE_QT)
     endforeach()
   else()
     ocv_assert(QT_VERSION_MAJOR EQUAL 4)
-    if (HAVE_QT_OPENGL)
+    if(HAVE_QT_OPENGL)
       set(QT_USE_QTOPENGL TRUE)
+      if(OPENGL_LIBRARIES)
+        list(APPEND HIGHGUI_LIBRARIES "${OPENGL_LIBRARIES}")
+      endif()
     endif()
     include(${QT_USE_FILE})
 
@@ -157,6 +163,9 @@ if(TARGET ocv.3rdparty.win32ui)
     set(OPENCV_HIGHGUI_BUILTIN_BACKEND "WIN32UI")
     list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_w32.cpp)
     list(APPEND tgts ocv.3rdparty.win32ui)
+    if(HAVE_OPENGL AND OPENGL_LIBRARIES)
+      list(APPEND tgts "${OPENGL_LIBRARIES}")
+    endif()
   endif()
 endif()
 
@@ -271,14 +280,6 @@ if(APPLE)
   add_apple_compiler_options(${the_module})
 endif()
 
-if(OPENCV_HIGHGUI_BUILTIN_BACKEND STREQUAL "WIN32UI" AND HAVE_OPENGL AND OPENGL_LIBRARIES)
-  ocv_target_link_libraries(${the_module} PRIVATE "${OPENGL_LIBRARIES}")
-endif()
-
-if(OPENCV_HIGHGUI_BUILTIN_BACKEND MATCHES "^QT" AND HAVE_OPENGL AND OPENGL_LIBRARIES)
-  ocv_target_link_libraries(${the_module} PRIVATE "${OPENGL_LIBRARIES}")
-endif()
-
 if(MSVC AND NOT BUILD_SHARED_LIBS AND BUILD_WITH_STATIC_CRT)
   set_target_properties(${the_module} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /NODEFAULTLIB:libcmt.lib /DEBUG")
 endif()
diff --git a/modules/highgui/src/window.cpp b/modules/highgui/src/window.cpp
index 481fee9fbd..81d205a69a 100644
--- a/modules/highgui/src/window.cpp
+++ b/modules/highgui/src/window.cpp
@@ -963,6 +963,8 @@ void cv::imshow( const String& winname, InputArray _img )
 {
     CV_TRACE_FUNCTION();
 
+    const Size size = _img.size();
+    CV_Assert(size.width>0 && size.height>0);
     {
         cv::AutoLock lock(cv::getWindowMutex());
         cleanupClosedWindows_();
@@ -995,9 +997,7 @@ void cv::imshow( const String& winname, InputArray _img )
         }
     }
 
-    const Size size = _img.size();
 #ifndef HAVE_OPENGL
-    CV_Assert(size.width>0 && size.height>0);
     {
         Mat img = _img.getMat();
         CvMat c_img = cvMat(img);
@@ -1005,7 +1005,6 @@ void cv::imshow( const String& winname, InputArray _img )
     }
 #else
     const double useGl = getWindowProperty(winname, WND_PROP_OPENGL);
-    CV_Assert(size.width>0 && size.height>0);
 
     if (useGl <= 0)
     {
diff --git a/modules/highgui/src/window_QT.cpp b/modules/highgui/src/window_QT.cpp
index f6ba44b425..d8f2271faa 100644
--- a/modules/highgui/src/window_QT.cpp
+++ b/modules/highgui/src/window_QT.cpp
@@ -529,6 +529,9 @@ static int icvInitSystem(int* c, char** v)
     //"For any GUI application using Qt, there is precisely one QApplication object"
     if (!QApplication::instance())
     {
+#if QT_VERSION >= QT_VERSION_CHECK(5, 6, 0)
+        QCoreApplication::setAttribute(Qt::AA_EnableHighDpiScaling, true);
+#endif
         new QApplication(*c, v);
         setlocale(LC_NUMERIC,"C");
 
diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
index d7ff9a178d..148eea71e7 100644
--- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp
+++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
@@ -98,17 +98,17 @@ enum ImwriteFlags {
        IMWRITE_EXR_COMPRESSION     = (3 << 4) + 1, /* 49 */ //!< override EXR compression type (ZIP_COMPRESSION = 3 is default)
        IMWRITE_WEBP_QUALITY        = 64, //!< For WEBP, it can be a quality from 1 to 100 (the higher is the better). By default (without any parameter) and for quality above 100 the lossless compression is used.
        IMWRITE_PAM_TUPLETYPE       = 128,//!< For PAM, sets the TUPLETYPE field to the corresponding string value that is defined for the format
-       IMWRITE_TIFF_RESUNIT = 256,//!< For TIFF, use to specify which DPI resolution unit to set; see libtiff documentation for valid values
-       IMWRITE_TIFF_XDPI = 257,//!< For TIFF, use to specify the X direction DPI
-       IMWRITE_TIFF_YDPI = 258, //!< For TIFF, use to specify the Y direction DPI
-       IMWRITE_TIFF_COMPRESSION = 259, //!< For TIFF, use to specify the image compression scheme. See libtiff for integer constants corresponding to compression formats. Note, for images whose depth is CV_32F, only libtiff's SGILOG compression scheme is used. For other supported depths, the compression scheme can be specified by this flag; LZW compression is the default.
+       IMWRITE_TIFF_RESUNIT        = 256,//!< For TIFF, use to specify which DPI resolution unit to set; see libtiff documentation for valid values
+       IMWRITE_TIFF_XDPI           = 257,//!< For TIFF, use to specify the X direction DPI
+       IMWRITE_TIFF_YDPI           = 258,//!< For TIFF, use to specify the Y direction DPI
+       IMWRITE_TIFF_COMPRESSION    = 259,//!< For TIFF, use to specify the image compression scheme. See libtiff for integer constants corresponding to compression formats. Note, for images whose depth is CV_32F, only libtiff's SGILOG compression scheme is used. For other supported depths, the compression scheme can be specified by this flag; LZW compression is the default.
        IMWRITE_JPEG2000_COMPRESSION_X1000 = 272 //!< For JPEG2000, use to specify the target compression rate (multiplied by 1000). The value can be from 0 to 1000. Default is 1000.
      };
 
 enum ImwriteEXRTypeFlags {
        /*IMWRITE_EXR_TYPE_UNIT = 0, //!< not supported */
-       IMWRITE_EXR_TYPE_HALF = 1,   //!< store as HALF (FP16)
-       IMWRITE_EXR_TYPE_FLOAT = 2   //!< store as FP32 (default)
+       IMWRITE_EXR_TYPE_HALF   = 1, //!< store as HALF (FP16)
+       IMWRITE_EXR_TYPE_FLOAT  = 2  //!< store as FP32 (default)
      };
 
 enum ImwriteEXRCompressionFlags {
@@ -140,14 +140,14 @@ enum ImwritePNGFlags {
        IMWRITE_PNG_STRATEGY_FIXED        = 4  //!< Using this value prevents the use of dynamic Huffman codes, allowing for a simpler decoder for special applications.
      };
 
-//! Imwrite PAM specific tupletype flags used to define the 'TUPETYPE' field of a PAM file.
+//! Imwrite PAM specific tupletype flags used to define the 'TUPLETYPE' field of a PAM file.
 enum ImwritePAMFlags {
-       IMWRITE_PAM_FORMAT_NULL = 0,
-       IMWRITE_PAM_FORMAT_BLACKANDWHITE = 1,
-       IMWRITE_PAM_FORMAT_GRAYSCALE = 2,
+       IMWRITE_PAM_FORMAT_NULL            = 0,
+       IMWRITE_PAM_FORMAT_BLACKANDWHITE   = 1,
+       IMWRITE_PAM_FORMAT_GRAYSCALE       = 2,
        IMWRITE_PAM_FORMAT_GRAYSCALE_ALPHA = 3,
-       IMWRITE_PAM_FORMAT_RGB = 4,
-       IMWRITE_PAM_FORMAT_RGB_ALPHA = 5,
+       IMWRITE_PAM_FORMAT_RGB             = 4,
+       IMWRITE_PAM_FORMAT_RGB_ALPHA       = 5
      };
 
 //! @} imgcodecs_flags
@@ -209,8 +209,8 @@ CV_EXPORTS_W Mat imread( const String& filename, int flags = IMREAD_COLOR );
 
 The function imreadmulti loads a multi-page image from the specified file into a vector of Mat objects.
 @param filename Name of file to be loaded.
+@param mats A vector of Mat objects holding each page.
 @param flags Flag that can take values of cv::ImreadModes, default with cv::IMREAD_ANYCOLOR.
-@param mats A vector of Mat objects holding each page, if more than one.
 @sa cv::imread
 */
 CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector<Mat>& mats, int flags = IMREAD_ANYCOLOR);
@@ -219,10 +219,10 @@ CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector<Mat>& m
 
 The function imreadmulti loads a specified range from a multi-page image from the specified file into a vector of Mat objects.
 @param filename Name of file to be loaded.
+@param mats A vector of Mat objects holding each page.
 @param start Start index of the image to load
 @param count Count number of images to load
 @param flags Flag that can take values of cv::ImreadModes, default with cv::IMREAD_ANYCOLOR.
-@param mats A vector of Mat objects holding each page, if more than one.
 @sa cv::imread
 */
 CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector<Mat>& mats, int start, int count, int flags = IMREAD_ANYCOLOR);
diff --git a/modules/imgcodecs/src/grfmt_exr.cpp b/modules/imgcodecs/src/grfmt_exr.cpp
index 960f5da3d3..0585035202 100644
--- a/modules/imgcodecs/src/grfmt_exr.cpp
+++ b/modules/imgcodecs/src/grfmt_exr.cpp
@@ -637,7 +637,7 @@ bool  ExrEncoder::write( const Mat& img, const std::vector<int>& params )
 
     for( size_t i = 0; i < params.size(); i += 2 )
     {
-        if( params[i] == CV_IMWRITE_EXR_TYPE )
+        if( params[i] == IMWRITE_EXR_TYPE )
         {
             switch( params[i+1] )
             {
diff --git a/modules/imgcodecs/src/grfmt_jpeg.cpp b/modules/imgcodecs/src/grfmt_jpeg.cpp
index 3dd9d68771..17feafc404 100644
--- a/modules/imgcodecs/src/grfmt_jpeg.cpp
+++ b/modules/imgcodecs/src/grfmt_jpeg.cpp
@@ -643,23 +643,23 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
 
         for( size_t i = 0; i < params.size(); i += 2 )
         {
-            if( params[i] == CV_IMWRITE_JPEG_QUALITY )
+            if( params[i] == IMWRITE_JPEG_QUALITY )
             {
                 quality = params[i+1];
                 quality = MIN(MAX(quality, 0), 100);
             }
 
-            if( params[i] == CV_IMWRITE_JPEG_PROGRESSIVE )
+            if( params[i] == IMWRITE_JPEG_PROGRESSIVE )
             {
                 progressive = params[i+1];
             }
 
-            if( params[i] == CV_IMWRITE_JPEG_OPTIMIZE )
+            if( params[i] == IMWRITE_JPEG_OPTIMIZE )
             {
                 optimize = params[i+1];
             }
 
-            if( params[i] == CV_IMWRITE_JPEG_LUMA_QUALITY )
+            if( params[i] == IMWRITE_JPEG_LUMA_QUALITY )
             {
                 if (params[i+1] >= 0)
                 {
@@ -674,7 +674,7 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
                 }
             }
 
-            if( params[i] == CV_IMWRITE_JPEG_CHROMA_QUALITY )
+            if( params[i] == IMWRITE_JPEG_CHROMA_QUALITY )
             {
                 if (params[i+1] >= 0)
                 {
@@ -682,7 +682,7 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
                 }
             }
 
-            if( params[i] == CV_IMWRITE_JPEG_RST_INTERVAL )
+            if( params[i] == IMWRITE_JPEG_RST_INTERVAL )
             {
                 rst_interval = params[i+1];
                 rst_interval = MIN(MAX(rst_interval, 0), 65535L);
diff --git a/modules/imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp b/modules/imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp
index 73d49282d7..c5b1a292cc 100644
--- a/modules/imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp
+++ b/modules/imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp
@@ -545,7 +545,7 @@ bool Jpeg2KOpjDecoderBase::readHeader()
      */
     bool hasAlpha = false;
     const int numcomps = image_->numcomps;
-    CV_Assert(numcomps >= 1);
+    CV_Check(numcomps, numcomps >= 1 && numcomps <= 4, "Unsupported number of components");
     for (int i = 0; i < numcomps; i++)
     {
         const opj_image_comp_t& comp = image_->comps[i];
diff --git a/modules/imgcodecs/src/grfmt_pam.cpp b/modules/imgcodecs/src/grfmt_pam.cpp
index 4db595055e..1c8f8476a5 100644
--- a/modules/imgcodecs/src/grfmt_pam.cpp
+++ b/modules/imgcodecs/src/grfmt_pam.cpp
@@ -111,12 +111,12 @@ static bool rgb_convert (void *src, void *target, int width, int target_channels
     int target_depth);
 
 const static struct pam_format formats[] = {
-    {CV_IMWRITE_PAM_FORMAT_NULL, "", NULL, {0, 0, 0, 0} },
-    {CV_IMWRITE_PAM_FORMAT_BLACKANDWHITE, "BLACKANDWHITE", NULL, {0, 0, 0, 0} },
-    {CV_IMWRITE_PAM_FORMAT_GRAYSCALE, "GRAYSCALE", NULL, {0, 0, 0, 0} },
-    {CV_IMWRITE_PAM_FORMAT_GRAYSCALE_ALPHA, "GRAYSCALE_ALPHA", NULL, {0, 0, 0, 0} },
-    {CV_IMWRITE_PAM_FORMAT_RGB, "RGB", rgb_convert, {0, 1, 2, 0} },
-    {CV_IMWRITE_PAM_FORMAT_RGB_ALPHA, "RGB_ALPHA", NULL, {0, 1, 2, 0} },
+    {IMWRITE_PAM_FORMAT_NULL, "", NULL, {0, 0, 0, 0} },
+    {IMWRITE_PAM_FORMAT_BLACKANDWHITE, "BLACKANDWHITE", NULL, {0, 0, 0, 0} },
+    {IMWRITE_PAM_FORMAT_GRAYSCALE, "GRAYSCALE", NULL, {0, 0, 0, 0} },
+    {IMWRITE_PAM_FORMAT_GRAYSCALE_ALPHA, "GRAYSCALE_ALPHA", NULL, {0, 0, 0, 0} },
+    {IMWRITE_PAM_FORMAT_RGB, "RGB", rgb_convert, {0, 1, 2, 0} },
+    {IMWRITE_PAM_FORMAT_RGB_ALPHA, "RGB_ALPHA", NULL, {0, 1, 2, 0} },
 };
 #define PAM_FORMATS_NO (sizeof (fields) / sizeof ((fields)[0]))
 
@@ -341,7 +341,7 @@ PAMDecoder::PAMDecoder()
     m_offset = -1;
     m_buf_supported = true;
     bit_mode = false;
-    selected_fmt = CV_IMWRITE_PAM_FORMAT_NULL;
+    selected_fmt = IMWRITE_PAM_FORMAT_NULL;
     m_maxval = 0;
     m_channels = 0;
     m_sampledepth = 0;
@@ -462,15 +462,19 @@ bool PAMDecoder::readHeader()
 
         if (flds_endhdr && flds_height && flds_width && flds_depth && flds_maxval)
         {
-            if (selected_fmt == CV_IMWRITE_PAM_FORMAT_NULL)
+            if (selected_fmt == IMWRITE_PAM_FORMAT_NULL)
             {
                 if (m_channels == 1 && m_maxval == 1)
-                    selected_fmt = CV_IMWRITE_PAM_FORMAT_BLACKANDWHITE;
+                    selected_fmt = IMWRITE_PAM_FORMAT_BLACKANDWHITE;
                 else if (m_channels == 1 && m_maxval < 256)
-                    selected_fmt = CV_IMWRITE_PAM_FORMAT_GRAYSCALE;
+                    selected_fmt = IMWRITE_PAM_FORMAT_GRAYSCALE;
                 else if (m_channels == 3 && m_maxval < 256)
-                    selected_fmt = CV_IMWRITE_PAM_FORMAT_RGB;
+                    selected_fmt = IMWRITE_PAM_FORMAT_RGB;
+                else
+                    CV_Error(Error::StsError, "Can't determine selected_fmt (IMWRITE_PAM_FORMAT_NULL)");
             }
+            CV_CheckDepth(m_sampledepth, m_sampledepth == CV_8U || m_sampledepth == CV_16U, "");
+            CV_Check(m_channels, m_channels >= 1 && m_channels <= 4, "Unsupported number of channels");
             m_type = CV_MAKETYPE(m_sampledepth, m_channels);
             m_offset = m_strm.getPos();
 
@@ -512,7 +516,7 @@ bool PAMDecoder::readData(Mat& img)
     if( m_offset < 0 || !m_strm.isOpened())
         return false;
 
-    if (selected_fmt != CV_IMWRITE_PAM_FORMAT_NULL)
+    if (selected_fmt != IMWRITE_PAM_FORMAT_NULL)
         fmt = &formats[selected_fmt];
     else {
         /* default layout handling */
@@ -567,6 +571,10 @@ bool PAMDecoder::readData(Mat& img)
                         FillColorRow1( data, src, m_width, palette );
                     }
                 }
+                else
+                {
+                    CV_Error(Error::StsError, cv::format("Unsupported value of target_channels: %d", target_channels));
+                }
             } else {
                 for (int y = 0; y < m_height; y++, data += imp_stride)
                 {
@@ -662,8 +670,8 @@ bool PAMEncoder::write( const Mat& img, const std::vector<int>& params )
 
     /* parse save file type */
     for( size_t i = 0; i < params.size(); i += 2 )
-        if( params[i] == CV_IMWRITE_PAM_TUPLETYPE ) {
-            if ( params[i+1] > CV_IMWRITE_PAM_FORMAT_NULL &&
+        if( params[i] == IMWRITE_PAM_TUPLETYPE ) {
+            if ( params[i+1] > IMWRITE_PAM_FORMAT_NULL &&
                  params[i+1] < (int) PAM_FORMATS_NO)
                 fmt = &formats[params[i+1]];
         }
diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp
index 5e7523b203..36cf17e1e3 100644
--- a/modules/imgcodecs/src/grfmt_tiff.cpp
+++ b/modules/imgcodecs/src/grfmt_tiff.cpp
@@ -112,6 +112,8 @@ static bool cv_tiffSetErrorHandler()
 
 static const char fmtSignTiffII[] = "II\x2a\x00";
 static const char fmtSignTiffMM[] = "MM\x00\x2a";
+static const char fmtSignBigTiffII[] = "II\x2b\x00";
+static const char fmtSignBigTiffMM[] = "MM\x00\x2b";
 
 TiffDecoder::TiffDecoder()
 {
@@ -140,13 +142,15 @@ bool TiffDecoder::checkSignature( const String& signature ) const
 {
     return signature.size() >= 4 &&
         (memcmp(signature.c_str(), fmtSignTiffII, 4) == 0 ||
-        memcmp(signature.c_str(), fmtSignTiffMM, 4) == 0);
+        memcmp(signature.c_str(), fmtSignTiffMM, 4) == 0 ||
+        memcmp(signature.c_str(), fmtSignBigTiffII, 4) == 0 ||
+        memcmp(signature.c_str(), fmtSignBigTiffMM, 4) == 0);
 }
 
 int TiffDecoder::normalizeChannelsNumber(int channels) const
 {
-    CV_Assert(channels <= 4);
-    return channels > 4 ? 4 : channels;
+    CV_Check(channels, channels >= 1 && channels <= 4, "Unsupported number of channels");
+    return channels;
 }
 
 ImageDecoder TiffDecoder::newDecoder() const
@@ -295,34 +299,53 @@ bool TiffDecoder::readHeader()
                 (ncn != 1 && ncn != 3 && ncn != 4)))
                 bpp = 8;
 
+            uint16 sample_format = SAMPLEFORMAT_UINT;
+            TIFFGetField(tif, TIFFTAG_SAMPLEFORMAT, &sample_format);
             int wanted_channels = normalizeChannelsNumber(ncn);
-            switch(bpp)
+            switch (bpp)
             {
-                case 1:
-                    m_type = CV_MAKETYPE(CV_8U, !isGrayScale ? wanted_channels : 1);
-                    result = true;
-                    break;
-                case 8:
-                    //Palette color, the value of the component is used as an index into the red,
-                    //green and blue curves in the ColorMap field to retrieve an RGB triplet that defines the color.
-                    if(photometric == PHOTOMETRIC_PALETTE)
-                        m_type = CV_MAKETYPE(CV_8U, 3);
-                    else
-                        m_type = CV_MAKETYPE(CV_8U, !isGrayScale ? wanted_channels : 1);
-                    result = true;
-                    break;
-                case 16:
-                    m_type = CV_MAKETYPE(CV_16U, !isGrayScale ? wanted_channels : 1);
-                    result = true;
-                    break;
-                case 32:
-                    m_type = CV_MAKETYPE(CV_32F, wanted_channels);
-                    result = true;
-                    break;
-                case 64:
-                    m_type = CV_MAKETYPE(CV_64F, wanted_channels);
-                    result = true;
-                    break;
+            case 1:
+            {
+                CV_Check((int)sample_format, sample_format == SAMPLEFORMAT_UINT || sample_format == SAMPLEFORMAT_INT, "");
+                int depth = sample_format == SAMPLEFORMAT_INT ? CV_8S : CV_8U;
+                m_type = CV_MAKETYPE(depth, !isGrayScale ? wanted_channels : 1);
+                result = true;
+                break;
+            }
+            case 8:
+            {
+                //Palette color, the value of the component is used as an index into the red,
+                //green and blue curves in the ColorMap field to retrieve an RGB triplet that defines the color.
+                CV_Check((int)sample_format, sample_format == SAMPLEFORMAT_UINT || sample_format == SAMPLEFORMAT_INT, "");
+                int depth = sample_format == SAMPLEFORMAT_INT ? CV_8S : CV_8U;
+                if (photometric == PHOTOMETRIC_PALETTE)
+                    m_type = CV_MAKETYPE(depth, 3);
+                else
+                    m_type = CV_MAKETYPE(depth, !isGrayScale ? wanted_channels : 1);
+                result = true;
+                break;
+            }
+            case 16:
+            {
+                CV_Check((int)sample_format, sample_format == SAMPLEFORMAT_UINT || sample_format == SAMPLEFORMAT_INT, "");
+                int depth = sample_format == SAMPLEFORMAT_INT ? CV_16S : CV_16U;
+                m_type = CV_MAKETYPE(depth, !isGrayScale ? wanted_channels : 1);
+                result = true;
+                break;
+            }
+            case 32:
+            {
+                CV_Check((int)sample_format, sample_format == SAMPLEFORMAT_IEEEFP || sample_format == SAMPLEFORMAT_INT, "");
+                int depth = sample_format == SAMPLEFORMAT_IEEEFP ? CV_32F : CV_32S;
+                m_type = CV_MAKETYPE(depth, wanted_channels);
+                result = true;
+                break;
+            }
+            case 64:
+                CV_CheckEQ((int)sample_format, SAMPLEFORMAT_IEEEFP, "");
+                m_type = CV_MAKETYPE(CV_64F, wanted_channels);
+                result = true;
+                break;
             default:
                 CV_Error(cv::Error::StsError, "Invalid bitsperpixel value read from TIFF header! Must be 1, 8, 16, 32 or 64.");
             }
@@ -432,7 +455,7 @@ bool  TiffDecoder::readData( Mat& img )
 
     bool color = img.channels() > 1;
 
-    CV_CheckType(type, depth == CV_8U || depth == CV_16U || depth == CV_32F || depth == CV_64F, "");
+    CV_CheckType(type, depth == CV_8U || depth == CV_8S || depth == CV_16U || depth == CV_16S || depth == CV_32S || depth == CV_32F || depth == CV_64F, "");
 
     if (m_width && m_height)
     {
@@ -649,7 +672,7 @@ bool  TiffDecoder::readData( Mat& img )
                                 CV_TIFF_CHECK_CALL((int)TIFFReadEncodedTile(tif, tileidx, buffer, buffer_size) >= 0);
                             }
 
-                            Mat m_tile(Size(tile_width0, tile_height0), CV_MAKETYPE((dst_bpp == 32) ? CV_32F : CV_64F, ncn), buffer);
+                            Mat m_tile(Size(tile_width0, tile_height0), CV_MAKETYPE((dst_bpp == 32) ? (depth == CV_32S ? CV_32S : CV_32F) : CV_64F, ncn), buffer);
                             Rect roi_tile(0, 0, tile_width, tile_height);
                             Rect roi_img(x, img_y, tile_width, tile_height);
                             if (!m_hdr && ncn == 3)
@@ -698,7 +721,7 @@ ImageEncoder TiffEncoder::newEncoder() const
 
 bool TiffEncoder::isFormatSupported( int depth ) const
 {
-    return depth == CV_8U || depth == CV_16U || depth == CV_32F || depth == CV_64F;
+    return depth == CV_8U || depth == CV_8S || depth == CV_16U || depth == CV_16S || depth == CV_32S || depth == CV_32F || depth == CV_64F;
 }
 
 void  TiffEncoder::writeTag( WLByteStream& strm, TiffTag tag,
@@ -842,7 +865,7 @@ bool TiffEncoder::writeLibTiff( const std::vector<Mat>& img_vec, const std::vect
         int width = img.cols, height = img.rows;
         int type = img.type();
         int depth = CV_MAT_DEPTH(type);
-        CV_CheckType(type, depth == CV_8U || depth == CV_16U || depth == CV_32F || depth == CV_64F, "");
+        CV_CheckType(type, depth == CV_8U || depth == CV_8S || depth == CV_16U || depth == CV_16S || depth == CV_32S || depth == CV_32F || depth == CV_64F, "");
         CV_CheckType(type, channels >= 1 && channels <= 4, "");
 
         CV_TIFF_CHECK_CALL(TIFFSetField(tif, TIFFTAG_IMAGEWIDTH, width));
@@ -865,19 +888,31 @@ bool TiffEncoder::writeLibTiff( const std::vector<Mat>& img_vec, const std::vect
         int page_compression = compression;
 
         int bitsPerChannel = -1;
+        uint16 sample_format = SAMPLEFORMAT_INT;
         switch (depth)
         {
             case CV_8U:
+                sample_format = SAMPLEFORMAT_UINT;
+                /* FALLTHRU */
+            case CV_8S:
             {
                 bitsPerChannel = 8;
                 break;
             }
+
             case CV_16U:
+                sample_format = SAMPLEFORMAT_UINT;
+                /* FALLTHRU */
+            case CV_16S:
             {
                 bitsPerChannel = 16;
                 break;
             }
+
             case CV_32F:
+                sample_format = SAMPLEFORMAT_IEEEFP;
+                /* FALLTHRU */
+            case CV_32S:
             {
                 bitsPerChannel = 32;
                 page_compression = COMPRESSION_NONE;
@@ -887,6 +922,7 @@ bool TiffEncoder::writeLibTiff( const std::vector<Mat>& img_vec, const std::vect
             {
                 bitsPerChannel = 64;
                 page_compression = COMPRESSION_NONE;
+                sample_format = SAMPLEFORMAT_IEEEFP;
                 break;
             }
             default:
@@ -912,7 +948,7 @@ bool TiffEncoder::writeLibTiff( const std::vector<Mat>& img_vec, const std::vect
         CV_TIFF_CHECK_CALL(TIFFSetField(tif, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG));
         CV_TIFF_CHECK_CALL(TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, rowsPerStrip));
 
-        CV_TIFF_CHECK_CALL(TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, depth >= CV_32F ? SAMPLEFORMAT_IEEEFP : SAMPLEFORMAT_UINT));
+        CV_TIFF_CHECK_CALL(TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, sample_format));
 
         if (page_compression != COMPRESSION_NONE)
         {
@@ -1011,7 +1047,7 @@ bool  TiffEncoder::write( const Mat& img, const std::vector<int>& params)
     int type = img.type();
     int depth = CV_MAT_DEPTH(type);
 
-    CV_CheckType(type, depth == CV_8U || depth == CV_16U || depth == CV_32F || depth == CV_64F, "");
+    CV_CheckType(type, depth == CV_8U || depth == CV_8S || depth == CV_16U || depth == CV_16S || depth == CV_32S || depth == CV_32F || depth == CV_64F, "");
 
     std::vector<Mat> img_vec;
     img_vec.push_back(img);
diff --git a/modules/imgcodecs/src/grfmt_webp.cpp b/modules/imgcodecs/src/grfmt_webp.cpp
index e137b8734d..3860abb64e 100644
--- a/modules/imgcodecs/src/grfmt_webp.cpp
+++ b/modules/imgcodecs/src/grfmt_webp.cpp
@@ -243,7 +243,7 @@ bool WebPEncoder::write(const Mat& img, const std::vector<int>& params)
 
     if (params.size() > 1)
     {
-        if (params[0] == CV_IMWRITE_WEBP_QUALITY)
+        if (params[0] == IMWRITE_WEBP_QUALITY)
         {
             comp_lossless = false;
             quality = static_cast<float>(params[1]);
diff --git a/modules/imgcodecs/src/loadsave.cpp b/modules/imgcodecs/src/loadsave.cpp
index 91f30cfe98..e9b6d0517c 100644
--- a/modules/imgcodecs/src/loadsave.cpp
+++ b/modules/imgcodecs/src/loadsave.cpp
@@ -562,7 +562,7 @@ imreadmulti_(const String& filename, int flags, std::vector<Mat>& mats, int star
             if ((flags & IMREAD_ANYDEPTH) == 0)
                 type = CV_MAKETYPE(CV_8U, CV_MAT_CN(type));
 
-            if ((flags & CV_LOAD_IMAGE_COLOR) != 0 ||
+            if ((flags & IMREAD_COLOR) != 0 ||
                 ((flags & IMREAD_ANYCOLOR) != 0 && CV_MAT_CN(type) > 1))
                 type = CV_MAKETYPE(CV_MAT_DEPTH(type), 3);
             else
diff --git a/modules/imgcodecs/src/precomp.hpp b/modules/imgcodecs/src/precomp.hpp
index aa2a999f63..70cc1e7105 100644
--- a/modules/imgcodecs/src/precomp.hpp
+++ b/modules/imgcodecs/src/precomp.hpp
@@ -43,11 +43,8 @@
 #define __IMGCODECS_H_
 
 #include "opencv2/imgcodecs.hpp"
-#include "opencv2/imgcodecs/legacy/constants_c.h"
-
 #include "opencv2/core/utility.hpp"
 #include "opencv2/core/private.hpp"
-
 #include "opencv2/imgproc.hpp"
 
 #include <stdlib.h>
diff --git a/modules/imgcodecs/test/test_tiff.cpp b/modules/imgcodecs/test/test_tiff.cpp
index a2f9655c73..1c6e4a6b29 100644
--- a/modules/imgcodecs/test/test_tiff.cpp
+++ b/modules/imgcodecs/test/test_tiff.cpp
@@ -147,6 +147,26 @@ TEST(Imgcodecs_Tiff, decode_infinite_rowsperstrip)
     EXPECT_EQ(0, remove(filename.c_str()));
 }
 
+TEST(Imgcodecs_Tiff, readWrite_unsigned)
+{
+    const string root = cvtest::TS::ptr()->get_data_path();
+    const string filenameInput = root + "readwrite/gray_8u.tif";
+    const string filenameOutput = cv::tempfile(".tiff");
+    const Mat img = cv::imread(filenameInput, IMREAD_UNCHANGED);
+    ASSERT_FALSE(img.empty());
+    ASSERT_EQ(CV_8UC1, img.type());
+    // NOTE(review): despite the "unsigned" name, the round trip below uses signed CV_8SC1 data.
+    Mat matS8;
+    img.convertTo(matS8, CV_8SC1);
+    // Write the converted image, read it back unchanged, and compare type, size and pixels.
+    ASSERT_TRUE(cv::imwrite(filenameOutput, matS8));
+    const Mat img2 = cv::imread(filenameOutput, IMREAD_UNCHANGED);
+    ASSERT_EQ(img2.type(), matS8.type());
+    ASSERT_EQ(img2.size(), matS8.size());
+    EXPECT_LE(cvtest::norm(matS8, img2, NORM_INF | NORM_RELATIVE), 1e-3);
+    EXPECT_EQ(0, remove(filenameOutput.c_str()));
+}
+
 TEST(Imgcodecs_Tiff, readWrite_32FC1)
 {
     const string root = cvtest::TS::ptr()->get_data_path();
@@ -455,6 +475,29 @@ TEST(Imgcodecs_Tiff, read_multipage_indexed)
     }
 }
 
+TEST(Imgcodecs_Tiff, read_bigtiff_images)
+{
+    const string root = cvtest::TS::ptr()->get_data_path();
+    const string filenamesInput[] = {
+        "readwrite/BigTIFF.tif",
+        "readwrite/BigTIFFMotorola.tif",
+        "readwrite/BigTIFFLong.tif",
+        "readwrite/BigTIFFLong8.tif",
+        "readwrite/BigTIFFMotorolaLongStrips.tif",
+        "readwrite/BigTIFFLong8Tiles.tif",
+        "readwrite/BigTIFFSubIFD4.tif",
+        "readwrite/BigTIFFSubIFD8.tif"
+    };
+    // Range-for keeps the loop in step with the array; every sample must decode to 64x64 CV_8UC3.
+    for (const string& filename : filenamesInput)
+    {
+        const Mat bigtiff_img = imread(root + filename, IMREAD_UNCHANGED);
+        ASSERT_FALSE(bigtiff_img.empty()) << filename;
+        EXPECT_EQ(64, bigtiff_img.cols) << filename;
+        EXPECT_EQ(64, bigtiff_img.rows) << filename;
+        ASSERT_EQ(CV_8UC3, bigtiff_img.type()) << filename;
+    }
+}
 
 #endif
 
diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp
index 69b5b0accd..cb7e4d8b98 100644
--- a/modules/imgproc/include/opencv2/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc.hpp
@@ -3576,10 +3576,11 @@ a mask and then extract the contour, or copy the region to another image, and so
 function unless the #FLOODFILL_MASK_ONLY flag is set in the second variant of the function. See
 the details below.
 @param mask Operation mask that should be a single-channel 8-bit image, 2 pixels wider and 2 pixels
-taller than image. Since this is both an input and output parameter, you must take responsibility
-of initializing it. Flood-filling cannot go across non-zero pixels in the input mask. For example,
+taller than image. If an empty Mat is passed it will be created automatically. Otherwise, since
+this is both an input and output parameter, you are responsible for initializing it.
+Flood-filling cannot go across non-zero pixels in the input mask. For example,
 an edge detector output can be used as a mask to stop filling at edges. On output, pixels in the
-mask corresponding to filled pixels in the image are set to 1 or to the a value specified in flags
+mask corresponding to filled pixels in the image are set to 1 or to the value specified in flags
 as described below. Additionally, the function fills the border of the mask with ones to simplify
 internal processing. It is therefore possible to use the same mask in multiple calls to the function
 to make sure the filled areas do not overlap.
@@ -4956,13 +4957,13 @@ CV_EXPORTS_W Rect getTextSize( Size imgsize, const String& text, Point org,
                                FontFace& fface, int size, int weight=0,
                                PutTextFlags flags=PUT_TEXT_ALIGN_LEFT, Range wrap=Range() );
 
-/** @brief Line iterator
 
-The class is used to iterate over all the pixels on the raster line
-segment connecting two specified points.
 
-The class LineIterator is used to get each pixel of a raster line. It
-can be treated as versatile implementation of the Bresenham algorithm
+/** @brief Class for iterating over all pixels on a raster line segment.
+
+The class LineIterator is used to get each pixel of a raster line connecting
+two specified points.
+It can be treated as a versatile implementation of the Bresenham algorithm
 where you can stop at each pixel and do some extra processing, for
 example, grab pixel values along the line or draw a line with an effect
 (for example, with XOR operation).
@@ -4991,14 +4992,19 @@ for(int i = 0; i < it2.count; i++, ++it2)
 class CV_EXPORTS LineIterator
 {
 public:
-    /** @brief initializes the iterator
+    /** @brief Initializes iterator object for the given line and image.
 
-    creates iterators for the line connecting pt1 and pt2
-    the line will be clipped on the image boundaries
-    the line is 8-connected or 4-connected
-    If leftToRight=true, then the iteration is always done
-    from the left-most point to the right most,
-    not to depend on the ordering of pt1 and pt2 parameters;
+    The returned iterator can be used to traverse all pixels on a line that
+    connects the given two points.
+    The line will be clipped on the image boundaries.
+
+    @param img Underlying image.
+    @param pt1 First endpoint of the line.
+    @param pt2 The other endpoint of the line.
+    @param connectivity Pixel connectivity of the iterator. Valid values are 4 (iterator can move
+    up, down, left and right) and 8 (iterator can also move diagonally).
+    @param leftToRight If true, the line is traversed from the leftmost endpoint to the rightmost
+    endpoint. Otherwise, the line is traversed from \p pt1 to \p pt2.
     */
     LineIterator( const Mat& img, Point pt1, Point pt2,
                   int connectivity = 8, bool leftToRight = false )
@@ -5031,16 +5037,23 @@ public:
     }
     void init(const Mat* img, Rect boundingAreaRect, Point pt1, Point pt2, int connectivity, bool leftToRight);
 
-    /** @brief returns pointer to the current pixel
+    /** @brief Returns pointer to the current pixel.
     */
     uchar* operator *();
-    /** @brief prefix increment operator (++it). shifts iterator to the next pixel
+
+    /** @brief Moves iterator to the next pixel on the line.
+
+    This is the prefix version (++it).
     */
     LineIterator& operator ++();
-    /** @brief postfix increment operator (it++). shifts iterator to the next pixel
+
+    /** @brief Moves iterator to the next pixel on the line.
+
+    This is the postfix version (it++).
     */
     LineIterator operator ++(int);
-    /** @brief returns coordinates of the current pixel
+
+    /** @brief Returns coordinates of the current pixel.
     */
     Point pos() const;
 
diff --git a/modules/imgproc/src/connectedcomponents.cpp b/modules/imgproc/src/connectedcomponents.cpp
index 1ad74ed38a..f2d41f454d 100644
--- a/modules/imgproc/src/connectedcomponents.cpp
+++ b/modules/imgproc/src/connectedcomponents.cpp
@@ -1570,7 +1570,7 @@ namespace cv{
 #define CONDITION_S img_row[c - 1] > 0
 #define CONDITION_X img_row[c] > 0
 
-#define ACTION_1 // nothing to do
+#define ACTION_1 img_labels_row[c] = 0;
 #define ACTION_2 img_labels_row[c] = label; \
                     P_[label] = label; \
                     label = label + 1;
@@ -1831,7 +1831,7 @@ namespace cv{
 
             std::vector<LabelT> P_(Plength, 0);
             LabelT* P = P_.data();
-            //P[0] = 0;
+            P[0] = 0;
             LabelT lunique = 1;
 
             // First scan
@@ -1851,7 +1851,7 @@ namespace cv{
 #define CONDITION_S img_row[c - 1] > 0
 #define CONDITION_X img_row[c] > 0
 
-#define ACTION_1 // nothing to do
+#define ACTION_1 img_labels_row[c] = 0;
 #define ACTION_2 img_labels_row[c] = lunique; \
                                      P[lunique] = lunique;        \
                                      lunique = lunique + 1; // new label
diff --git a/modules/imgproc/src/drawing.cpp b/modules/imgproc/src/drawing.cpp
index ac56791f42..90844c7f51 100644
--- a/modules/imgproc/src/drawing.cpp
+++ b/modules/imgproc/src/drawing.cpp
@@ -673,7 +673,7 @@ Line2( Mat& img, Point2l pt1, Point2l pt2, const void* color)
         pt1.y ^= pt2.y & j;
 
         x_step = XY_ONE;
-        y_step = (dy << XY_SHIFT) / (ax | 1);
+        y_step = dy * (1 << XY_SHIFT) / (ax | 1);
         ecount = (int)((pt2.x - pt1.x) >> XY_SHIFT);
     }
     else
@@ -686,7 +686,7 @@ Line2( Mat& img, Point2l pt1, Point2l pt2, const void* color)
         pt2.y ^= pt1.y & i;
         pt1.y ^= pt2.y & i;
 
-        x_step = (dx << XY_SHIFT) / (ay | 1);
+        x_step = dx * (1 << XY_SHIFT) / (ay | 1);
         y_step = XY_ONE;
         ecount = (int)((pt2.y - pt1.y) >> XY_SHIFT);
     }
diff --git a/modules/imgproc/src/floodfill.cpp b/modules/imgproc/src/floodfill.cpp
index 2816795bc6..8595011d48 100644
--- a/modules/imgproc/src/floodfill.cpp
+++ b/modules/imgproc/src/floodfill.cpp
@@ -477,11 +477,10 @@ int cv::floodFill( InputOutputArray _image, InputOutputArray _mask,
     nv_buf._[0] = nv_buf._[1] = nv_buf._[2] = nv_buf._[3] = 0;
 
     struct { Vec3b b; Vec3i i; Vec3f f; } ld_buf, ud_buf;
-    Mat img = _image.getMat(), mask;
-    if( !_mask.empty() )
-        mask = _mask.getMat();
-    Size size = img.size();
 
+    Mat img = _image.getMat(), mask;
+
+    Size size = img.size();
     int type = img.type();
     int depth = img.depth();
     int cn = img.channels();
@@ -495,6 +494,20 @@ int cv::floodFill( InputOutputArray _image, InputOutputArray _mask,
     if( connectivity != 0 && connectivity != 4 && connectivity != 8 )
         CV_Error( CV_StsBadFlag, "Connectivity must be 4, 0(=4) or 8" );
 
+    if( _mask.empty() )
+    {
+        _mask.create( size.height + 2, size.width + 2, CV_8UC1 );
+        _mask.setTo(0);
+    }
+
+    mask = _mask.getMat();
+    CV_CheckTypeEQ( mask.type(), CV_8U, "" );
+    CV_CheckEQ( mask.rows, size.height + 2, "" );
+    CV_CheckEQ( mask.cols, size.width + 2, "" );
+
+    Mat mask_inner = mask( Rect(1, 1, mask.cols - 2, mask.rows - 2) );
+    copyMakeBorder( mask_inner, mask, 1, 1, 1, 1, BORDER_ISOLATED | BORDER_CONSTANT, Scalar(1) );
+
     bool is_simple = mask.empty() && (flags & FLOODFILL_MASK_ONLY) == 0;
 
     for( i = 0; i < cn; i++ )
@@ -544,26 +557,6 @@ int cv::floodFill( InputOutputArray _image, InputOutputArray _mask,
         }
     }
 
-    if( mask.empty() )
-    {
-        Mat tempMask( size.height + 2, size.width + 2, CV_8UC1 );
-        tempMask.setTo(Scalar::all(0));
-        mask = tempMask;
-    }
-    else
-    {
-        CV_Assert( mask.rows == size.height+2 && mask.cols == size.width+2 );
-        CV_Assert( mask.type() == CV_8U );
-    }
-
-    memset( mask.ptr(), 1, mask.cols );
-    memset( mask.ptr(mask.rows-1), 1, mask.cols );
-
-    for( i = 1; i <= size.height; i++ )
-    {
-        mask.at<uchar>(i, 0) = mask.at<uchar>(i, mask.cols-1) = (uchar)1;
-    }
-
     if( depth == CV_8U )
         for( i = 0; i < cn; i++ )
         {
@@ -632,7 +625,8 @@ int cv::floodFill( InputOutputArray _image, Point seedPoint,
 {
     CV_INSTRUMENT_REGION();
 
-    return floodFill(_image, Mat(), seedPoint, newVal, rect, loDiff, upDiff, flags);
+    Mat mask;
+    return floodFill(_image, mask, seedPoint, newVal, rect, loDiff, upDiff, flags);
 }
 
 
diff --git a/modules/imgproc/src/histogram.cpp b/modules/imgproc/src/histogram.cpp
index c348828ff0..1fbb9aae51 100644
--- a/modules/imgproc/src/histogram.cpp
+++ b/modules/imgproc/src/histogram.cpp
@@ -909,7 +909,8 @@ static bool ipp_calchist(const Mat &image, Mat &hist, int histSize, const float*
 #endif
 
     // IPP_DISABLE_HISTOGRAM - https://github.com/opencv/opencv/issues/11544
-    if (uniform && (ranges[0][1] - ranges[0][0]) != histSize)
+    // and https://github.com/opencv/opencv/issues/21595
+    if ((uniform && (ranges[0][1] - ranges[0][0]) != histSize) || abs(ranges[0][0]) != cvFloor(ranges[0][0]))
         return false;
 
     Mat ihist = hist;
diff --git a/modules/imgproc/test/test_connectedcomponents.cpp b/modules/imgproc/test/test_connectedcomponents.cpp
index ed11ea6fda..e1a6b761c7 100644
--- a/modules/imgproc/test/test_connectedcomponents.cpp
+++ b/modules/imgproc/test/test_connectedcomponents.cpp
@@ -789,5 +789,16 @@ TEST(Imgproc_ConnectedComponents, single_column)
 }
 
 
+TEST(Imgproc_ConnectedComponents, 4conn_regression_21366)
+{
+    // Labelling an all-zero 10x10 image with 4-connectivity must complete without throwing.
+    const Mat blank = Mat::zeros(Size(10, 10), CV_8UC1);
+    Mat labelMap;
+    Mat regionStats, regionCentroids;
+    EXPECT_NO_THROW(cv::connectedComponentsWithStats(blank, labelMap, regionStats, regionCentroids, 4));
+}
+
+
+
 }
 } // namespace
diff --git a/modules/imgproc/test/test_floodfill.cpp b/modules/imgproc/test/test_floodfill.cpp
index b880c4ee37..934e421fba 100644
--- a/modules/imgproc/test/test_floodfill.cpp
+++ b/modules/imgproc/test/test_floodfill.cpp
@@ -531,11 +531,11 @@ TEST(Imgproc_FloodFill, maskValue)
 {
     const int n = 50;
     Mat img = Mat::zeros(n, n, CV_8U);
-    Mat mask = Mat::zeros(n + 2, n + 2, CV_8U);
+    Mat mask;
 
     circle(img, Point(n/2, n/2), 20, Scalar(100), 4);
 
-    int flags = 4 + CV_FLOODFILL_MASK_ONLY;
+    int flags = 4 + FLOODFILL_MASK_ONLY;
     floodFill(img, mask, Point(n/2 + 13, n/2), Scalar(100), NULL, Scalar(),  Scalar(), flags);
 
     ASSERT_EQ(1, cvtest::norm(mask.rowRange(1, n-1).colRange(1, n-1), NORM_INF));
diff --git a/modules/imgproc/test/test_histograms.cpp b/modules/imgproc/test/test_histograms.cpp
index a6c75a318d..b57af774f2 100644
--- a/modules/imgproc/test/test_histograms.cpp
+++ b/modules/imgproc/test/test_histograms.cpp
@@ -1993,6 +1993,38 @@ TEST(Imgproc_Hist_Calc, badarg)
     EXPECT_NO_THROW(cv::calcBackProject(&img, 1, channels, hist, backProj, NULL, 1, true));
 }
 
+TEST(Imgproc_Hist_Calc, IPP_ranges_with_equal_exponent_21595)
+{
+    const int channels[] = { 0 };
+    float range1[] = { -0.5f, 1.5f };
+    const float* ranges[] = { range1 };
+    const int hist_size[] = { 2 };
+    // Two uniform bins over [-0.5, 1.5): pixel value 0 belongs to bin 0, value 1 to bin 1.
+    uint8_t m[1][6] = { { 0, 1, 0, 1 , 1, 1 } };
+    cv::Mat images_u = Mat(1, 6, CV_8UC1, m);
+    cv::Mat histogram_u;
+    cv::calcHist(&images_u, 1, channels, noArray(), histogram_u, 1, hist_size, ranges);
+    // The input holds two 0s and four 1s; each failure message prints the bin it checks.
+    ASSERT_EQ(histogram_u.at<float>(0), 2.f) << "0 not counts correctly, res: " << histogram_u.at<float>(0);
+    ASSERT_EQ(histogram_u.at<float>(1), 4.f) << "1 not counts correctly, res: " << histogram_u.at<float>(1);
+}
+
+TEST(Imgproc_Hist_Calc, IPP_ranges_with_nonequal_exponent_21595)
+{
+    const int channels[] = { 0 };
+    float range1[] = { -1.3f, 1.5f };
+    const float* ranges[] = { range1 };
+    const int hist_size[] = { 3 };
+    // Three uniform bins over [-1.3, 1.5); the lower bound is not an integer.
+    uint8_t m[1][6] = { { 0, 1, 0, 1 , 1, 1 } };
+    cv::Mat images_u = Mat(1, 6, CV_8UC1, m);
+    cv::Mat histogram_u;
+    cv::calcHist(&images_u, 1, channels, noArray(), histogram_u, 1, hist_size, ranges);
+    // Expect bin 0 empty, the two 0 pixels in bin 1 and the four 1 pixels in bin 2.
+    ASSERT_EQ(histogram_u.at<float>(0), 0.f) << "not equal to zero, res: " << histogram_u.at<float>(0);
+    ASSERT_EQ(histogram_u.at<float>(1), 2.f) << "0 not counts correctly, res: " << histogram_u.at<float>(1);
+    ASSERT_EQ(histogram_u.at<float>(2), 4.f) << "1 not counts correctly, res: " << histogram_u.at<float>(2);
+}
 
 }} // namespace
 /* End Of File */
diff --git a/modules/ml/src/em.cpp b/modules/ml/src/em.cpp
index ec73bfd1b5..3e0eeb560a 100644
--- a/modules/ml/src/em.cpp
+++ b/modules/ml/src/em.cpp
@@ -656,7 +656,7 @@ public:
 
         // Update weights
         // not normalized first
-        reduce(trainProbs, weights, 0, CV_REDUCE_SUM);
+        reduce(trainProbs, weights, 0, REDUCE_SUM);
 
         // Update means
         means.create(nclusters, dim, CV_64FC1);
diff --git a/modules/ml/test/test_precomp.hpp b/modules/ml/test/test_precomp.hpp
index e2d36d2c2d..380e612616 100644
--- a/modules/ml/test/test_precomp.hpp
+++ b/modules/ml/test/test_precomp.hpp
@@ -4,7 +4,6 @@
 #include "opencv2/ts.hpp"
 #include <opencv2/ts/cuda_test.hpp> // EXPECT_MAT_NEAR
 #include "opencv2/ml.hpp"
-#include "opencv2/core/core_c.h"
 
 #include <fstream>
 using std::ifstream;
diff --git a/modules/objdetect/CMakeLists.txt b/modules/objdetect/CMakeLists.txt
index 411386fd7d..27480c7078 100644
--- a/modules/objdetect/CMakeLists.txt
+++ b/modules/objdetect/CMakeLists.txt
@@ -1,5 +1,16 @@
 set(the_description "Object Detection")
-ocv_define_module(objdetect opencv_core opencv_imgproc opencv_3d opencv_dnn WRAP java objc python js)
+ocv_define_module(objdetect
+    opencv_core
+    opencv_imgproc
+    opencv_3d
+    OPTIONAL
+        opencv_dnn
+    WRAP
+        python
+        java
+        objc
+        js
+)
 
 if(HAVE_QUIRC)
     get_property(QUIRC_INCLUDE GLOBAL PROPERTY QUIRC_INCLUDE_DIR)
diff --git a/modules/objdetect/include/opencv2/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect.hpp
index fa81779f40..13271cebf4 100644
--- a/modules/objdetect/include/opencv2/objdetect.hpp
+++ b/modules/objdetect/include/opencv2/objdetect.hpp
@@ -49,8 +49,8 @@
 /**
 @defgroup objdetect Object Detection
 
-Haar Feature-based Cascade Classifier for Object Detection
-----------------------------------------------------------
+@{
+    @defgroup objdetect_cascade_classifier Cascade Classifier for Object Detection
 
 The object detector described below has been initially proposed by Paul Viola @cite Viola01 and
 improved by Rainer Lienhart @cite Lienhart02 .
@@ -90,8 +90,7 @@ middle) and the sum of the image pixels under the black stripe multiplied by 3 i
 compensate for the differences in the size of areas. The sums of pixel values over a rectangular
 regions are calculated rapidly using integral images (see below and the integral description).
 
-To see the object detector at work, have a look at the facedetect demo:
-<https://github.com/opencv/opencv/tree/5.x/samples/cpp/dbt_face_detection.cpp>
+Check @ref tutorial_cascade_classifier "the corresponding tutorial" for more details.
 
 The following reference is for the detection part only. There is a separate application called
 opencv_traincascade that can train a cascade of boosted classifiers from a set of samples.
@@ -99,10 +98,13 @@ opencv_traincascade that can train a cascade of boosted classifiers from a set o
 @note In the new C++ interface it is also possible to use LBP (local binary pattern) features in
 addition to Haar-like features. .. [Viola01] Paul Viola and Michael J. Jones. Rapid Object Detection
 using a Boosted Cascade of Simple Features. IEEE CVPR, 2001. The paper is available online at
-<http://research.microsoft.com/en-us/um/people/viola/Pubs/Detect/violaJones_CVPR2001.pdf>
+<https://github.com/SvHey/thesis/blob/master/Literature/ObjectDetection/violaJones_CVPR2001.pdf>
 
-@{
-    @defgroup objdetect_c C API
+    @defgroup objdetect_hog HOG (Histogram of Oriented Gradients) descriptor and object detector
+    @defgroup objdetect_qrcode QRCode detection and encoding
+    @defgroup objdetect_dnn_face DNN-based face detection and recognition
+Check @ref tutorial_dnn_face "the corresponding tutorial" for more details.
+    @defgroup objdetect_common Common functions and classes
 @}
  */
 
@@ -111,13 +113,15 @@ typedef struct CvHaarClassifierCascade CvHaarClassifierCascade;
 namespace cv
 {
 
-//! @addtogroup objdetect
+//! @addtogroup objdetect_common
 //! @{
 
 ///////////////////////////// Object Detection ////////////////////////////
 
-//! class for grouping object candidates, detected by Cascade Classifier, HOG etc.
-//! instance of the class is to be passed to cv::partition (see cxoperations.hpp)
+/** @brief This class is used for grouping object candidates detected by Cascade Classifier, HOG etc.
+
+An instance of this class is to be passed to cv::partition.
+ */
 class CV_EXPORTS SimilarRects
 {
 public:
@@ -162,6 +166,10 @@ CV_EXPORTS   void groupRectangles(std::vector<Rect>& rectList, std::vector<int>&
 CV_EXPORTS   void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>& foundWeights,
                                             std::vector<double>& foundScales,
                                             double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
+//! @}
+
+//! @addtogroup objdetect_cascade_classifier
+//! @{
 
 template<> struct DefaultDeleter<CvHaarClassifierCascade>{ CV_EXPORTS void operator ()(CvHaarClassifierCascade* obj) const; };
 
@@ -243,7 +251,7 @@ public:
     CV_WRAP bool load( const String& filename );
     /** @brief Reads a classifier from a FileStorage node.
 
-    @note The file may contain a new cascade classifier (trained traincascade application) only.
+    @note The file may contain a new cascade classifier (trained by the traincascade application) only.
      */
     CV_WRAP bool read( const FileNode& node );
 
@@ -260,12 +268,6 @@ public:
     cvHaarDetectObjects. It is not used for a new cascade.
     @param minSize Minimum possible object size. Objects smaller than that are ignored.
     @param maxSize Maximum possible object size. Objects larger than that are ignored. If `maxSize == minSize` model is evaluated on single scale.
-
-    The function is parallelized with the TBB library.
-
-    @note
-       -   (Python) A face detection example using cascade classifiers can be found at
-            opencv_source_code/samples/python/facedetect.py
     */
     CV_WRAP void detectMultiScale( InputArray image,
                           CV_OUT std::vector<Rect>& objects,
@@ -338,7 +340,10 @@ public:
 };
 
 CV_EXPORTS Ptr<BaseCascadeClassifier::MaskGenerator> createFaceDetectionMaskGenerator();
+//! @}
 
+//! @addtogroup objdetect_hog
+//! @{
 //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
 
 //! struct for detection region of interest (ROI)
@@ -378,7 +383,7 @@ public:
          };
     enum DescriptorStorageFormat { DESCR_FORMAT_COL_BY_COL, DESCR_FORMAT_ROW_BY_ROW };
 
-    /**@brief Creates the HOG descriptor and detector with default params.
+    /**@brief Creates the HOG descriptor and detector with default parameters.
 
     aqual to HOGDescriptor(Size(64,128), Size(16,16), Size(8,8), Size(8,8), 9 )
     */
@@ -414,6 +419,8 @@ public:
     {}
 
     /** @overload
+
+    Creates the HOG descriptor and detector and loads HOGDescriptor parameters and coefficients for the linear SVM classifier from a file.
     @param filename The file name containing HOGDescriptor properties and coefficients for the linear SVM classifier.
     */
     CV_WRAP HOGDescriptor(const String& filename)
@@ -452,19 +459,19 @@ public:
     */
     CV_WRAP virtual void setSVMDetector(InputArray svmdetector);
 
-    /** @brief Reads HOGDescriptor parameters from a cv::FileNode.
+    /** @brief Reads HOGDescriptor parameters and coefficients for the linear SVM classifier from a file node.
     @param fn File node
     */
     virtual bool read(FileNode& fn);
 
-    /** @brief Stores HOGDescriptor parameters in a cv::FileStorage.
+    /** @brief Stores HOGDescriptor parameters and coefficients for the linear SVM classifier in a file storage.
     @param fs File storage
     @param objname Object name
     */
     virtual void write(FileStorage& fs, const String& objname) const;
 
-    /** @brief loads HOGDescriptor parameters and coefficients for the linear SVM classifier from a file.
-    @param filename Path of the file to read.
+    /** @brief Loads HOGDescriptor parameters and coefficients for the linear SVM classifier from a file.
+    @param filename Name of the file to read.
     @param objname The optional name of the node to read (if empty, the first top-level node will be used).
     */
     CV_WRAP virtual bool load(const String& filename, const String& objname = String());
@@ -537,13 +544,14 @@ public:
     @param winStride Window stride. It must be a multiple of block stride.
     @param padding Padding
     @param scale Coefficient of the detection window increase.
-    @param finalThreshold Final threshold
+    @param groupThreshold Coefficient to regulate the similarity threshold. When detected, some objects can be covered
+    by many rectangles. 0 means not to perform grouping.
     @param useMeanshiftGrouping indicates grouping algorithm
     */
     CV_WRAP virtual void detectMultiScale(InputArray img, CV_OUT std::vector<Rect>& foundLocations,
                                   CV_OUT std::vector<double>& foundWeights, double hitThreshold = 0,
                                   Size winStride = Size(), Size padding = Size(), double scale = 1.05,
-                                  double finalThreshold = 2.0,bool useMeanshiftGrouping = false) const;
+                                  double groupThreshold = 2.0, bool useMeanshiftGrouping = false) const;
 
     /** @brief Detects objects of different sizes in the input image. The detected objects are returned as a list
     of rectangles.
@@ -555,13 +563,14 @@ public:
     @param winStride Window stride. It must be a multiple of block stride.
     @param padding Padding
     @param scale Coefficient of the detection window increase.
-    @param finalThreshold Final threshold
+    @param groupThreshold Coefficient to regulate the similarity threshold. When detected, some objects can be covered
+    by many rectangles. 0 means not to perform grouping.
     @param useMeanshiftGrouping indicates grouping algorithm
     */
     virtual void detectMultiScale(InputArray img, CV_OUT std::vector<Rect>& foundLocations,
                                   double hitThreshold = 0, Size winStride = Size(),
                                   Size padding = Size(), double scale = 1.05,
-                                  double finalThreshold = 2.0, bool useMeanshiftGrouping = false) const;
+                                  double groupThreshold = 2.0, bool useMeanshiftGrouping = false) const;
 
     /** @brief  Computes gradients and quantized gradient orientations.
     @param img Matrix contains the image to be computed
@@ -666,6 +675,10 @@ public:
     */
     void groupRectangles(std::vector<cv::Rect>& rectList, std::vector<double>& weights, int groupThreshold, double eps) const;
 };
+//! @}
+
+//! @addtogroup objdetect_qrcode
+//! @{
 
 class CV_EXPORTS_W QRCodeEncoder {
 protected:
@@ -827,7 +840,7 @@ protected:
     Ptr<Impl> p;
 };
 
-//! @} objdetect
+//! @}
 }
 
 #include "opencv2/objdetect/detection_based_tracker.hpp"
diff --git a/modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp b/modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp
index 18cde13eab..fb96c668a5 100644
--- a/modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp
+++ b/modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp
@@ -51,7 +51,7 @@
 namespace cv
 {
 
-//! @addtogroup objdetect
+//! @addtogroup objdetect_cascade_classifier
 //! @{
 
 class CV_EXPORTS DetectionBasedTracker
@@ -215,7 +215,7 @@ class CV_EXPORTS DetectionBasedTracker
         void detectInRegion(const cv::Mat& img, const cv::Rect& r, std::vector<cv::Rect>& detectedObjectsInRegions);
 };
 
-//! @} objdetect
+//! @}
 
 } //end of cv namespace
 
diff --git a/modules/objdetect/include/opencv2/objdetect/face.hpp b/modules/objdetect/include/opencv2/objdetect/face.hpp
index f2429c5f31..1b3681c652 100644
--- a/modules/objdetect/include/opencv2/objdetect/face.hpp
+++ b/modules/objdetect/include/opencv2/objdetect/face.hpp
@@ -7,13 +7,15 @@
 
 #include <opencv2/core.hpp>
 
-/** @defgroup dnn_face DNN-based face detection and recognition
- */
-
 namespace cv
 {
 
-/** @brief DNN-based face detector, model download link: https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.
+//! @addtogroup objdetect_dnn_face
+//! @{
+
+/** @brief DNN-based face detector
+
+model download link: https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet
  */
 class CV_EXPORTS_W FaceDetectorYN
 {
@@ -80,7 +82,9 @@ public:
                                               int target_id = 0);
 };
 
-/** @brief DNN-based face recognizer, model download link: https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view.
+/** @brief DNN-based face recognizer
+
+model download link: https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface
  */
 class CV_EXPORTS_W FaceRecognizerSF
 {
@@ -105,11 +109,11 @@ public:
     CV_WRAP virtual void feature(InputArray aligned_img, OutputArray face_feature) = 0;
 
     /** @brief Calculating the distance between two face features
-     *  @param _face_feature1 the first input feature
-     *  @param _face_feature2 the second input feature of the same size and the same type as _face_feature1
+     *  @param face_feature1 the first input feature
+     *  @param face_feature2 the second input feature of the same size and the same type as face_feature1
      *  @param dis_type defining the similarity with optional values "FR_OSINE" or "FR_NORM_L2"
      */
-    CV_WRAP virtual double match(InputArray _face_feature1, InputArray _face_feature2, int dis_type = FaceRecognizerSF::FR_COSINE) const = 0;
+    CV_WRAP virtual double match(InputArray face_feature1, InputArray face_feature2, int dis_type = FaceRecognizerSF::FR_COSINE) const = 0;
 
     /** @brief Creates an instance of this class with given parameters
      *  @param model the path of the onnx model used for face recognition
@@ -120,6 +124,7 @@ public:
     CV_WRAP static Ptr<FaceRecognizerSF> create(const String& model, const String& config, int backend_id = 0, int target_id = 0);
 };
 
+//! @}
 } // namespace cv
 
 #endif
diff --git a/modules/objdetect/src/face_detect.cpp b/modules/objdetect/src/face_detect.cpp
index a9ca2d8957..10259a32e6 100644
--- a/modules/objdetect/src/face_detect.cpp
+++ b/modules/objdetect/src/face_detect.cpp
@@ -6,13 +6,16 @@
 
 #include "opencv2/imgproc.hpp"
 #include "opencv2/core.hpp"
+#ifdef HAVE_OPENCV_DNN
 #include "opencv2/dnn.hpp"
+#endif
 
 #include <algorithm>
 
 namespace cv
 {
 
+#ifdef HAVE_OPENCV_DNN
 class FaceDetectorYNImpl : public FaceDetectorYN
 {
 public:
@@ -273,6 +276,7 @@ private:
 
     std::vector<Rect2f> priors;
 };
+#endif
 
 Ptr<FaceDetectorYN> FaceDetectorYN::create(const String& model,
                                            const String& config,
@@ -283,7 +287,12 @@ Ptr<FaceDetectorYN> FaceDetectorYN::create(const String& model,
                                            const int backend_id,
                                            const int target_id)
 {
+#ifdef HAVE_OPENCV_DNN
     return makePtr<FaceDetectorYNImpl>(model, config, input_size, score_threshold, nms_threshold, top_k, backend_id, target_id);
+#else
+    CV_UNUSED(model); CV_UNUSED(config); CV_UNUSED(input_size); CV_UNUSED(score_threshold); CV_UNUSED(nms_threshold); CV_UNUSED(top_k); CV_UNUSED(backend_id); CV_UNUSED(target_id);
+    CV_Error(cv::Error::StsNotImplemented, "cv::FaceDetectorYN requires enabled 'dnn' module.");
+#endif
 }
 
 } // namespace cv
diff --git a/modules/objdetect/src/face_recognize.cpp b/modules/objdetect/src/face_recognize.cpp
index 66271068b2..497303e42b 100644
--- a/modules/objdetect/src/face_recognize.cpp
+++ b/modules/objdetect/src/face_recognize.cpp
@@ -4,13 +4,17 @@
 
 #include "precomp.hpp"
 
+#include "opencv2/core.hpp"
+#ifdef HAVE_OPENCV_DNN
 #include "opencv2/dnn.hpp"
+#endif
 
 #include <algorithm>
 
 namespace cv
 {
 
+#ifdef HAVE_OPENCV_DNN
 class FaceRecognizerSFImpl : public FaceRecognizerSF
 {
 public:
@@ -173,10 +177,16 @@ private:
 private:
     dnn::Net net;
 };
+#endif
 
 Ptr<FaceRecognizerSF> FaceRecognizerSF::create(const String& model, const String& config, int backend_id, int target_id)
 {
+#ifdef HAVE_OPENCV_DNN
     return makePtr<FaceRecognizerSFImpl>(model, config, backend_id, target_id);
+#else
+    CV_UNUSED(model); CV_UNUSED(config); CV_UNUSED(backend_id); CV_UNUSED(target_id);
+    CV_Error(cv::Error::StsNotImplemented, "cv::FaceRecognizerSF requires enabled 'dnn' module");
+#endif
 }
 
 } // namespace cv
diff --git a/modules/objdetect/src/hog.cpp b/modules/objdetect/src/hog.cpp
index 281b009558..b57e92ff9a 100644
--- a/modules/objdetect/src/hog.cpp
+++ b/modules/objdetect/src/hog.cpp
@@ -42,7 +42,6 @@
 
 #include "precomp.hpp"
 #include "cascadedetect.hpp"
-#include "opencv2/core/core_c.h"
 #include "opencv2/core/hal/intrin.hpp"
 #include "opencl_kernels_objdetect.hpp"
 
@@ -1887,7 +1886,7 @@ static bool ocl_detectMultiScale(InputArray _img, std::vector<Rect> &found_locat
 void HOGDescriptor::detectMultiScale(
     InputArray _img, std::vector<Rect>& foundLocations, std::vector<double>& foundWeights,
     double hitThreshold, Size winStride, Size padding,
-    double scale0, double finalThreshold, bool useMeanshiftGrouping) const
+    double scale0, double groupThreshold, bool useMeanshiftGrouping) const
 {
     CV_INSTRUMENT_REGION();
 
@@ -1913,7 +1912,7 @@ void HOGDescriptor::detectMultiScale(
 
     CV_OCL_RUN(_img.dims() <= 2 && _img.type() == CV_8UC1 && scale0 > 1 && winStride.width % blockStride.width == 0 &&
         winStride.height % blockStride.height == 0 && padding == Size(0,0) && _img.isUMat(),
-        ocl_detectMultiScale(_img, foundLocations, levelScale, hitThreshold, winStride, finalThreshold, oclSvmDetector,
+        ocl_detectMultiScale(_img, foundLocations, levelScale, hitThreshold, winStride, groupThreshold, oclSvmDetector,
         blockSize, cellSize, nbins, blockStride, winSize, gammaCorrection, L2HysThreshold, (float)getWinSigma(), free_coef, signedGradient));
 
     std::vector<Rect> allCandidates;
@@ -1934,21 +1933,21 @@ void HOGDescriptor::detectMultiScale(
     std::copy(tempWeights.begin(), tempWeights.end(), back_inserter(foundWeights));
 
     if ( useMeanshiftGrouping )
-        groupRectangles_meanshift(foundLocations, foundWeights, foundScales, finalThreshold, winSize);
+        groupRectangles_meanshift(foundLocations, foundWeights, foundScales, groupThreshold, winSize);
     else
-        groupRectangles(foundLocations, foundWeights, (int)finalThreshold, 0.2);
+        groupRectangles(foundLocations, foundWeights, (int)groupThreshold, 0.2);
     clipObjects(imgSize, foundLocations, 0, &foundWeights);
 }
 
 void HOGDescriptor::detectMultiScale(InputArray img, std::vector<Rect>& foundLocations,
     double hitThreshold, Size winStride, Size padding,
-    double scale0, double finalThreshold, bool useMeanshiftGrouping) const
+    double scale0, double groupThreshold, bool useMeanshiftGrouping) const
 {
     CV_INSTRUMENT_REGION();
 
     std::vector<double> foundWeights;
     detectMultiScale(img, foundLocations, foundWeights, hitThreshold, winStride,
-                padding, scale0, finalThreshold, useMeanshiftGrouping);
+                padding, scale0, groupThreshold, useMeanshiftGrouping);
 }
 
 std::vector<float> HOGDescriptor::getDefaultPeopleDetector()
diff --git a/modules/objdetect/test/test_face.cpp b/modules/objdetect/test/test_face.cpp
index 2e944c50df..d33032fa2f 100644
--- a/modules/objdetect/test/test_face.cpp
+++ b/modules/objdetect/test/test_face.cpp
@@ -78,7 +78,7 @@ TEST(Objdetect_face_detection, regression)
     // }
 
     // Initialize detector
-    std::string model = findDataFile("dnn/onnx/models/yunet-202109.onnx", false);
+    std::string model = findDataFile("dnn/onnx/models/yunet-202202.onnx", false);
     Ptr<FaceDetectorYN> faceDetector = FaceDetectorYN::create(model, "", Size(300, 300));
     faceDetector->setScoreThreshold(0.7f);
 
@@ -178,7 +178,7 @@ TEST(Objdetect_face_recognition, regression)
     }
 
     // Initialize detector
-    std::string detect_model = findDataFile("dnn/onnx/models/yunet-202109.onnx", false);
+    std::string detect_model = findDataFile("dnn/onnx/models/yunet-202202.onnx", false);
     Ptr<FaceDetectorYN> faceDetector = FaceDetectorYN::create(detect_model, "", Size(150, 150), score_thresh, nms_thresh);
 
     std::string recog_model = findDataFile("dnn/onnx/models/face_recognizer_fast.onnx", false);
diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp
index b39db34fcb..294905c783 100644
--- a/modules/python/src2/cv2.cpp
+++ b/modules/python/src2/cv2.cpp
@@ -130,45 +130,155 @@ struct ConstDef
     long long val;
 };
 
-static void init_submodule(PyObject * root, const char * name, PyMethodDef * methods, ConstDef * consts)
-{
-  // traverse and create nested submodules
-  std::string s = name;
-  size_t i = s.find('.');
-  while (i < s.length() && i != std::string::npos)
-  {
-    size_t j = s.find('.', i);
-    if (j == std::string::npos)
-        j = s.length();
-    std::string short_name = s.substr(i, j-i);
-    std::string full_name = s.substr(0, j);
-    i = j+1;
+static inline bool strStartsWith(const std::string& str, const std::string& prefix) {
+    return prefix.empty() || \
+        (str.size() >= prefix.size() && std::memcmp(str.data(), prefix.data(), prefix.size()) == 0);
+}
 
-    PyObject * d = PyModule_GetDict(root);
-    PyObject * submod = PyDict_GetItemString(d, short_name.c_str());
-    if (submod == NULL)
+static inline bool strEndsWith(const std::string& str, char symbol) {
+    return !str.empty() && str[str.size() - 1] == symbol;
+}
+
+/**
+ * \brief Creates a submodule of the `parent_module`. Missing parent
+ * submodules are created as needed. If `name` equals the parent module
+ * name, then a borrowed reference to the parent module is returned (no
+ * reference counting is done).
+ * Submodule lifetime is managed by the parent module.
+ * If nested submodules are created, then the lifetime of each one is
+ * managed by its predecessor submodule in the chain.
+ *
+ * \param parent_module Parent module object.
+ * \param name Full submodule name; it must start with the parent module name.
+ * \return borrowed reference to the created submodule.
+ *         If any of the submodules can't be created, then NULL is returned.
+ */
+static PyObject* createSubmodule(PyObject* parent_module, const std::string& name)
+{
+    if (!parent_module)
     {
-        submod = PyImport_AddModule(full_name.c_str());
-        PyDict_SetItemString(d, short_name.c_str(), submod);
+        return PyErr_Format(PyExc_ImportError,
+            "Bindings generation error. "
+            "Parent module is NULL during the submodule '%s' creation",
+            name.c_str()
+        );
+    }
+    if (strEndsWith(name, '.'))
+    {
+        return PyErr_Format(PyExc_ImportError,
+            "Bindings generation error. "
+            "Submodule can't end with a dot. Got: %s", name.c_str()
+        );
     }
 
-    if (short_name != "")
-        root = submod;
-  }
+    const std::string parent_name = PyModule_GetName(parent_module);
 
-  // populate module's dict
-  PyObject * d = PyModule_GetDict(root);
-  for (PyMethodDef * m = methods; m->ml_name != NULL; ++m)
-  {
-    PyObject * method_obj = PyCFunction_NewEx(m, NULL, NULL);
-    PyDict_SetItemString(d, m->ml_name, method_obj);
-    Py_DECREF(method_obj);
-  }
-  for (ConstDef * c = consts; c->name != NULL; ++c)
-  {
-    PyDict_SetItemString(d, c->name, PyLong_FromLongLong(c->val));
-  }
+    /// Special case handling when caller tries to register a submodule of the parent module with
+    /// the same name
+    if (name == parent_name) {
+        return parent_module;
+    }
 
+    if (!strStartsWith(name, parent_name))
+    {
+        return PyErr_Format(PyExc_ImportError,
+            "Bindings generation error. "
+            "Submodule name should always start with a parent module name. "
+            "Parent name: %s. Submodule name: %s", parent_name.c_str(),
+            name.c_str()
+        );
+    }
+
+    size_t submodule_name_end = name.find('.', parent_name.size() + 1);
+    /// There are no intermediate submodules in the provided name
+    if (submodule_name_end == std::string::npos)
+    {
+        submodule_name_end = name.size();
+    }
+
+    PyObject* submodule = parent_module;
+
+    for (size_t submodule_name_start = parent_name.size() + 1;
+         submodule_name_start < name.size(); )
+    {
+        const std::string submodule_name = name.substr(submodule_name_start,
+                                                       submodule_name_end - submodule_name_start);
+
+        const std::string full_submodule_name = name.substr(0, submodule_name_end);
+
+
+        PyObject* parent_module_dict = PyModule_GetDict(submodule);
+        /// If submodule already exists it can be found in the parent module dictionary,
+        /// otherwise it should be added to it.
+        submodule = PyDict_GetItemString(parent_module_dict,
+                                         submodule_name.c_str());
+        if (!submodule)
+        {
+            /// Populates global modules dictionary and returns borrowed reference to it
+            submodule = PyImport_AddModule(full_submodule_name.c_str());
+            if (!submodule)
+            {
+                /// `PyImport_AddModule` returns NULL with an exception set on failure.
+                return NULL;
+            }
+            /// Populates parent module dictionary. Submodule lifetime should be managed
+            /// by the global modules dictionary and parent module dictionary, so Py_DECREF after
+            /// a successful call to `PyDict_SetItemString` is redundant.
+            if (PyDict_SetItemString(parent_module_dict, submodule_name.c_str(), submodule) < 0) {
+                return PyErr_Format(PyExc_ImportError,
+                    "Can't register a submodule '%s' (full name: '%s')",
+                    submodule_name.c_str(), full_submodule_name.c_str()
+                );
+            }
+        }
+
+        submodule_name_start = submodule_name_end + 1;
+
+        submodule_name_end = name.find('.', submodule_name_start);
+        if (submodule_name_end == std::string::npos) {
+            submodule_name_end = name.size();
+        }
+    }
+    return submodule;
+}
+
+static bool init_submodule(PyObject * root, const char * name, PyMethodDef * methods, ConstDef * consts)
+{
+    // traverse and create nested submodules
+    PyObject* submodule = createSubmodule(root, name);
+    if (!submodule)
+    {
+        return false;
+    }
+    // populate module's dict
+    PyObject * d = PyModule_GetDict(submodule);
+    for (PyMethodDef * m = methods; m->ml_name != NULL; ++m)
+    {
+        PyObject * method_obj = PyCFunction_NewEx(m, NULL, NULL);
+        if (PyDict_SetItemString(d, m->ml_name, method_obj) < 0)
+        {
+            PyErr_Format(PyExc_ImportError,
+                "Can't register function %s in module: %s", m->ml_name, name
+            );
+            Py_CLEAR(method_obj);
+            return false;
+        }
+        Py_DECREF(method_obj);
+    }
+    for (ConstDef * c = consts; c->name != NULL; ++c)
+    {
+        PyObject* const_obj = PyLong_FromLongLong(c->val);
+        if (PyDict_SetItemString(d, c->name, const_obj) < 0)
+        {
+            PyErr_Format(PyExc_ImportError,
+                "Can't register constant %s in module %s", c->name, name
+            );
+            Py_CLEAR(const_obj);
+            return false;
+        }
+        Py_DECREF(const_obj);
+    }
+    return true;
 }
 
 #include "pyopencv_generated_modules_content.h"
@@ -176,7 +286,10 @@ static void init_submodule(PyObject * root, const char * name, PyMethodDef * met
 static bool init_body(PyObject * m)
 {
 #define CVPY_MODULE(NAMESTR, NAME) \
-    init_submodule(m, MODULESTR NAMESTR, methods_##NAME, consts_##NAME)
+    if (!init_submodule(m, MODULESTR NAMESTR, methods_##NAME, consts_##NAME)) \
+    { \
+        return false; \
+    }
     #include "pyopencv_generated_modules.h"
 #undef CVPY_MODULE
 
@@ -193,7 +306,13 @@ static bool init_body(PyObject * m)
     PyObject* d = PyModule_GetDict(m);
 
 
-    PyDict_SetItemString(d, "__version__", PyString_FromString(CV_VERSION));
+    PyObject* version_obj = PyString_FromString(CV_VERSION);
+    if (PyDict_SetItemString(d, "__version__", version_obj) < 0) {
+        PyErr_SetString(PyExc_ImportError, "Can't update module version");
+        Py_CLEAR(version_obj);
+        return false;
+    }
+    Py_DECREF(version_obj);
 
     PyObject *opencv_error_dict = PyDict_New();
     PyDict_SetItemString(opencv_error_dict, "file", Py_None);
@@ -207,7 +326,18 @@ static bool init_body(PyObject * m)
     PyDict_SetItemString(d, "error", opencv_error);
 
 
-#define PUBLISH(I) PyDict_SetItemString(d, #I, PyInt_FromLong(I))
+#define PUBLISH_(I, var_name, type_obj) \
+    PyObject* type_obj = PyInt_FromLong(I); \
+    if (PyDict_SetItemString(d, var_name, type_obj) < 0) \
+    { \
+        PyErr_SetString(PyExc_ImportError, "Can't register "  var_name " constant"); \
+        Py_CLEAR(type_obj); \
+        return false; \
+    } \
+    Py_DECREF(type_obj);
+
+#define PUBLISH(I) PUBLISH_(I, #I, I ## _obj)
+
     PUBLISH(CV_8U);
     PUBLISH(CV_8UC1);
     PUBLISH(CV_8UC2);
@@ -243,6 +373,7 @@ static bool init_body(PyObject * m)
     PUBLISH(CV_64FC2);
     PUBLISH(CV_64FC3);
     PUBLISH(CV_64FC4);
+#undef PUBLISH_
 #undef PUBLISH
 
     return true;
diff --git a/modules/python/src2/pycompat.hpp b/modules/python/src2/pycompat.hpp
index 03379ec956..8b44726d5f 100644
--- a/modules/python/src2/pycompat.hpp
+++ b/modules/python/src2/pycompat.hpp
@@ -231,7 +231,12 @@ PyObject* pyopencv_from(const TYPE& src)
             ERROR_HANDLER; \
         } \
         CVPY_TYPE_INCREF(pyopencv_##NAME##_TypePtr); \
-        PyModule_AddObject(m, #WNAME, (PyObject *)pyopencv_##NAME##_TypePtr); \
+        if (PyModule_AddObject(m, #WNAME, (PyObject *)pyopencv_##NAME##_TypePtr) < 0) \
+        { \
+            printf("Failed to register a new type: " #WNAME  ", base (" #BASE ")\n"); \
+            Py_DECREF(pyopencv_##NAME##_TypePtr); \
+            ERROR_HANDLER; \
+        } \
     }
 
 //==================================================================================================
@@ -304,10 +309,15 @@ PyObject* pyopencv_from(const TYPE& src)
         pyopencv_##NAME##_TypePtr = PyType_FromSpecWithBases(&pyopencv_##NAME##_Spec, bases); \
         if (!pyopencv_##NAME##_TypePtr) \
         { \
-            printf("Failed to init: " #WNAME ", base (" #BASE ")" "\n"); \
+            printf("Failed to create type from spec: " #WNAME ", base (" #BASE ")\n"); \
+            ERROR_HANDLER; \
+        } \
+        if (PyModule_AddObject(m, #WNAME, (PyObject *)pyopencv_##NAME##_TypePtr) < 0) \
+        { \
+            printf("Failed to register a new type: " #WNAME  ", base (" #BASE ")\n"); \
+            Py_DECREF(pyopencv_##NAME##_TypePtr); \
             ERROR_HANDLER; \
         } \
-        PyModule_AddObject(m, #NAME, (PyObject *)pyopencv_##NAME##_TypePtr); \
     }
 
 // Debug module load:
diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py
index 051ac33ac9..48657d595c 100644
--- a/modules/python/test/test_misc.py
+++ b/modules/python/test/test_misc.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 from __future__ import print_function
 
+import sys
 import ctypes
 from functools import partial
 from collections import namedtuple
@@ -607,6 +608,32 @@ class Arguments(NewOpenCVTests):
         self.assertTrue(isinstance(rr, tuple), msg=type(rrv))
         self.assertEqual(len(rr), 3)
 
+    def test_nested_function_availability(self):
+        self.assertTrue(hasattr(cv.utils, "nested"),
+                        msg="Module is not generated for nested namespace")
+        self.assertTrue(hasattr(cv.utils.nested, "testEchoBooleanFunction"),
+                        msg="Function in nested module is not available")
+
+        if sys.version_info[0] < 3:
+            # Nested submodule is managed only by the global submodules dictionary
+            # and parent native module
+            expected_ref_count = 2
+        else:
+            # Nested submodule is managed by the global submodules dictionary,
+            # parent native module and Python part of the submodule
+            expected_ref_count = 3
+
+        # `getrefcount` temporarily increases the reference counter by 1
+        actual_ref_count = sys.getrefcount(cv.utils.nested) - 1
+
+        self.assertEqual(actual_ref_count, expected_ref_count,
+                         msg="Nested submodule reference counter has wrong value\n"
+                         "Expected: {}. Actual: {}".format(expected_ref_count, actual_ref_count))
+        for flag in (True, False):
+            self.assertEqual(flag, cv.utils.nested.testEchoBooleanFunction(flag),
+                             msg="Function in nested module returns wrong result")
+
+
 class CanUsePurePythonModuleFunction(NewOpenCVTests):
     def test_can_get_ocv_version(self):
         import sys
diff --git a/modules/stitching/src/seam_finders.cpp b/modules/stitching/src/seam_finders.cpp
index c5e4cb04ff..0e0c7d1967 100644
--- a/modules/stitching/src/seam_finders.cpp
+++ b/modules/stitching/src/seam_finders.cpp
@@ -587,8 +587,8 @@ void DpSeamFinder::computeGradients(const Mat &image1, const Mat &image2)
 bool DpSeamFinder::hasOnlyOneNeighbor(int comp)
 {
     std::set<std::pair<int, int> >::iterator begin, end;
-    begin = lower_bound(edges_.begin(), edges_.end(), std::make_pair(comp, std::numeric_limits<int>::min()));
-    end = upper_bound(edges_.begin(), edges_.end(), std::make_pair(comp, std::numeric_limits<int>::max()));
+    begin = edges_.lower_bound(std::make_pair(comp, std::numeric_limits<int>::min()));
+    end = edges_.upper_bound(std::make_pair(comp, std::numeric_limits<int>::max()));
     return ++begin == end;
 }
 
diff --git a/modules/ts/include/opencv2/ts/cuda_test.hpp b/modules/ts/include/opencv2/ts/cuda_test.hpp
index 53bdbc8a4f..f1851c5f8f 100644
--- a/modules/ts/include/opencv2/ts/cuda_test.hpp
+++ b/modules/ts/include/opencv2/ts/cuda_test.hpp
@@ -63,6 +63,7 @@ namespace cvtest
     // GpuMat create
 
     cv::cuda::GpuMat createMat(cv::Size size, int type, bool useRoi = false);
+    cv::cuda::GpuMat createMat(cv::Size size, int type, cv::Size& size0, cv::Point& ofs, bool useRoi = false);
     cv::cuda::GpuMat loadMat(const cv::Mat& m, bool useRoi = false);
 
     //////////////////////////////////////////////////////////////////////
diff --git a/modules/ts/src/cuda_test.cpp b/modules/ts/src/cuda_test.cpp
index 3870415f05..a50f2cc3ce 100644
--- a/modules/ts/src/cuda_test.cpp
+++ b/modules/ts/src/cuda_test.cpp
@@ -91,7 +91,13 @@ namespace cvtest
 
     GpuMat createMat(Size size, int type, bool useRoi)
     {
-        Size size0 = size;
+        Size size0; Point ofs;
+        return createMat(size, type, size0, ofs, useRoi);
+    }
+
+    GpuMat createMat(Size size, int type, Size& size0, Point& ofs, bool useRoi)
+    {
+        size0 = size;
 
         if (useRoi)
         {
@@ -100,9 +106,10 @@ namespace cvtest
         }
 
         GpuMat d_m(size0, type);
-
-        if (size0 != size)
-            d_m = d_m(Rect((size0.width - size.width) / 2, (size0.height - size.height) / 2, size.width, size.height));
+        if (size0 != size) {
+            ofs = Point((size0.width - size.width) / 2, (size0.height - size.height) / 2);
+            d_m = d_m(Rect(ofs, size));
+        }
 
         return d_m;
     }
diff --git a/modules/videoio/cmake/detect_aravis.cmake b/modules/videoio/cmake/detect_aravis.cmake
index e7b3828993..cf8429e5dc 100644
--- a/modules/videoio/cmake/detect_aravis.cmake
+++ b/modules/videoio/cmake/detect_aravis.cmake
@@ -1,6 +1,6 @@
 # --- Aravis SDK ---
 if(NOT HAVE_ARAVIS_API AND PKG_CONFIG_FOUND)
-  ocv_check_modules(ARAVIS aravis-0.6 QUIET)
+  ocv_check_modules(ARAVIS aravis-0.8 QUIET)
   if(ARAVIS_FOUND)
     set(HAVE_ARAVIS_API TRUE)
   endif()
@@ -9,9 +9,9 @@ endif()
 if(NOT HAVE_ARAVIS_API)
   find_path(ARAVIS_INCLUDE "arv.h"
     PATHS "${ARAVIS_ROOT}" ENV ARAVIS_ROOT
-    PATH_SUFFIXES "include/aravis-0.6"
+    PATH_SUFFIXES "include/aravis-0.8"
     NO_DEFAULT_PATH)
-  find_library(ARAVIS_LIBRARY "aravis-0.6"
+  find_library(ARAVIS_LIBRARY "aravis-0.8"
     PATHS "${ARAVIS_ROOT}" ENV ARAVIS_ROOT
     PATH_SUFFIXES "lib"
     NO_DEFAULT_PATH)
diff --git a/modules/videoio/cmake/detect_gstreamer.cmake b/modules/videoio/cmake/detect_gstreamer.cmake
index fc6c347383..b2ab06060d 100644
--- a/modules/videoio/cmake/detect_gstreamer.cmake
+++ b/modules/videoio/cmake/detect_gstreamer.cmake
@@ -44,6 +44,10 @@ if(NOT HAVE_GSTREAMER AND WIN32)
     NAMES gstvideo gstvideo-1.0
     PATHS ${env_paths}
     PATH_SUFFIXES "lib")
+  find_library(GSTREAMER_audio_LIBRARY
+    NAMES gstaudio gstaudio-1.0
+    PATHS ${env_paths}
+    PATH_SUFFIXES "lib")
 
   find_library(GSTREAMER_glib_LIBRARY
     NAMES glib-2.0
@@ -63,6 +67,7 @@ if(NOT HAVE_GSTREAMER AND WIN32)
       AND GSTREAMER_pbutils_LIBRARY
       AND GSTREAMER_riff_LIBRARY
       AND GSTREAMER_video_LIBRARY
+      AND GSTREAMER_audio_LIBRARY
       AND GSTREAMER_glib_LIBRARY
       AND GSTREAMER_gobject_LIBRARY)
     file(STRINGS "${GSTREAMER_gst_INCLUDE_DIR}/gst/gstversion.h" ver_strings REGEX "#define +GST_VERSION_(MAJOR|MINOR|MICRO|NANO).*")
@@ -77,6 +82,7 @@ if(NOT HAVE_GSTREAMER AND WIN32)
       ${GSTREAMER_app_LIBRARY}
       ${GSTREAMER_riff_LIBRARY}
       ${GSTREAMER_video_LIBRARY}
+      ${GSTREAMER_audio_LIBRARY}
       ${GSTREAMER_pbutils_LIBRARY}
       ${GSTREAMER_glib_LIBRARY}
       ${GSTREAMER_gobject_LIBRARY})
diff --git a/modules/videoio/src/cap_aravis.cpp b/modules/videoio/src/cap_aravis.cpp
index 1f0e21eb33..49f7789f80 100644
--- a/modules/videoio/src/cap_aravis.cpp
+++ b/modules/videoio/src/cap_aravis.cpp
@@ -51,8 +51,8 @@
 #include <arv.h>
 
 //
-// This file provides wrapper for using Aravis SDK library to access GigE Vision cameras.
-// Aravis library (version 0.4 or 0.6) shall be installed else this code will not be included in build.
+// This file provides a wrapper for using the Aravis SDK library to access GigE and USB 3 Vision cameras.
+// The Aravis library (version 0.8) must be installed; otherwise this code will not be included in the build.
 //
 // To include this module invoke cmake with -DWITH_ARAVIS=ON
 //
@@ -151,10 +151,6 @@ protected:
     bool            softwareTriggered;      // Flag if the camera is software triggered
     bool            allowAutoTrigger;       // Flag that user allowed to trigger software triggered cameras automatically
 
-    gint64          *pixelFormats;
-    guint           pixelFormatsCnt;
-
-
     int             num_buffers;            // number of payload transmission buffers
 
     ArvPixelFormat  pixelFormat;            // pixel format
@@ -225,7 +221,7 @@ bool CvCaptureCAM_Aravis::create( int index )
     if(!getDeviceNameById(index, deviceName))
         return false;
 
-    return NULL != (camera = arv_camera_new(deviceName.c_str()));
+    return NULL != (camera = arv_camera_new(deviceName.c_str(), NULL));
 }
 
 bool CvCaptureCAM_Aravis::init_buffers()
@@ -234,7 +230,7 @@ bool CvCaptureCAM_Aravis::init_buffers()
         g_object_unref(stream);
         stream = NULL;
     }
-    if( (stream = arv_camera_create_stream(camera, NULL, NULL)) ) {
+    if( (stream = arv_camera_create_stream(camera, NULL, NULL, NULL)) ) {
         if( arv_camera_is_gv_device(camera) ) {
             g_object_set(stream,
                 "socket-buffer", ARV_GV_STREAM_SOCKET_BUFFER_AUTO,
@@ -245,7 +241,7 @@ bool CvCaptureCAM_Aravis::init_buffers()
                 "packet-timeout", (unsigned) 40000,
                 "frame-retention", (unsigned) 200000, NULL);
         }
-        payload = arv_camera_get_payload (camera);
+        payload = arv_camera_get_payload (camera, NULL);
 
         for (int i = 0; i < num_buffers; i++)
             arv_stream_push_buffer(stream, arv_buffer_new(payload, NULL));
@@ -260,25 +256,23 @@ bool CvCaptureCAM_Aravis::open( int index )
 {
     if(create(index)) {
         // fetch properties bounds
-        pixelFormats = arv_camera_get_available_pixel_formats(camera, &pixelFormatsCnt);
+        arv_camera_get_width_bounds(camera, &widthMin, &widthMax, NULL);
+        arv_camera_get_height_bounds(camera, &heightMin, &heightMax, NULL);
+        arv_camera_set_region(camera, 0, 0, widthMax, heightMax, NULL);
 
-        arv_camera_get_width_bounds(camera, &widthMin, &widthMax);
-        arv_camera_get_height_bounds(camera, &heightMin, &heightMax);
-        arv_camera_set_region(camera, 0, 0, widthMax, heightMax);
-
-        if( (fpsAvailable = arv_camera_is_frame_rate_available(camera)) )
-            arv_camera_get_frame_rate_bounds(camera, &fpsMin, &fpsMax);
-        if( (gainAvailable = arv_camera_is_gain_available(camera)) )
-            arv_camera_get_gain_bounds (camera, &gainMin, &gainMax);
-        if( (exposureAvailable = arv_camera_is_exposure_time_available(camera)) )
-            arv_camera_get_exposure_time_bounds (camera, &exposureMin, &exposureMax);
+        if( (fpsAvailable = arv_camera_is_frame_rate_available(camera, NULL)) )
+            arv_camera_get_frame_rate_bounds(camera, &fpsMin, &fpsMax, NULL);
+        if( (gainAvailable = arv_camera_is_gain_available(camera, NULL)) )
+            arv_camera_get_gain_bounds (camera, &gainMin, &gainMax, NULL);
+        if( (exposureAvailable = arv_camera_is_exposure_time_available(camera, NULL)) )
+            arv_camera_get_exposure_time_bounds (camera, &exposureMin, &exposureMax, NULL);
 
         // get initial values
-        pixelFormat = arv_camera_get_pixel_format(camera);
-        exposure = exposureAvailable ? arv_camera_get_exposure_time(camera) : 0;
-        gain = gainAvailable ? arv_camera_get_gain(camera) : 0;
-        fps = arv_camera_get_frame_rate(camera);
-        softwareTriggered = (strcmp(arv_camera_get_trigger_source(camera), "Software") == 0);
+        pixelFormat = arv_camera_get_pixel_format(camera, NULL);
+        exposure = exposureAvailable ? arv_camera_get_exposure_time(camera, NULL) : 0;
+        gain = gainAvailable ? arv_camera_get_gain(camera, NULL) : 0;
+        fps = arv_camera_get_frame_rate(camera, NULL);
+        softwareTriggered = (strcmp(arv_camera_get_trigger_source(camera, NULL), "Software") == 0);
 
         return startCapture();
     }
@@ -295,7 +289,7 @@ bool CvCaptureCAM_Aravis::grabFrame()
         int max_tries = 10;
         int tries = 0;
         if (softwareTriggered && allowAutoTrigger) {
-            arv_camera_software_trigger (camera);
+            arv_camera_software_trigger (camera, NULL);
         }
         for(; tries < max_tries; tries ++) {
             arv_buffer = arv_stream_timeout_pop_buffer (stream, 200000);
@@ -402,7 +396,7 @@ void CvCaptureCAM_Aravis::autoExposureControl(IplImage* image)
 
             if( ng < gain ) {
                 // priority 1 - reduce gain
-                arv_camera_set_gain(camera, (gain = ng));
+                arv_camera_set_gain(camera, (gain = ng), NULL);
                 return;
             }
         }
@@ -411,7 +405,7 @@ void CvCaptureCAM_Aravis::autoExposureControl(IplImage* image)
             // priority 2 - control of exposure time
             if(std::fabs(exposure - ne) > 2) {
                 // we have not yet reach the max-e level
-                arv_camera_set_exposure_time(camera, (exposure = ne) );
+                arv_camera_set_exposure_time(camera, (exposure = ne), NULL);
                 return;
             }
         }
@@ -420,12 +414,12 @@ void CvCaptureCAM_Aravis::autoExposureControl(IplImage* image)
             if(exposureAvailable) {
                 // exposure at maximum - increase gain if possible
                 if(ng > gain && ng < gainMax && ne >= maxe) {
-                    arv_camera_set_gain(camera, (gain = ng));
+                    arv_camera_set_gain(camera, (gain = ng), NULL);
                     return;
                 }
             } else {
                 // priority 3 - increase gain
-                arv_camera_set_gain(camera, (gain = ng));
+                arv_camera_set_gain(camera, (gain = ng), NULL);
                 return;
             }
         }
@@ -435,7 +429,7 @@ void CvCaptureCAM_Aravis::autoExposureControl(IplImage* image)
     if(gainAvailable && autoGain && exposureAvailable) {
         if(gain > gainMin && exposure < maxe) {
             exposure = CLIP( ne * 1.05, exposureMin, maxe);
-            arv_camera_set_exposure_time(camera, exposure );
+            arv_camera_set_exposure_time(camera, exposure, NULL);
         }
     }
 }
@@ -461,25 +455,25 @@ double CvCaptureCAM_Aravis::getProperty( int property_id ) const
         case CV_CAP_PROP_EXPOSURE:
             if(exposureAvailable) {
                 /* exposure time in seconds, like 1/100 s */
-                return arv_camera_get_exposure_time(camera) / 1e6;
+                return arv_camera_get_exposure_time(camera, NULL) / 1e6;
             }
             break;
 
         case CV_CAP_PROP_FPS:
             if(fpsAvailable) {
-                return arv_camera_get_frame_rate(camera);
+                return arv_camera_get_frame_rate(camera, NULL);
             }
             break;
 
         case CV_CAP_PROP_GAIN:
             if(gainAvailable) {
-                return arv_camera_get_gain(camera);
+                return arv_camera_get_gain(camera, NULL);
             }
             break;
 
         case CV_CAP_PROP_FOURCC:
             {
-                ArvPixelFormat currFormat = arv_camera_get_pixel_format(camera);
+                ArvPixelFormat currFormat = arv_camera_get_pixel_format(camera, NULL);
                 switch( currFormat ) {
                     case ARV_PIXEL_FORMAT_MONO_8:
                         return MODE_Y800;
@@ -517,8 +511,8 @@ bool CvCaptureCAM_Aravis::setProperty( int property_id, double value )
         case CV_CAP_PROP_AUTO_EXPOSURE:
             if(exposureAvailable || gainAvailable) {
                 if( (controlExposure = (bool)(int)value) ) {
-                    exposure = exposureAvailable ? arv_camera_get_exposure_time(camera) : 0;
-                    gain = gainAvailable ? arv_camera_get_gain(camera) : 0;
+                    exposure = exposureAvailable ? arv_camera_get_exposure_time(camera, NULL) : 0;
+                    gain = gainAvailable ? arv_camera_get_gain(camera, NULL) : 0;
                 }
             }
             break;
@@ -531,13 +525,13 @@ bool CvCaptureCAM_Aravis::setProperty( int property_id, double value )
                 /* exposure time in seconds, like 1/100 s */
                 value *= 1e6; // -> from s to us
 
-                arv_camera_set_exposure_time(camera, exposure = CLIP(value, exposureMin, exposureMax));
+                arv_camera_set_exposure_time(camera, exposure = CLIP(value, exposureMin, exposureMax), NULL);
                 break;
             } else return false;
 
         case CV_CAP_PROP_FPS:
             if(fpsAvailable) {
-                arv_camera_set_frame_rate(camera, fps = CLIP(value, fpsMin, fpsMax));
+                arv_camera_set_frame_rate(camera, fps = CLIP(value, fpsMin, fpsMax), NULL);
                 break;
             } else return false;
 
@@ -546,7 +540,7 @@ bool CvCaptureCAM_Aravis::setProperty( int property_id, double value )
                 if ( (autoGain = (-1 == value) ) )
                     break;
 
-                arv_camera_set_gain(camera, gain = CLIP(value, gainMin, gainMax));
+                arv_camera_set_gain(camera, gain = CLIP(value, gainMin, gainMax), NULL);
                 break;
             } else return false;
 
@@ -574,7 +568,7 @@ bool CvCaptureCAM_Aravis::setProperty( int property_id, double value )
                 }
                 if(newFormat != pixelFormat) {
                     stopCapture();
-                    arv_camera_set_pixel_format(camera, pixelFormat = newFormat);
+                    arv_camera_set_pixel_format(camera, pixelFormat = newFormat, NULL);
                     startCapture();
                 }
             }
@@ -606,7 +600,7 @@ bool CvCaptureCAM_Aravis::setProperty( int property_id, double value )
 
 void CvCaptureCAM_Aravis::stopCapture()
 {
-    arv_camera_stop_acquisition(camera);
+    arv_camera_stop_acquisition(camera, NULL);
 
     if(stream) {
         g_object_unref(stream);
@@ -617,8 +611,8 @@ void CvCaptureCAM_Aravis::stopCapture()
 bool CvCaptureCAM_Aravis::startCapture()
 {
     if(init_buffers() ) {
-        arv_camera_set_acquisition_mode(camera, ARV_ACQUISITION_MODE_CONTINUOUS);
-        arv_camera_start_acquisition(camera);
+        arv_camera_set_acquisition_mode(camera, ARV_ACQUISITION_MODE_CONTINUOUS, NULL);
+        arv_camera_start_acquisition(camera, NULL);
 
         return true;
     }
diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp
index 2188c25444..47dc00ab04 100644
--- a/modules/videoio/src/cap_ffmpeg_impl.hpp
+++ b/modules/videoio/src/cap_ffmpeg_impl.hpp
@@ -980,7 +980,11 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters&
     char* options = getenv("OPENCV_FFMPEG_CAPTURE_OPTIONS");
     if(options == NULL)
     {
+#if LIBAVFORMAT_VERSION_MICRO >= 100  && LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(55, 48, 100)
+        av_dict_set(&dict, "rtsp_flags", "prefer_tcp", 0);
+#else
         av_dict_set(&dict, "rtsp_transport", "tcp", 0);
+#endif
     }
     else
     {
diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp
index d78236913b..d3002a5151 100644
--- a/modules/videoio/src/cap_msmf.cpp
+++ b/modules/videoio/src/cap_msmf.cpp
@@ -536,7 +536,7 @@ private:
     // Destructor is private. Caller should call Release.
     virtual ~SourceReaderCB()
     {
-        CV_LOG_WARNING(NULL, "terminating async callback");
+        CV_LOG_INFO(NULL, "terminating async callback");
     }
 
 public:
diff --git a/platforms/apple/build_xcframework.py b/platforms/apple/build_xcframework.py
index afea5e4691..49878435d0 100755
--- a/platforms/apple/build_xcframework.py
+++ b/platforms/apple/build_xcframework.py
@@ -58,7 +58,7 @@ if __name__ == "__main__":
         macos_archs = "x86_64,arm64"
     print('Using MacOS ARCHS={}'.format(macos_archs))
 
-    catalyst_archs = args.macos_archs
+    catalyst_archs = args.catalyst_archs
     if not catalyst_archs and not args.build_only_specified_archs:
         # Supply defaults
         catalyst_archs = "x86_64,arm64"
diff --git a/platforms/winpack_dldt/2021.4.2/20220118-dldt-fix-msvs-compilation-21469.patch b/platforms/winpack_dldt/2021.4.2/20220118-dldt-fix-msvs-compilation-21469.patch
new file mode 100644
index 0000000000..411d5cbd5c
--- /dev/null
+++ b/platforms/winpack_dldt/2021.4.2/20220118-dldt-fix-msvs-compilation-21469.patch
@@ -0,0 +1,12 @@
+diff --git a/inference-engine/src/plugin_api/caseless.hpp b/inference-engine/src/plugin_api/caseless.hpp
+index d8ce739..0dd8886 100644
+--- a/inference-engine/src/plugin_api/caseless.hpp
++++ b/inference-engine/src/plugin_api/caseless.hpp
+@@ -12,6 +12,7 @@
+ #include <algorithm>
+ #include <cctype>
+ #include <functional>
++#include <iterator>
+ #include <map>
+ #include <set>
+ #include <unordered_map>
diff --git a/platforms/winpack_dldt/2021.4.2/cmake/InferenceEngineConfig-version.cmake b/platforms/winpack_dldt/2021.4.2/cmake/InferenceEngineConfig-version.cmake
new file mode 100644
index 0000000000..bc449d05cd
--- /dev/null
+++ b/platforms/winpack_dldt/2021.4.2/cmake/InferenceEngineConfig-version.cmake
@@ -0,0 +1,29 @@
+# Copyright (C) 2018-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Version of this package, assembled from its three numeric components
+set(PACKAGE_VERSION_MAJOR 2021)
+set(PACKAGE_VERSION_MINOR 4)
+set(PACKAGE_VERSION_PATCH 2)
+set(PACKAGE_VERSION "${PACKAGE_VERSION_MAJOR}.${PACKAGE_VERSION_MINOR}.${PACKAGE_VERSION_PATCH}")
+
+# Both result flags start as False; the checks below switch them on
+set(PACKAGE_VERSION_COMPATIBLE False)
+set(PACKAGE_VERSION_EXACT False)
+
+# Compatibility with old versioning for 2.x
+if(PACKAGE_FIND_VERSION_MAJOR VERSION_EQUAL 2)
+    if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED)
+        message(WARNING "Inference Engine versioning has changed. Use ${PACKAGE_VERSION} instead of ${PACKAGE_FIND_VERSION}")
+    endif()
+    set(PACKAGE_VERSION_COMPATIBLE True)
+endif()
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+    set(PACKAGE_VERSION_COMPATIBLE True)
+    set(PACKAGE_VERSION_EXACT True)
+elseif(PACKAGE_FIND_VERSION_MAJOR EQUAL PACKAGE_VERSION_MAJOR AND
+       PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION)
+    set(PACKAGE_VERSION_COMPATIBLE True)
+endif()
diff --git a/platforms/winpack_dldt/2021.4.2/cmake/InferenceEngineConfig.cmake b/platforms/winpack_dldt/2021.4.2/cmake/InferenceEngineConfig.cmake
new file mode 100644
index 0000000000..d9c9a12de6
--- /dev/null
+++ b/platforms/winpack_dldt/2021.4.2/cmake/InferenceEngineConfig.cmake
@@ -0,0 +1,44 @@
+# Inference Engine CMake config for OpenCV windows package
+#
+# Defines the imported target IE::inference_engine with Debug and Release
+# configurations and sets InferenceEngine_LIBRARIES and InferenceEngine_FOUND.
+
+# All paths below are relative to the directory three levels above the one
+# containing this file.
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" DIRECTORY)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" DIRECTORY)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" DIRECTORY)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" DIRECTORY)
+
+set(InferenceEngine_LIBRARIES IE::inference_engine)
+
+# Define the target only once: add_library() fails if this file is loaded
+# again in a scope where the imported target already exists.
+if(NOT TARGET IE::inference_engine)
+  add_library(IE::inference_engine SHARED IMPORTED)
+
+  set_target_properties(IE::inference_engine PROPERTIES
+    INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/deployment_tools/inference_engine/include"
+  )
+
+  # Import target "IE::inference_engine" for configuration "Debug"
+  set_property(TARGET IE::inference_engine APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
+  set_target_properties(IE::inference_engine PROPERTIES
+    IMPORTED_IMPLIB_DEBUG "${_IMPORT_PREFIX}/deployment_tools/inference_engine/lib/intel64/inference_engined.lib"
+    IMPORTED_LINK_DEPENDENT_LIBRARIES_DEBUG ""
+    IMPORTED_LOCATION_DEBUG "${_IMPORT_PREFIX}/bin/inference_engined.dll"
+  )
+
+  # Import target "IE::inference_engine" for configuration "Release"
+  set_property(TARGET IE::inference_engine APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
+  set_target_properties(IE::inference_engine PROPERTIES
+    IMPORTED_IMPLIB_RELEASE "${_IMPORT_PREFIX}/deployment_tools/inference_engine/lib/intel64/inference_engine.lib"
+    IMPORTED_LINK_DEPENDENT_LIBRARIES_RELEASE ""
+    IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/bin/inference_engine.dll"
+  )
+endif()
+
+# Do not leak the helper variable into the including scope.
+unset(_IMPORT_PREFIX)
+
+set(InferenceEngine_FOUND ON)
diff --git a/platforms/winpack_dldt/2021.4.2/patch.config.py b/platforms/winpack_dldt/2021.4.2/patch.config.py
index 7f8715aae2..bd31af236f 100644
--- a/platforms/winpack_dldt/2021.4.2/patch.config.py
+++ b/platforms/winpack_dldt/2021.4.2/patch.config.py
@@ -2,3 +2,4 @@ applyPatch('20210630-dldt-disable-unused-targets.patch')
 applyPatch('20210630-dldt-pdb.patch')
 applyPatch('20210630-dldt-disable-multidevice-autoplugin.patch')
 applyPatch('20210630-dldt-vs-version.patch')
+applyPatch('20220118-dldt-fix-msvs-compilation-21469.patch')
diff --git a/platforms/winpack_dldt/2021.4.2/sysroot.config.py b/platforms/winpack_dldt/2021.4.2/sysroot.config.py
index fa4281107d..f11e99f843 100644
--- a/platforms/winpack_dldt/2021.4.2/sysroot.config.py
+++ b/platforms/winpack_dldt/2021.4.2/sysroot.config.py
@@ -1,3 +1,5 @@
+copytree(self.cpath / 'cmake', self.sysrootdir / 'deployment_tools' / 'inference_engine' / 'cmake')
+
 sysroot_bin_dir = prepare_dir(self.sysrootdir / 'bin')
 copytree(self.build_dir / 'install', self.sysrootdir / 'ngraph')
 #rm_one(self.sysrootdir / 'ngraph' / 'lib' / 'ngraph.dll')
diff --git a/platforms/winpack_dldt/build_package.py b/platforms/winpack_dldt/build_package.py
index 88154bafb5..277a13c232 100644
--- a/platforms/winpack_dldt/build_package.py
+++ b/platforms/winpack_dldt/build_package.py
@@ -388,10 +388,9 @@ class Builder:
         if self.config.dldt_release:
             cmake_vars['INF_ENGINE_RELEASE'] = str(self.config.dldt_release)
 
-        cmake_vars['INF_ENGINE_LIB_DIRS:PATH'] = str(builderDLDT.sysrootdir / 'deployment_tools/inference_engine/lib/intel64')
-        assert os.path.exists(cmake_vars['INF_ENGINE_LIB_DIRS:PATH']), cmake_vars['INF_ENGINE_LIB_DIRS:PATH']
-        cmake_vars['INF_ENGINE_INCLUDE_DIRS:PATH'] = str(builderDLDT.sysrootdir / 'deployment_tools/inference_engine/include')
-        assert os.path.exists(cmake_vars['INF_ENGINE_INCLUDE_DIRS:PATH']), cmake_vars['INF_ENGINE_INCLUDE_DIRS:PATH']
+        InferenceEngine_DIR = str(builderDLDT.sysrootdir / 'deployment_tools' / 'inference_engine' / 'cmake')
+        assert os.path.exists(InferenceEngine_DIR), InferenceEngine_DIR
+        cmake_vars['InferenceEngine_DIR:PATH'] = InferenceEngine_DIR
 
         ngraph_DIR = str(builderDLDT.sysrootdir / 'ngraph/cmake')
         if not os.path.exists(ngraph_DIR):
diff --git a/samples/dnn/face_detect.cpp b/samples/dnn/face_detect.cpp
index 161940cb4a..d1e6314969 100644
--- a/samples/dnn/face_detect.cpp
+++ b/samples/dnn/face_detect.cpp
@@ -44,8 +44,8 @@ int main(int argc, char** argv)
         "{image2 i2         |            | Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm}"
         "{video v           | 0          | Path to the input video}"
         "{scale sc          | 1.0        | Scale factor used to resize input video frames}"
-        "{fd_model fd       | yunet.onnx | Path to the model. Download yunet.onnx in https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx }"
-        "{fr_model fr       | face_recognizer_fast.onnx | Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view}"
+        "{fd_model fd       | face_detection_yunet_2021dec.onnx| Path to the model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet}"
+        "{fr_model fr       | face_recognition_sface_2021dec.onnx | Path to the face recognition model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface}"
         "{score_threshold   | 0.9        | Filter out faces of score < score_threshold}"
         "{nms_threshold     | 0.3        | Suppress bounding boxes of iou >= nms_threshold}"
         "{top_k             | 5000       | Keep top_k bounding boxes before NMS}"
@@ -65,6 +65,7 @@ int main(int argc, char** argv)
     int topK = parser.get<int>("top_k");
 
     bool save = parser.get<bool>("save");
+    float scale = parser.get<float>("scale");
 
     double cosine_similar_thresh = 0.363;
     double l2norm_similar_thresh = 1.128;
@@ -87,6 +88,9 @@ int main(int argc, char** argv)
             return 2;
         }
 
+        int imageWidth = int(image1.cols * scale);
+        int imageHeight = int(image1.rows * scale);
+        resize(image1, image1, Size(imageWidth, imageHeight));
         tm.start();
 
         //! [inference]
@@ -199,7 +203,6 @@ int main(int argc, char** argv)
     else
     {
         int frameWidth, frameHeight;
-        float scale = parser.get<float>("scale");
         VideoCapture capture;
         std::string video = parser.get<string>("video");
         if (video.size() == 1 && isdigit(video[0]))
diff --git a/samples/dnn/face_detect.py b/samples/dnn/face_detect.py
index 8900a7f7ad..9cf38b5d5f 100644
--- a/samples/dnn/face_detect.py
+++ b/samples/dnn/face_detect.py
@@ -16,8 +16,8 @@ parser.add_argument('--image1', '-i1', type=str, help='Path to the input image1.
 parser.add_argument('--image2', '-i2', type=str, help='Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm.')
 parser.add_argument('--video', '-v', type=str, help='Path to the input video.')
 parser.add_argument('--scale', '-sc', type=float, default=1.0, help='Scale factor used to resize input video frames.')
-parser.add_argument('--face_detection_model', '-fd', type=str, default='yunet.onnx', help='Path to the face detection model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
-parser.add_argument('--face_recognition_model', '-fr', type=str, default='face_recognizer_fast.onnx', help='Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view.')
+parser.add_argument('--face_detection_model', '-fd', type=str, default='face_detection_yunet_2021dec.onnx', help='Path to the face detection model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet')
+parser.add_argument('--face_recognition_model', '-fr', type=str, default='face_recognition_sface_2021dec.onnx', help='Path to the face recognition model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface')
 parser.add_argument('--score_threshold', type=float, default=0.9, help='Filtering out faces of score < score_threshold.')
 parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
 parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
@@ -56,11 +56,15 @@ if __name__ == '__main__':
     # If input is an image
     if args.image1 is not None:
         img1 = cv.imread(cv.samples.findFile(args.image1))
+        img1Width = int(img1.shape[1]*args.scale)
+        img1Height = int(img1.shape[0]*args.scale)
 
+        img1 = cv.resize(img1, (img1Width, img1Height))
         tm.start()
+
         ## [inference]
         # Set input size before inference
-        detector.setInputSize((img1.shape[1], img1.shape[0]))
+        detector.setInputSize((img1Width, img1Height))
 
         faces1 = detector.detect(img1)
         ## [inference]
diff --git a/samples/dnn/text_detection.py b/samples/dnn/text_detection.py
index 6fb1e90901..db0ea197bd 100644
--- a/samples/dnn/text_detection.py
+++ b/samples/dnn/text_detection.py
@@ -195,7 +195,7 @@ def main():
         indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold)
         for i in indices:
             # get 4 corners of the rotated rect
-            vertices = cv.boxPoints(boxes[i[0]])
+            vertices = cv.boxPoints(boxes[i])
             # scale the bounding box coordinates based on the respective ratios
             for j in range(4):
                 vertices[j][0] *= rW
diff --git a/samples/python/camera_calibration_show_extrinsics.py b/samples/python/camera_calibration_show_extrinsics.py
index d676691f15..0ee2a19b68 100755
--- a/samples/python/camera_calibration_show_extrinsics.py
+++ b/samples/python/camera_calibration_show_extrinsics.py
@@ -1,5 +1,18 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+
+'''
+Plot camera calibration extrinsics.
+
+usage:
+    camera_calibration_show_extrinsics.py [--calibration <input path>] [--cam_width] [--cam_height] [--scale_focal] [--patternCentric]
+
+default values:
+    --calibration    : left_intrinsics.yml
+    --cam_width      : 0.064/2
+    --cam_height     : 0.048/2
+    --scale_focal    : 40
+    --patternCentric : True
+'''
 
 # Python 2/3 compatibility
 from __future__ import print_function
diff --git a/samples/python/common.py b/samples/python/common.py
index 85cda62cd4..e7ad478b88 100755
--- a/samples/python/common.py
+++ b/samples/python/common.py
@@ -222,7 +222,7 @@ def mosaic(w, imgs):
     pad = np.zeros_like(img0)
     imgs = it.chain([img0], imgs)
     rows = grouper(w, imgs, pad)
-    return np.vstack(map(np.hstack, rows))
+    return np.vstack(list(map(np.hstack, rows)))
 
 def getsize(img):
     h, w = img.shape[:2]
diff --git a/samples/python/digits.py b/samples/python/digits.py
index e5d8ceb59a..25db411f94 100755
--- a/samples/python/digits.py
+++ b/samples/python/digits.py
@@ -191,3 +191,4 @@ if __name__ == '__main__':
     model.save('digits_svm.dat')
 
     cv.waitKey(0)
+    cv.destroyAllWindows()
diff --git a/samples/python/digits_video.py b/samples/python/digits_video.py
index 692da91219..17f44c333d 100755
--- a/samples/python/digits_video.py
+++ b/samples/python/digits_video.py
@@ -29,7 +29,7 @@ def main():
         src = sys.argv[1]
     except:
         src = 0
-    cap = video.create_capture(src)
+    cap = video.create_capture(src, fallback='synth:bg={}:noise=0.05'.format(cv.samples.findFile('sudoku.png')))
 
     classifier_fn = 'digits_svm.dat'
     if not os.path.exists(classifier_fn):
diff --git a/samples/python/facedetect.py b/samples/python/facedetect.py
index 488c92d5e5..248206a7cd 100755
--- a/samples/python/facedetect.py
+++ b/samples/python/facedetect.py
@@ -39,13 +39,13 @@ def main():
     except:
         video_src = 0
     args = dict(args)
-    cascade_fn = args.get('--cascade', "data/haarcascades/haarcascade_frontalface_alt.xml")
-    nested_fn  = args.get('--nested-cascade', "data/haarcascades/haarcascade_eye.xml")
+    cascade_fn = args.get('--cascade', "haarcascades/haarcascade_frontalface_alt.xml")
+    nested_fn  = args.get('--nested-cascade', "haarcascades/haarcascade_eye.xml")
 
     cascade = cv.CascadeClassifier(cv.samples.findFile(cascade_fn))
     nested = cv.CascadeClassifier(cv.samples.findFile(nested_fn))
 
-    cam = create_capture(video_src, fallback='synth:bg={}:noise=0.05'.format(cv.samples.findFile('samples/data/lena.jpg')))
+    cam = create_capture(video_src, fallback='synth:bg={}:noise=0.05'.format(cv.samples.findFile('lena.jpg')))
 
     while True:
         _ret, img = cam.read()
diff --git a/samples/python/qrcode.py b/samples/python/qrcode.py
index b3253f96c6..21b1a59073 100644
--- a/samples/python/qrcode.py
+++ b/samples/python/qrcode.py
@@ -245,4 +245,6 @@ def main():
 
 
 if __name__ == '__main__':
+    print(__doc__)
     main()
+    cv.destroyAllWindows()
diff --git a/samples/python/stitching_detailed.py b/samples/python/stitching_detailed.py
index 316af979c2..3e0ec50acd 100644
--- a/samples/python/stitching_detailed.py
+++ b/samples/python/stitching_detailed.py
@@ -246,9 +246,9 @@ def get_matcher(args):
     if matcher_type == "affine":
         matcher = cv.detail_AffineBestOf2NearestMatcher(False, try_cuda, match_conf)
     elif range_width == -1:
-        matcher = cv.detail.BestOf2NearestMatcher_create(try_cuda, match_conf)
+        matcher = cv.detail_BestOf2NearestMatcher(try_cuda, match_conf)
     else:
-        matcher = cv.detail.BestOf2NearestRangeMatcher_create(range_width, try_cuda, match_conf)
+        matcher = cv.detail_BestOf2NearestRangeMatcher(range_width, try_cuda, match_conf)
     return matcher
 
 
diff --git a/samples/python/text_skewness_correction.py b/samples/python/text_skewness_correction.py
index c8ee33b39d..c3e97a333b 100644
--- a/samples/python/text_skewness_correction.py
+++ b/samples/python/text_skewness_correction.py
@@ -15,7 +15,7 @@ import argparse
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("-i", "--image", required=True, help="path to input image file")
+    parser.add_argument("-i", "--image", default="imageTextR.png", help="path to input image file")
     args = vars(parser.parse_args())
 
     # load the image from disk
@@ -37,9 +37,9 @@ def main():
     coords = cv.findNonZero(thresh)
     angle = cv.minAreaRect(coords)[-1]
     # the `cv.minAreaRect` function returns values in the
-    # range [-90, 0) if the angle is less than -45 we need to add 90 to it
-    if angle < -45:
-        angle = (90 + angle)
+    # range [0, 90); if the angle is more than 45 we need to subtract 90 from it
+    if angle > 45:
+        angle = (angle - 90)
 
     (h, w) = image.shape[:2]
     center = (w // 2, h // 2)
@@ -55,4 +55,6 @@ def main():
 
 
 if __name__ == "__main__":
+    print(__doc__)
     main()
+    cv.destroyAllWindows()
diff --git a/samples/python/tracker.py b/samples/python/tracker.py
index 753e166ad8..3b04c57e8a 100644
--- a/samples/python/tracker.py
+++ b/samples/python/tracker.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-
 '''
 Tracker demo
 
@@ -36,43 +35,49 @@ class App(object):
 
     def __init__(self, args):
         self.args = args
+        self.trackerAlgorithm = args.tracker_algo
+        self.tracker = self.createTracker()
 
-    def initializeTracker(self, image, trackerAlgorithm):
+    def createTracker(self):
+        # Build the tracker named by self.trackerAlgorithm; sys.exit() on an unknown name
+        algo = self.trackerAlgorithm
+        if algo == 'mil':
+            return cv.TrackerMIL_create()
+        if algo == 'goturn':
+            goturnParams = cv.TrackerGOTURN_Params()
+            goturnParams.modelTxt = self.args.goturn
+            goturnParams.modelBin = self.args.goturn_model
+            return cv.TrackerGOTURN_create(goturnParams)
+        if algo == 'dasiamrpn':
+            siamParams = cv.TrackerDaSiamRPN_Params()
+            siamParams.model = self.args.dasiamrpn_net
+            siamParams.kernel_cls1 = self.args.dasiamrpn_kernel_cls1
+            siamParams.kernel_r1 = self.args.dasiamrpn_kernel_r1
+            return cv.TrackerDaSiamRPN_create(siamParams)
+        sys.exit("Tracker {} is not recognized. Please use one of three available: mil, goturn, dasiamrpn.".format(algo))
+
+    def initializeTracker(self, image):
         while True:
-            if trackerAlgorithm == 'mil':
-                tracker = cv.TrackerMIL_create()
-            elif trackerAlgorithm == 'goturn':
-                params = cv.TrackerGOTURN_Params()
-                params.modelTxt = self.args.goturn
-                params.modelBin = self.args.goturn_model
-                tracker = cv.TrackerGOTURN_create(params)
-            elif trackerAlgorithm == 'dasiamrpn':
-                params = cv.TrackerDaSiamRPN_Params()
-                params.model = self.args.dasiamrpn_net
-                params.kernel_cls1 = self.args.dasiamrpn_kernel_cls1
-                params.kernel_r1 = self.args.dasiamrpn_kernel_r1
-                tracker = cv.TrackerDaSiamRPN_create(params)
-            else:
-                sys.exit("Tracker {} is not recognized. Please use one of three available: mil, goturn, dasiamrpn.".format(trackerAlgorithm))
-
             print('==> Select object ROI for tracker ...')
             bbox = cv.selectROI('tracking', image)
             print('ROI: {}'.format(bbox))
+            if bbox[2] <= 0 or bbox[3] <= 0:
+                sys.exit("ROI selection cancelled. Exiting...")
 
             try:
-                tracker.init(image, bbox)
+                self.tracker.init(image, bbox)
             except Exception as e:
                 print('Unable to initialize tracker with requested bounding box. Is there any object?')
                 print(e)
                 print('Try again ...')
                 continue
 
-            return tracker
+            return
 
     def run(self):
         videoPath = self.args.input
-        trackerAlgorithm = self.args.tracker_algo
-        camera = create_capture(videoPath, presets['cube'])
+        print('Using video: {}'.format(videoPath))
+        camera = create_capture(cv.samples.findFileOrKeep(videoPath), presets['cube'])
         if not camera.isOpened():
             sys.exit("Can't open video stream: {}".format(videoPath))
 
@@ -82,7 +87,7 @@ class App(object):
         assert image is not None
 
         cv.namedWindow('tracking')
-        tracker = self.initializeTracker(image, trackerAlgorithm)
+        self.initializeTracker(image)
 
         print("==> Tracking is started. Press 'SPACE' to re-initialize tracker or 'ESC' for exit...")
 
@@ -92,7 +97,7 @@ class App(object):
                 print("Can't read frame")
                 break
 
-            ok, newbox = tracker.update(image)
+            ok, newbox = self.tracker.update(image)
             #print(ok, newbox)
 
             if ok:
@@ -101,7 +106,7 @@ class App(object):
             cv.imshow("tracking", image)
             k = cv.waitKey(1)
             if k == 32:  # SPACE
-                tracker = self.initializeTracker(image)
+                self.initializeTracker(image)
             if k == 27:  # ESC
                 break
 
@@ -112,22 +117,13 @@ if __name__ == '__main__':
     print(__doc__)
     parser = argparse.ArgumentParser(description="Run tracker")
     parser.add_argument("--input", type=str, default="vtest.avi", help="Path to video source")
-    parser.add_argument("--tracker_algo", type=str, default="mil", help="One of three available tracking algorithms: mil, goturn, dasiamrpn")
+    parser.add_argument("--tracker_algo", type=str, default="mil", help="One of available tracking algorithms: mil, goturn, dasiamrpn")
     parser.add_argument("--goturn", type=str, default="goturn.prototxt", help="Path to GOTURN architecture")
     parser.add_argument("--goturn_model", type=str, default="goturn.caffemodel", help="Path to GOTERN model")
     parser.add_argument("--dasiamrpn_net", type=str, default="dasiamrpn_model.onnx", help="Path to onnx model of DaSiamRPN net")
     parser.add_argument("--dasiamrpn_kernel_r1", type=str, default="dasiamrpn_kernel_r1.onnx", help="Path to onnx model of DaSiamRPN kernel_r1")
     parser.add_argument("--dasiamrpn_kernel_cls1", type=str, default="dasiamrpn_kernel_cls1.onnx", help="Path to onnx model of DaSiamRPN kernel_cls1")
-    parser.add_argument("--dasiamrpn_backend", type=int, default=0, help="Choose one of computation backends:\
-                                                                           0: automatically (by default),\
-                                                                           1: Halide language (http://halide-lang.org/),\
-                                                                           2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit),\
-                                                                           3: OpenCV implementation")
-    parser.add_argument("--dasiamrpn_target", type=int, default=0, help="Choose one of target computation devices:\
-                                                                         0: CPU target (by default),\
-                                                                         1: OpenCL,\
-                                                                         2: OpenCL fp16 (half-float precision),\
-                                                                         3: VPU")
+
     args = parser.parse_args()
     App(args).run()
     cv.destroyAllWindows()